small fixes in xseg-tool
[archipelago] / xseg / peers / kernel / xsegbd.c
/* xsegbd.c
 *
 */

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/types.h>
#include <linux/vmalloc.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/device.h>
#include <linux/completion.h>

#include <sys/kernel/segdev.h>
#include "xsegbd.h"

#define XSEGBD_MINORS 1
/* define max request size to be used in xsegbd */
//FIXME should we make this 4MB instead of 256KB ?
#define XSEGBD_MAX_REQUEST_SIZE 262144U

MODULE_DESCRIPTION("xsegbd");
MODULE_AUTHOR("XSEG");
MODULE_LICENSE("GPL");

static long sector_size = 0;
static long blksize = 512;
static int major = 0;
static int max_nr_pending = 1024;
static char name[XSEGBD_SEGMENT_NAMELEN] = "xsegbd";
static char spec[256] = "segdev:xsegbd:4:256:12";

module_param(sector_size, long, 0644);
module_param(blksize, long, 0644);
module_param(max_nr_pending, int, 0644);
module_param(major, int, 0644);
module_param_string(name, name, sizeof(name), 0644);
module_param_string(spec, spec, sizeof(spec), 0644);

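/* Book-keeping for in-flight requests: every xseg request carries (in ->priv)
 * the index of a slot in blk_req_pending, so the completion path can recover
 * the originating block request, or the completion a waiter is sleeping on.
 */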
struct pending {
	struct request *request;
	struct completion *comp;
	struct xsegbd_device *dev;
};

static struct xq blk_queue_pending;
static struct pending *blk_req_pending;
static unsigned int nr_pending;
static spinlock_t __lock;
static struct xsegbd xsegbd;
static DEFINE_MUTEX(xsegbd_mutex);
static LIST_HEAD(xsegbd_dev_list);
static DEFINE_SPINLOCK(xsegbd_dev_list_lock);

/* ************************* */
/* ***** sysfs helpers ***** */
/* ************************* */

static struct xsegbd_device *dev_to_xsegbd(struct device *dev)
{
	return container_of(dev, struct xsegbd_device, dev);
}

static struct device *xsegbd_get_dev(struct xsegbd_device *xsegbd_dev)
{
	/* FIXME */
	return get_device(&xsegbd_dev->dev);
}

static void xsegbd_put_dev(struct xsegbd_device *xsegbd_dev)
{
	put_device(&xsegbd_dev->dev);
}

/* ************************* */
/* ** XSEG Initialization ** */
/* ************************* */

static void xseg_callback(struct xseg *xseg, uint32_t portno);

int xsegbd_xseg_init(void)
{
	int r;

	if (!xsegbd.name[0])
		strncpy(xsegbd.name, name, XSEGBD_SEGMENT_NAMELEN);

	r = xseg_initialize();
	if (r) {
		XSEGLOG("cannot initialize 'segdev' peer");
		goto err;
	}

	r = xseg_parse_spec(spec, &xsegbd.config);
	if (r)
		goto err;

	if (strncmp(xsegbd.config.type, "segdev", 16))
		XSEGLOG("WARNING: unexpected segment type '%s' vs 'segdev'",
			 xsegbd.config.type);

	XSEGLOG("joining segment");
	xsegbd.xseg = xseg_join(	xsegbd.config.type,
					xsegbd.config.name,
					"segdev",
					xseg_callback		);
	if (!xsegbd.xseg) {
		XSEGLOG("cannot find segment");
		r = -ENODEV;
		goto err;
	}

	return 0;
err:
	return r;
}

int xsegbd_xseg_quit(void)
{
	struct segdev *segdev;

	/* make sure to unmap the segment first */
	segdev = segdev_get(0);
	clear_bit(SEGDEV_RESERVED, &segdev->flags);
	xsegbd.xseg->priv->segment_type.ops.unmap(xsegbd.xseg, xsegbd.xseg->segment_size);
	segdev_put(segdev);

	return 0;
}


/* ***************************** */
/* ** Block Device Operations ** */
/* ***************************** */

static int xsegbd_open(struct block_device *bdev, fmode_t mode)
{
	struct gendisk *disk = bdev->bd_disk;
	struct xsegbd_device *xsegbd_dev = disk->private_data;

	xsegbd_get_dev(xsegbd_dev);

	return 0;
}

static int xsegbd_release(struct gendisk *gd, fmode_t mode)
{
	struct xsegbd_device *xsegbd_dev = gd->private_data;

	xsegbd_put_dev(xsegbd_dev);

	return 0;
}

static int xsegbd_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	return -ENOTTY;
}

static const struct block_device_operations xsegbd_ops = {
	.owner		= THIS_MODULE,
	.open		= xsegbd_open,
	.release	= xsegbd_release,
	.ioctl		= xsegbd_ioctl,
};

/* *************************** */
/* ** Device Initialization ** */
/* *************************** */

static void xseg_request_fn(struct request_queue *rq);
static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev);

static int xsegbd_dev_init(struct xsegbd_device *xsegbd_dev)
{
	int ret = -ENOMEM;
	struct gendisk *disk;
	unsigned int max_request_size_bytes;

	spin_lock_init(&xsegbd_dev->lock);

	xsegbd_dev->xsegbd = &xsegbd;

	xsegbd_dev->blk_queue = blk_alloc_queue(GFP_KERNEL);
	if (!xsegbd_dev->blk_queue)
		goto out;

	blk_init_allocated_queue(xsegbd_dev->blk_queue, xseg_request_fn, &xsegbd_dev->lock);
	xsegbd_dev->blk_queue->queuedata = xsegbd_dev;

	blk_queue_flush(xsegbd_dev->blk_queue, REQ_FLUSH | REQ_FUA);
	blk_queue_logical_block_size(xsegbd_dev->blk_queue, 512);
	blk_queue_physical_block_size(xsegbd_dev->blk_queue, blksize);
	blk_queue_bounce_limit(xsegbd_dev->blk_queue, BLK_BOUNCE_ANY);

	//blk_queue_max_segments(dev->blk_queue, 512);

	max_request_size_bytes = XSEGBD_MAX_REQUEST_SIZE;
	blk_queue_max_hw_sectors(xsegbd_dev->blk_queue, max_request_size_bytes >> 9);
	blk_queue_max_segment_size(xsegbd_dev->blk_queue, max_request_size_bytes);
	blk_queue_io_min(xsegbd_dev->blk_queue, max_request_size_bytes);
	blk_queue_io_opt(xsegbd_dev->blk_queue, max_request_size_bytes);

	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, xsegbd_dev->blk_queue);

	/* vkoukis says we don't need partitions */
	xsegbd_dev->gd = disk = alloc_disk(1);
	if (!disk)
		/* FIXME: We call xsegbd_dev_release if something goes wrong, to cleanup
		 * disks/queues/etc.
		 * Would it be better to do the cleanup here, and conditionally cleanup
		 * in dev_release?
		 */
		goto out;

	disk->major = xsegbd_dev->major;
	disk->first_minor = 0; // id * XSEGBD_MINORS;
	disk->fops = &xsegbd_ops;
	disk->queue = xsegbd_dev->blk_queue;
	disk->private_data = xsegbd_dev;
	disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
	snprintf(disk->disk_name, 32, "xsegbd%u", xsegbd_dev->id);

	ret = 0;
	spin_lock_irq(&__lock);
	if (nr_pending + xsegbd_dev->nr_requests > max_nr_pending)
		ret = -ENOBUFS;
	else
		nr_pending += xsegbd_dev->nr_requests;
	spin_unlock_irq(&__lock);

	if (ret)
		goto out;

	/* allow a non-zero sector_size parameter to override the disk size */
	if (sector_size)
		xsegbd_dev->sectors = sector_size;
	else {
		ret = xsegbd_get_size(xsegbd_dev);
		if (ret)
			goto out;
	}

	set_capacity(disk, xsegbd_dev->sectors);
	XSEGLOG("xsegbd active...");
	add_disk(disk); /* immediately activates the device */

	return 0;

out:
	return ret;
}

static void xsegbd_dev_release(struct device *dev)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);

	/* cleanup gendisk and blk_queue the right way */
	if (xsegbd_dev->gd) {
		if (xsegbd_dev->gd->flags & GENHD_FL_UP)
			del_gendisk(xsegbd_dev->gd);

		blk_cleanup_queue(xsegbd_dev->blk_queue);
		put_disk(xsegbd_dev->gd);
	}

	/* xsegbd does not actually need to wait here.
	 * Maybe call xseg_cancel_wait for clarity; with the
	 * xseg_segdev kernel driver this would be a no-op.
	 */
//	xseg_cancel_wait(xseg, xsegbd_dev->src_portno);

	if (xseg_free_requests(xsegbd.xseg, xsegbd_dev->src_portno, xsegbd_dev->nr_requests) != 0)
		XSEGLOG("Error trying to free requests!\n");

	WARN_ON(nr_pending < xsegbd_dev->nr_requests);
	spin_lock_irq(&__lock);
	nr_pending -= xsegbd_dev->nr_requests;
	spin_unlock_irq(&__lock);

	unregister_blkdev(xsegbd_dev->major, XSEGBD_NAME);

	spin_lock(&xsegbd_dev_list_lock);
	list_del_init(&xsegbd_dev->node);
	spin_unlock(&xsegbd_dev_list_lock);
	kfree(xsegbd_dev);

	module_put(THIS_MODULE);
}

/* ******************* */
/* ** Critical Path ** */
/* ******************* */

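/* Copy helpers between the bio segments of a block request and the data
 * buffer of an xseg request that lives in the shared segment.
 */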
static void blk_to_xseg(struct xseg *xseg, struct xseg_request *xreq,
			struct request *blkreq)
{
	struct bio_vec *bvec;
	struct req_iterator iter;
	uint64_t off = 0;
	char *data = XSEG_TAKE_PTR(xreq->data, xseg->segment);
	rq_for_each_segment(bvec, blkreq, iter) {
		char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
		memcpy(data + off, bdata, bvec->bv_len);
		off += bvec->bv_len;
		kunmap_atomic(bdata);
	}
}

static void xseg_to_blk(struct xseg *xseg, struct xseg_request *xreq,
			struct request *blkreq)
{
	struct bio_vec *bvec;
	struct req_iterator iter;
	uint64_t off = 0;
	char *data = XSEG_TAKE_PTR(xreq->data, xseg->segment);
	rq_for_each_segment(bvec, blkreq, iter) {
		char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
		memcpy(bdata, data + off, bvec->bv_len);
		off += bvec->bv_len;
		kunmap_atomic(bdata);
	}
}

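/* Request function: called with the queue lock held. Drain the block queue,
 * translating each fs request into an xseg request and submitting it to the
 * destination port of the device.
 */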
static void xseg_request_fn(struct request_queue *rq)
{
	struct xseg_request *xreq;
	struct xsegbd_device *xsegbd_dev = rq->queuedata;
	struct request *blkreq;
	struct pending *pending;
	xqindex blkreq_idx;
	char *target;
	uint64_t datalen;

	for (;;) {
		xreq = xseg_get_request(xsegbd.xseg, xsegbd_dev->src_portno);
		if (!xreq)
			break;

		blkreq = blk_fetch_request(rq);
		if (!blkreq)
			break;

		if (blkreq->cmd_type != REQ_TYPE_FS) {
			XSEGLOG("non-fs cmd_type: %u. *shrug*", blkreq->cmd_type);
			__blk_end_request_all(blkreq, 0);
			/* return the unused xseg request and move on to the
			 * next block request, instead of submitting it for a
			 * request we have already completed */
			BUG_ON(xseg_put_request(xsegbd.xseg, xsegbd_dev->src_portno, xreq) == NoSerial);
			continue;
		}

		datalen = blk_rq_bytes(blkreq);
		BUG_ON(xseg_prep_request(xsegbd.xseg, xreq, xsegbd_dev->targetlen, datalen));
		BUG_ON(xreq->bufferlen - xsegbd_dev->targetlen < datalen);

		target = XSEG_TAKE_PTR(xreq->target, xsegbd.xseg->segment);
		strncpy(target, xsegbd_dev->target, xsegbd_dev->targetlen);
		blkreq_idx = xq_pop_head(&blk_queue_pending, 1);
		BUG_ON(blkreq_idx == Noneidx);
		pending = &blk_req_pending[blkreq_idx];
		pending->dev = xsegbd_dev;
		pending->request = blkreq;
		pending->comp = NULL;
		xreq->priv = (uint64_t)blkreq_idx;
		xreq->size = datalen;
		xreq->offset = blk_rq_pos(blkreq) << 9;
		/*
		if (xreq->offset >= (sector_size << 9))
			XSEGLOG("sector offset: %lu > %lu, flush:%u, fua:%u",
				 blk_rq_pos(blkreq), sector_size,
				 blkreq->cmd_flags & REQ_FLUSH,
				 blkreq->cmd_flags & REQ_FUA);
		*/

		if (blkreq->cmd_flags & REQ_FLUSH)
			xreq->flags |= XF_FLUSH;

		if (blkreq->cmd_flags & REQ_FUA)
			xreq->flags |= XF_FUA;

		if (rq_data_dir(blkreq)) {
			/* unlock for data transfers? */
			blk_to_xseg(xsegbd.xseg, xreq, blkreq);
			xreq->op = X_WRITE;
		} else {
			xreq->op = X_READ;
		}

		BUG_ON(xseg_submit(xsegbd.xseg, xsegbd_dev->dst_portno, xreq) == NoSerial);
	}

	WARN_ON(xseg_signal(xsegbd_dev->xsegbd->xseg, xsegbd_dev->dst_portno) < 0);
	if (xreq)
		BUG_ON(xseg_put_request(xsegbd_dev->xsegbd->xseg, xsegbd_dev->src_portno, xreq) == NoSerial);
}

int update_dev_sectors_from_request(	struct xsegbd_device *xsegbd_dev,
					struct xseg_request *xreq	)
{
	void *data;

	if (xreq->state & XS_FAILED)
		return -ENOENT;

	if (!(xreq->state & XS_SERVED))
		return -EIO;

	data = XSEG_TAKE_PTR(xreq->data, xsegbd.xseg->segment);
	xsegbd_dev->sectors = *((uint64_t *) data) / 512ULL;
	return 0;
}

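/* Query the peer for the target size with a synchronous X_INFO request;
 * sleep on a completion until xseg_callback() wakes us up, then read the
 * size out of the reply buffer.
 */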
static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev)
{
	struct xseg_request *xreq;
	char *target;
	uint64_t datalen;
	xqindex blkreq_idx;
	struct pending *pending;
	struct completion comp;
	int ret = -EBUSY;

	xreq = xseg_get_request(xsegbd.xseg, xsegbd_dev->src_portno);
	if (!xreq)
		goto out;

	datalen = sizeof(uint64_t);
	BUG_ON(xseg_prep_request(xsegbd.xseg, xreq, xsegbd_dev->targetlen, datalen));
	BUG_ON(xreq->bufferlen - xsegbd_dev->targetlen < datalen);

	init_completion(&comp);
	blkreq_idx = xq_pop_head(&blk_queue_pending, 1);
	BUG_ON(blkreq_idx == Noneidx);
	pending = &blk_req_pending[blkreq_idx];
	pending->dev = xsegbd_dev;
	pending->request = NULL;
	pending->comp = &comp;
	xreq->priv = (uint64_t)blkreq_idx;

	target = XSEG_TAKE_PTR(xreq->target, xsegbd.xseg->segment);
	strncpy(target, xsegbd_dev->target, xsegbd_dev->targetlen);
	xreq->size = datalen;
	xreq->offset = 0;

	xreq->op = X_INFO;

	/* Waiting on the port is not needed here.
	 * It might still be clearer to use xseg_prepare_wait;
	 * with the xseg_segdev kernel driver it would be a no-op.
	 */
//	port = &xsegbd.xseg->ports[xsegbd_dev->src_portno];
//	port->waitcue = (uint64_t)(long)xsegbd_dev;

	BUG_ON(xseg_submit(xsegbd.xseg, xsegbd_dev->dst_portno, xreq) == NoSerial);
	WARN_ON(xseg_signal(xsegbd.xseg, xsegbd_dev->dst_portno) < 0);

	wait_for_completion_interruptible(&comp);
	XSEGLOG("Woken up after wait_for_completion_interruptible()\n");
	ret = update_dev_sectors_from_request(xsegbd_dev, xreq);
	XSEGLOG("get_size: sectors = %ld\n", (long)xsegbd_dev->sectors);
out:
	/* do not put back a request we never managed to get */
	if (xreq)
		BUG_ON(xseg_put_request(xsegbd.xseg, xsegbd_dev->src_portno, xreq) == NoSerial);
	return ret;
}

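/* Completion path: invoked when the peer signals our port. For each received
 * xseg request, either wake the waiter blocked on it or end the corresponding
 * block request, then restart the request function of the affected device.
 */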
static void xseg_callback(struct xseg *xseg, uint32_t portno)
{
	struct xsegbd_device *xsegbd_dev = NULL, *old_dev = NULL;
	struct xseg_request *xreq;
	struct request *blkreq;
	struct pending *pending;
	unsigned long flags;
	uint32_t blkreq_idx;
	int err;

	for (;;) {
		xreq = xseg_receive(xseg, portno);
		if (!xreq)
			break;

		/* we rely upon our peers to not have touched ->priv */
		blkreq_idx = (uint64_t)xreq->priv;
		if (blkreq_idx >= max_nr_pending) {
			WARN_ON(1);
			continue;
		}

		pending = &blk_req_pending[blkreq_idx];
		if (pending->comp) {
			/* someone is blocking on this request
			   and will handle it when we wake them up. */
			complete(pending->comp);
			/* the request is blocker's responsibility so
			   we will not put_request(); */
			continue;
		}

		/* this is now treated as a block I/O request to end */
		blkreq = pending->request;
		pending->request = NULL;
		xsegbd_dev = pending->dev;
		pending->dev = NULL;
		WARN_ON(!blkreq);

		if ((xsegbd_dev != old_dev) && old_dev) {
			spin_lock_irqsave(&old_dev->lock, flags);
			xseg_request_fn(old_dev->blk_queue);
			spin_unlock_irqrestore(&old_dev->lock, flags);
		}

		old_dev = xsegbd_dev;

		/* end the block request with an error unless it was fully served */
		err = -EIO;
		if (!(xreq->state & XS_SERVED))
			goto blk_end;

		if (xreq->serviced != blk_rq_bytes(blkreq))
			goto blk_end;

		/* unlock for data transfer? */
		if (!rq_data_dir(blkreq))
			xseg_to_blk(xseg, xreq, blkreq);

		err = 0;
blk_end:
		blk_end_request_all(blkreq, err);
		xq_append_head(&blk_queue_pending, blkreq_idx, 1);
		BUG_ON(xseg_put_request(xseg, xreq->portno, xreq) == NoSerial);
	}

	if (xsegbd_dev) {
		spin_lock_irqsave(&xsegbd_dev->lock, flags);
		xseg_request_fn(xsegbd_dev->blk_queue);
		spin_unlock_irqrestore(&xsegbd_dev->lock, flags);
	}
}

/* sysfs interface */

static struct bus_type xsegbd_bus_type = {
	.name	= "xsegbd",
};

static ssize_t xsegbd_size_show(struct device *dev,
					struct device_attribute *attr, char *buf)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);

	return sprintf(buf, "%llu\n", (unsigned long long) xsegbd_dev->sectors * 512ULL);
}

static ssize_t xsegbd_major_show(struct device *dev,
					struct device_attribute *attr, char *buf)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);

	return sprintf(buf, "%d\n", xsegbd_dev->major);
}

static ssize_t xsegbd_srcport_show(struct device *dev,
					struct device_attribute *attr, char *buf)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);

	return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->src_portno);
}

static ssize_t xsegbd_dstport_show(struct device *dev,
					struct device_attribute *attr, char *buf)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);

	return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->dst_portno);
}

static ssize_t xsegbd_id_show(struct device *dev,
					struct device_attribute *attr, char *buf)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);

	return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->id);
}

static ssize_t xsegbd_reqs_show(struct device *dev,
					struct device_attribute *attr, char *buf)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);

	return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->nr_requests);
}

static ssize_t xsegbd_target_show(struct device *dev,
					struct device_attribute *attr, char *buf)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);

	return sprintf(buf, "%s\n", xsegbd_dev->target);
}

static ssize_t xsegbd_image_refresh(struct device *dev,
					struct device_attribute *attr,
					const char *buf,
					size_t size)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
	int rc, ret = size;

	mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);

	rc = xsegbd_get_size(xsegbd_dev);
	if (rc < 0) {
		ret = rc;
		goto out;
	}

	set_capacity(xsegbd_dev->gd, xsegbd_dev->sectors);

out:
	mutex_unlock(&xsegbd_mutex);
	return ret;
}

static DEVICE_ATTR(size, S_IRUGO, xsegbd_size_show, NULL);
static DEVICE_ATTR(major, S_IRUGO, xsegbd_major_show, NULL);
static DEVICE_ATTR(srcport, S_IRUGO, xsegbd_srcport_show, NULL);
static DEVICE_ATTR(dstport, S_IRUGO, xsegbd_dstport_show, NULL);
static DEVICE_ATTR(id, S_IRUGO, xsegbd_id_show, NULL);
static DEVICE_ATTR(reqs, S_IRUGO, xsegbd_reqs_show, NULL);
static DEVICE_ATTR(target, S_IRUGO, xsegbd_target_show, NULL);
static DEVICE_ATTR(refresh, S_IWUSR, NULL, xsegbd_image_refresh);

static struct attribute *xsegbd_attrs[] = {
	&dev_attr_size.attr,
	&dev_attr_major.attr,
	&dev_attr_srcport.attr,
	&dev_attr_dstport.attr,
	&dev_attr_id.attr,
	&dev_attr_reqs.attr,
	&dev_attr_target.attr,
	&dev_attr_refresh.attr,
	NULL
};

static struct attribute_group xsegbd_attr_group = {
	.attrs = xsegbd_attrs,
};

static const struct attribute_group *xsegbd_attr_groups[] = {
	&xsegbd_attr_group,
	NULL
};

static void xsegbd_sysfs_dev_release(struct device *dev)
{
}

static struct device_type xsegbd_device_type = {
	.name		= "xsegbd",
	.groups		= xsegbd_attr_groups,
	.release	= xsegbd_sysfs_dev_release,
};

static void xsegbd_root_dev_release(struct device *dev)
{
}

static struct device xsegbd_root_dev = {
	.init_name	= "xsegbd",
	.release	= xsegbd_root_dev_release,
};

static int xsegbd_bus_add_dev(struct xsegbd_device *xsegbd_dev)
{
	int ret = -ENOMEM;
	struct device *dev;

	mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
	dev = &xsegbd_dev->dev;

	dev->bus = &xsegbd_bus_type;
	dev->type = &xsegbd_device_type;
	dev->parent = &xsegbd_root_dev;
	dev->release = xsegbd_dev_release;
	dev_set_name(dev, "%d", xsegbd_dev->id);

	ret = device_register(dev);

	mutex_unlock(&xsegbd_mutex);
	return ret;
}

static void xsegbd_bus_del_dev(struct xsegbd_device *xsegbd_dev)
{
	device_unregister(&xsegbd_dev->dev);
}

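/* sysfs 'add' handler: parse "<target> <src>:<dst>:<nr_requests>", pick a
 * free id, register a block device major, bind the source xseg port and
 * bring up the gendisk for the new device.
 */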
static ssize_t xsegbd_add(struct bus_type *bus, const char *buf, size_t count)
{
	struct xsegbd_device *xsegbd_dev;
	struct xseg_port *xport;
	ssize_t ret = -ENOMEM;
	int new_id = 0;
	struct list_head *tmp;

	if (!try_module_get(THIS_MODULE))
		return -ENODEV;

	xsegbd_dev = kzalloc(sizeof(*xsegbd_dev), GFP_KERNEL);
	if (!xsegbd_dev)
		goto out;

	spin_lock_init(&xsegbd_dev->lock);
	INIT_LIST_HEAD(&xsegbd_dev->node);

	/* parse cmd */
	if (sscanf(buf, "%" __stringify(XSEGBD_TARGET_NAMELEN) "s "
			"%d:%d:%d", xsegbd_dev->target, &xsegbd_dev->src_portno,
			&xsegbd_dev->dst_portno, &xsegbd_dev->nr_requests) < 3) {
		ret = -EINVAL;
		goto out_dev;
	}
	xsegbd_dev->targetlen = strlen(xsegbd_dev->target);

	spin_lock(&xsegbd_dev_list_lock);

	list_for_each(tmp, &xsegbd_dev_list) {
		struct xsegbd_device *entry;

		entry = list_entry(tmp, struct xsegbd_device, node);

		if (entry->src_portno == xsegbd_dev->src_portno) {
			ret = -EINVAL;
			goto out_unlock;
		}

		if (entry->id >= new_id)
			new_id = entry->id + 1;
	}

	xsegbd_dev->id = new_id;

	list_add_tail(&xsegbd_dev->node, &xsegbd_dev_list);

	spin_unlock(&xsegbd_dev_list_lock);

	XSEGLOG("registering block device major %d", major);
	ret = register_blkdev(major, XSEGBD_NAME);
	if (ret < 0) {
		XSEGLOG("cannot register block device!");
		ret = -EBUSY;
		goto out_delentry;
	}
	xsegbd_dev->major = ret;
	XSEGLOG("registered block device major %d", xsegbd_dev->major);

	ret = xsegbd_bus_add_dev(xsegbd_dev);
	if (ret)
		goto out_blkdev;

	XSEGLOG("binding to source port %u (destination %u)",
			xsegbd_dev->src_portno, xsegbd_dev->dst_portno);
	xport = xseg_bind_port(xsegbd.xseg, xsegbd_dev->src_portno);
	if (!xport) {
		XSEGLOG("cannot bind to port");
		ret = -EFAULT;

		goto out_bus;
	}
	/* make sure we don't get any requests until we're ready to handle them */
	xport->waitcue = (long) NULL;

	XSEGLOG("allocating %u requests", xsegbd_dev->nr_requests);
	if (xseg_alloc_requests(xsegbd.xseg, xsegbd_dev->src_portno, xsegbd_dev->nr_requests)) {
		XSEGLOG("cannot allocate requests");
		ret = -EFAULT;

		goto out_bus;
	}

	ret = xsegbd_dev_init(xsegbd_dev);
	if (ret)
		goto out_bus;

	return count;

out_bus:
	xsegbd_bus_del_dev(xsegbd_dev);

	return ret;

out_blkdev:
	unregister_blkdev(xsegbd_dev->major, XSEGBD_NAME);

out_delentry:
	spin_lock(&xsegbd_dev_list_lock);
	list_del_init(&xsegbd_dev->node);

out_unlock:
	spin_unlock(&xsegbd_dev_list_lock);

out_dev:
	kfree(xsegbd_dev);

out:
	/* drop the module reference taken above; the success and out_bus
	 * paths keep it until xsegbd_dev_release() runs */
	module_put(THIS_MODULE);
	return ret;
}

static struct xsegbd_device *__xsegbd_get_dev(unsigned long id)
{
	struct list_head *tmp;
	struct xsegbd_device *xsegbd_dev;

	spin_lock(&xsegbd_dev_list_lock);
	list_for_each(tmp, &xsegbd_dev_list) {
		xsegbd_dev = list_entry(tmp, struct xsegbd_device, node);
		if (xsegbd_dev->id == id) {
			spin_unlock(&xsegbd_dev_list_lock);
			return xsegbd_dev;
		}
	}
	spin_unlock(&xsegbd_dev_list_lock);
	return NULL;
}

static ssize_t xsegbd_remove(struct bus_type *bus, const char *buf, size_t count)
{
	struct xsegbd_device *xsegbd_dev = NULL;
	int id, ret;
	unsigned long ul_id;

	ret = strict_strtoul(buf, 10, &ul_id);
	if (ret)
		return ret;

	id = (int) ul_id;
	if (id != ul_id)
		return -EINVAL;

	mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);

	ret = count;
	xsegbd_dev = __xsegbd_get_dev(id);
	if (!xsegbd_dev) {
		ret = -ENOENT;
		goto out_unlock;
	}

	xsegbd_bus_del_dev(xsegbd_dev);

out_unlock:
	mutex_unlock(&xsegbd_mutex);
	return ret;
}

static struct bus_attribute xsegbd_bus_attrs[] = {
	__ATTR(add, S_IWUSR, NULL, xsegbd_add),
	__ATTR(remove, S_IWUSR, NULL, xsegbd_remove),
	__ATTR_NULL
};

static int xsegbd_sysfs_init(void)
{
	int ret;

	ret = device_register(&xsegbd_root_dev);
	if (ret < 0)
		return ret;

	xsegbd_bus_type.bus_attrs = xsegbd_bus_attrs;
	ret = bus_register(&xsegbd_bus_type);
	if (ret < 0)
		device_unregister(&xsegbd_root_dev);

	return ret;
}

static void xsegbd_sysfs_cleanup(void)
{
	bus_unregister(&xsegbd_bus_type);
	device_unregister(&xsegbd_root_dev);
}

/* *************************** */
/* ** Module Initialization ** */
/* *************************** */

static int __init xsegbd_init(void)
{
	int ret = -ENOMEM;

	if (!xq_alloc_seq(&blk_queue_pending, max_nr_pending, max_nr_pending))
		goto out;

	blk_req_pending = kzalloc(sizeof(struct pending) * max_nr_pending, GFP_KERNEL);
	if (!blk_req_pending)
		goto out_queue;

	ret = xsegbd_xseg_init();
	if (ret)
		goto out_pending;

	ret = xsegbd_sysfs_init();
	if (ret)
		goto out_xseg;

	XSEGLOG("initialization complete");

out:
	return ret;

out_xseg:
	xsegbd_xseg_quit();
out_pending:
	kfree(blk_req_pending);
out_queue:
	xq_free(&blk_queue_pending);
	goto out;
}

static void __exit xsegbd_exit(void)
{
	xsegbd_sysfs_cleanup();
	xsegbd_xseg_quit();
}

module_init(xsegbd_init);
module_exit(xsegbd_exit);
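For reference, the bus attributes above are driven from user space through
/sys/bus/xsegbd/add and /sys/bus/xsegbd/remove. A minimal user-space sketch
follows; the target name, port numbers, request count and device id are
placeholder values, not anything defined by this module:

/* attach a target via the xsegbd sysfs bus interface, then detach it */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/bus/xsegbd/add", "w");
	if (!f)
		return 1;
	/* format parsed by xsegbd_add(): "<target> <src>:<dst>:<nr_requests>" */
	fprintf(f, "mytarget 1:0:128\n");
	if (fclose(f))
		return 1;

	f = fopen("/sys/bus/xsegbd/remove", "w");
	if (!f)
		return 1;
	fprintf(f, "0\n");	/* device id, as reported by the 'id' attribute */
	return fclose(f) ? 1 : 0;
}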