remove debug from launch script.
[archipelago] / xseg / peers / kernel / xsegbd.c
1 /* xsegbd.c
2  *
3  */
4
5 #include <linux/module.h>
6 #include <linux/moduleparam.h>
7 #include <linux/init.h>
8 #include <linux/sched.h>
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
11 #include <linux/fs.h>
12 #include <linux/errno.h>
13 #include <linux/timer.h>
14 #include <linux/types.h>
15 #include <linux/vmalloc.h>
16 #include <linux/genhd.h>
17 #include <linux/blkdev.h>
18 #include <linux/bio.h>
19 #include <linux/device.h>
20 #include <linux/completion.h>
21
22 #include <sys/kernel/segdev.h>
23 #include "xsegbd.h"
24
25 #define XSEGBD_MINORS 1
26 /* define max request size to be used in xsegbd */
27 //FIXME should we make this 4MB instead of 256KB ?
28 #define XSEGBD_MAX_REQUEST_SIZE 262144U
29
30 MODULE_DESCRIPTION("xsegbd");
31 MODULE_AUTHOR("XSEG");
32 MODULE_LICENSE("GPL");
33
/* Module parameters (tunable via /sys/module/xsegbd/parameters/). */
static long sector_size = 0;    /* non-zero overrides the size queried from the peer */
static long blksize = 512;      /* physical block size advertised to the block layer */
static int major = 0;           /* 0 => let register_blkdev() pick a major number */
static int max_dev = 1024;      /* maximum number of xsegbd devices */
static char name[XSEGBD_SEGMENT_NAMELEN] = "xsegbd";    /* peer name inside the segment */
static char spec[256] = "segdev:xsegbd:4:1024:12";      /* xseg segment spec string */

module_param(sector_size, long, 0644);
module_param(blksize, long, 0644);
module_param(max_dev, int, 0644);
module_param(major, int, 0644);
module_param_string(name, name, sizeof(name), 0644);
module_param_string(spec, spec, sizeof(spec), 0644);
47
static struct xsegbd xsegbd;                    /* module-wide state: segment handle + config */
static struct xsegbd_device **xsegbd_devices;   /* indexed by portno */
static DEFINE_MUTEX(xsegbd_mutex);              /* serializes sysfs add/refresh/cleanup paths */
static DEFINE_SPINLOCK(xsegbd_devices_lock);    /* protects the xsegbd_devices[] table */
52
53
54
55 static struct xsegbd_device *__xsegbd_get_dev(unsigned long id)
56 {
57         struct xsegbd_device *xsegbd_dev = NULL;
58
59         spin_lock(&xsegbd_devices_lock);
60         xsegbd_dev = xsegbd_devices[id];
61         spin_unlock(&xsegbd_devices_lock);
62
63         return xsegbd_dev;
64 }
65
66 /* ************************* */
67 /* ***** sysfs helpers ***** */
68 /* ************************* */
69
70 static struct xsegbd_device *dev_to_xsegbd(struct device *dev)
71 {
72         return container_of(dev, struct xsegbd_device, dev);
73 }
74
/* Take a reference on the device's embedded struct device. */
static struct device *xsegbd_get_dev(struct xsegbd_device *xsegbd_dev)
{
        /* FIXME */
        return get_device(&xsegbd_dev->dev);
}
80
81 static void xsegbd_put_dev(struct xsegbd_device *xsegbd_dev)
82 {
83         put_device(&xsegbd_dev->dev);
84 }
85
86 /* ************************* */
87 /* ** XSEG Initialization ** */
88 /* ************************* */
89
90 static void xseg_callback(uint32_t portno);
91
92 int xsegbd_xseg_init(void)
93 {
94         int r;
95
96         if (!xsegbd.name[0])
97                 strncpy(xsegbd.name, name, XSEGBD_SEGMENT_NAMELEN);
98
99         r = xseg_initialize();
100         if (r) {
101                 XSEGLOG("cannot initialize 'segdev' peer");
102                 goto err;
103         }
104
105         r = xseg_parse_spec(spec, &xsegbd.config);
106         if (r)
107                 goto err;
108
109         if (strncmp(xsegbd.config.type, "segdev", 16))
110                 XSEGLOG("WARNING: unexpected segment type '%s' vs 'segdev'",
111                          xsegbd.config.type);
112
113         /* leave it here for now */
114         XSEGLOG("joining segment");
115         xsegbd.xseg = xseg_join(        xsegbd.config.type,
116                                         xsegbd.config.name,
117                                         "segdev",
118                                         xseg_callback           );
119         if (!xsegbd.xseg) {
120                 XSEGLOG("cannot find segment");
121                 r = -ENODEV;
122                 goto err;
123         }
124
125         return 0;
126 err:
127         return r;
128
129 }
130
/*
 * Tear down the segment mapping on module exit: release the segdev
 * reservation, then ask the segment type to unmap the whole segment.
 * NOTE(review): assumes segdev_get(0) always returns a valid segdev on
 * this path -- confirm against segdev.h.
 */
int xsegbd_xseg_quit(void)
{
        struct segdev *segdev;

        /* make sure to unmap the segment first */
        segdev = segdev_get(0);
        clear_bit(SEGDEV_RESERVED, &segdev->flags);
        xsegbd.xseg->priv->segment_type.ops.unmap(xsegbd.xseg, xsegbd.xseg->segment_size);
        segdev_put(segdev);

        return 0;
}
143
144
145 /* ***************************** */
146 /* ** Block Device Operations ** */
147 /* ***************************** */
148
149 static int xsegbd_open(struct block_device *bdev, fmode_t mode)
150 {
151         struct gendisk *disk = bdev->bd_disk;
152         struct xsegbd_device *xsegbd_dev = disk->private_data;
153
154         xsegbd_get_dev(xsegbd_dev);
155
156         return 0;
157 }
158
159 static int xsegbd_release(struct gendisk *gd, fmode_t mode)
160 {
161         struct xsegbd_device *xsegbd_dev = gd->private_data;
162
163         xsegbd_put_dev(xsegbd_dev);
164
165         return 0;
166 }
167
/* No ioctls supported. */
static int xsegbd_ioctl(struct block_device *bdev, fmode_t mode,
                        unsigned int cmd, unsigned long arg)
{
        return -ENOTTY;
}
173
/* Block-device operations table installed on every xsegbd gendisk. */
static const struct block_device_operations xsegbd_ops = {
        .owner          = THIS_MODULE,
        .open           = xsegbd_open,
        .release        = xsegbd_release,
        .ioctl          = xsegbd_ioctl
};
180
181
182 /* *************************** */
183 /* ** Device Initialization ** */
184 /* *************************** */
185
186 static void xseg_request_fn(struct request_queue *rq);
187 static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev);
188
/*
 * Set up the request queue and gendisk for one xsegbd device, size it
 * (from the sector_size module parameter, or by querying the peer via
 * xsegbd_get_size()) and activate it with add_disk().
 * Returns 0 on success or a negative error code; on failure the queue
 * and disk are torn down and xsegbd_dev->gd is left NULL.
 */
static int xsegbd_dev_init(struct xsegbd_device *xsegbd_dev)
{
        int ret = -ENOMEM;
        struct gendisk *disk;
        unsigned int max_request_size_bytes;

        spin_lock_init(&xsegbd_dev->rqlock);

        xsegbd_dev->xsegbd = &xsegbd;

        xsegbd_dev->blk_queue = blk_alloc_queue(GFP_KERNEL);
        if (!xsegbd_dev->blk_queue)
                goto out;

        /* attach our request function; it runs with rqlock held */
        if (!blk_init_allocated_queue(xsegbd_dev->blk_queue,
                        xseg_request_fn, &xsegbd_dev->rqlock))
                goto outqueue;

        xsegbd_dev->blk_queue->queuedata = xsegbd_dev;

        blk_queue_flush(xsegbd_dev->blk_queue, REQ_FLUSH | REQ_FUA);
        blk_queue_logical_block_size(xsegbd_dev->blk_queue, 512);
        blk_queue_physical_block_size(xsegbd_dev->blk_queue, blksize);
        blk_queue_bounce_limit(xsegbd_dev->blk_queue, BLK_BOUNCE_ANY);

        //blk_queue_max_segments(dev->blk_queue, 512);

        /* cap a single block request so its payload fits one xseg request */
        max_request_size_bytes = XSEGBD_MAX_REQUEST_SIZE;
        blk_queue_max_hw_sectors(xsegbd_dev->blk_queue, max_request_size_bytes >> 9);
        blk_queue_max_segment_size(xsegbd_dev->blk_queue, max_request_size_bytes);
        blk_queue_io_min(xsegbd_dev->blk_queue, max_request_size_bytes);
        blk_queue_io_opt(xsegbd_dev->blk_queue, max_request_size_bytes);

        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, xsegbd_dev->blk_queue);

        /* vkoukis says we don't need partitions */
        xsegbd_dev->gd = disk = alloc_disk(1);
        if (!disk)
                goto outqueue;

        disk->major = xsegbd_dev->major;
        disk->first_minor = 0; // id * XSEGBD_MINORS;
        disk->fops = &xsegbd_ops;
        disk->queue = xsegbd_dev->blk_queue;
        disk->private_data = xsegbd_dev;
        disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
        snprintf(disk->disk_name, 32, "xsegbd%u", xsegbd_dev->id);

        ret = 0;

        /* allow a non-zero sector_size parameter to override the disk size */
        if (sector_size)
                xsegbd_dev->sectors = sector_size;
        else {
                ret = xsegbd_get_size(xsegbd_dev);
                if (ret)
                        goto outdisk;
        }

        set_capacity(disk, xsegbd_dev->sectors);
        XSEGLOG("xsegbd active...");
        add_disk(disk); /* immediately activates the device */

        return 0;

outdisk:
        put_disk(xsegbd_dev->gd);
outqueue:
        blk_cleanup_queue(xsegbd_dev->blk_queue);
out:
        xsegbd_dev->gd = NULL;
        return ret;
}
263
264 static void xsegbd_dev_release(struct device *dev)
265 {
266         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
267         
268         xseg_cancel_wait(xsegbd_dev->xseg, xsegbd_dev->src_portno);
269
270         /* cleanup gendisk and blk_queue the right way */
271         if (xsegbd_dev->gd) {
272                 if (xsegbd_dev->gd->flags & GENHD_FL_UP)
273                         del_gendisk(xsegbd_dev->gd);
274
275                 blk_cleanup_queue(xsegbd_dev->blk_queue);
276                 put_disk(xsegbd_dev->gd);
277         }
278
279 //      if (xseg_free_requests(xsegbd_dev->xseg, 
280 //                      xsegbd_dev->src_portno, xsegbd_dev->nr_requests) < 0)
281 //              XSEGLOG("Error trying to free requests!\n");
282
283
284         unregister_blkdev(xsegbd_dev->major, XSEGBD_NAME);
285
286         spin_lock(&xsegbd_devices_lock);
287         BUG_ON(xsegbd_devices[xsegbd_dev->src_portno] != xsegbd_dev);
288         xsegbd_devices[xsegbd_dev->src_portno] = NULL;
289         spin_unlock(&xsegbd_devices_lock);
290
291         if (xsegbd_dev->blk_req_pending)
292                 kfree(xsegbd_dev->blk_req_pending);
293         xq_free(&xsegbd_dev->blk_queue_pending);
294
295         kfree(xsegbd_dev);
296
297         module_put(THIS_MODULE);
298 }
299
300 /* ******************* */
301 /* ** Critical Path ** */
302 /* ******************* */
303
304 static void blk_to_xseg(struct xseg *xseg, struct xseg_request *xreq,
305                         struct request *blkreq)
306 {
307         struct bio_vec *bvec;
308         struct req_iterator iter;
309         uint64_t off = 0;
310         char *data = xseg_get_data(xseg, xreq);
311         rq_for_each_segment(bvec, blkreq, iter) {
312                 char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
313                 memcpy(data + off, bdata, bvec->bv_len);
314                 off += bvec->bv_len;
315                 kunmap_atomic(bdata);
316         }
317 }
318
319 static void xseg_to_blk(struct xseg *xseg, struct xseg_request *xreq,
320                         struct request *blkreq)
321 {
322         struct bio_vec *bvec;
323         struct req_iterator iter;
324         uint64_t off = 0;
325         char *data = xseg_get_data(xseg, xreq);
326         rq_for_each_segment(bvec, blkreq, iter) {
327                 char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
328                 memcpy(bdata, data + off, bvec->bv_len);
329                 off += bvec->bv_len;
330                 kunmap_atomic(bdata);
331         }
332 }
333
334 static void xseg_request_fn(struct request_queue *rq)
335 {
336         struct xseg_request *xreq;
337         struct xsegbd_device *xsegbd_dev = rq->queuedata;
338         struct request *blkreq;
339         struct xsegbd_pending *pending;
340         xqindex blkreq_idx;
341         char *target;
342         uint64_t datalen;
343         xport p;
344         int r;
345
346         for (;;) {
347                 blkreq_idx = Noneidx;
348                 xreq = xseg_get_request(xsegbd_dev->xseg, xsegbd_dev->src_portno, 
349                                 xsegbd_dev->dst_portno, X_ALLOC);
350                 if (!xreq)
351                         break;
352
353                 blkreq_idx = xq_pop_head(&xsegbd_dev->blk_queue_pending, 
354                                                 xsegbd_dev->src_portno);
355                 if (blkreq_idx == Noneidx)
356                         break;
357                 
358                 if (blkreq_idx >= xsegbd_dev->nr_requests) {
359                         XSEGLOG("blkreq_idx >= xsegbd_dev->nr_requests");
360                         BUG_ON(1);
361                         break;
362                 }
363
364                 blkreq = blk_fetch_request(rq);
365                 if (!blkreq)
366                         break;
367
368                 if (blkreq->cmd_type != REQ_TYPE_FS) {
369                         //we lose xreq here
370                         XSEGLOG("non-fs cmd_type: %u. *shrug*", blkreq->cmd_type);
371                         __blk_end_request_all(blkreq, 0);
372                         continue;
373                 }
374
375                 datalen = blk_rq_bytes(blkreq);
376                 r = xseg_prep_request(xsegbd_dev->xseg, xreq, 
377                                         xsegbd_dev->targetlen, datalen);
378                 if (r < 0) {
379                         XSEGLOG("couldn't prep request");
380                         __blk_end_request_err(blkreq, r);
381                         BUG_ON(1);
382                         break;
383                 }
384                 r = -ENOMEM;
385                 if (xreq->bufferlen - xsegbd_dev->targetlen < datalen){
386                         XSEGLOG("malformed req buffers");
387                         __blk_end_request_err(blkreq, r);
388                         BUG_ON(1);
389                         break;
390                 }
391
392                 target = xseg_get_target(xsegbd_dev->xseg, xreq);
393                 strncpy(target, xsegbd_dev->target, xsegbd_dev->targetlen);
394
395                 pending = &xsegbd_dev->blk_req_pending[blkreq_idx];
396                 pending->dev = xsegbd_dev;
397                 pending->request = blkreq;
398                 pending->comp = NULL;
399                 
400                 xreq->size = datalen;
401                 xreq->offset = blk_rq_pos(blkreq) << 9;
402                 xreq->priv = (uint64_t) blkreq_idx;
403
404                 /*
405                 if (xreq->offset >= (sector_size << 9))
406                         XSEGLOG("sector offset: %lu > %lu, flush:%u, fua:%u",
407                                  blk_rq_pos(blkreq), sector_size,
408                                  blkreq->cmd_flags & REQ_FLUSH,
409                                  blkreq->cmd_flags & REQ_FUA);
410                 */
411
412                 if (blkreq->cmd_flags & REQ_FLUSH)
413                         xreq->flags |= XF_FLUSH;
414
415                 if (blkreq->cmd_flags & REQ_FUA)
416                         xreq->flags |= XF_FUA;
417
418                 if (rq_data_dir(blkreq)) {
419                         /* unlock for data transfers? */
420                         blk_to_xseg(xsegbd_dev->xseg, xreq, blkreq);
421                         xreq->op = X_WRITE;
422                 } else {
423                         xreq->op = X_READ;
424                 }
425
426
427                 r = -EIO;
428                 p = xseg_submit(xsegbd_dev->xseg, xreq, 
429                                         xsegbd_dev->src_portno, X_ALLOC);
430                 if (p == NoPort) {
431                         XSEGLOG("coundn't submit req");
432                         BUG_ON(1);
433                         __blk_end_request_err(blkreq, r);
434                         break;
435                 }
436                 WARN_ON(xseg_signal(xsegbd_dev->xsegbd->xseg, p) < 0);
437         }
438         if (xreq)
439                 BUG_ON(xseg_put_request(xsegbd_dev->xsegbd->xseg, xreq, 
440                                         xsegbd_dev->src_portno) == -1);
441         if (blkreq_idx != Noneidx)
442                 BUG_ON(xq_append_head(&xsegbd_dev->blk_queue_pending, 
443                                 blkreq_idx, xsegbd_dev->src_portno) == Noneidx);
444 }
445
446 int update_dev_sectors_from_request(    struct xsegbd_device *xsegbd_dev,
447                                         struct xseg_request *xreq       )
448 {
449         void *data;
450         if (!xreq) {
451                 XSEGLOG("Invalid xreq");
452                 return -EIO;
453         }
454
455         if (xreq->state & XS_FAILED)
456                 return -ENOENT;
457
458         if (!(xreq->state & XS_SERVED))
459                 return -EIO;
460
461         data = xseg_get_data(xsegbd_dev->xseg, xreq);
462         if (!data) {
463                 XSEGLOG("Invalid req data");
464                 return -EIO;
465         }
466         xsegbd_dev->sectors = *((uint64_t *) data) / 512ULL;
467         return 0;
468 }
469
470 static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev)
471 {
472         struct xseg_request *xreq;
473         char *target;
474         uint64_t datalen;
475         xqindex blkreq_idx;
476         struct xsegbd_pending *pending;
477         struct completion comp;
478         xport p;
479         void *data;
480         int ret = -EBUSY, r;
481         xreq = xseg_get_request(xsegbd_dev->xseg, xsegbd_dev->src_portno,
482                         xsegbd_dev->dst_portno, X_ALLOC);
483         if (!xreq)
484                 goto out;
485
486         datalen = sizeof(uint64_t);
487         BUG_ON(xseg_prep_request(xsegbd_dev->xseg, xreq, xsegbd_dev->targetlen, datalen));
488         BUG_ON(xreq->bufferlen - xsegbd_dev->targetlen < datalen);
489
490         init_completion(&comp);
491         blkreq_idx = xq_pop_head(&xsegbd_dev->blk_queue_pending, 1);
492         if (blkreq_idx == Noneidx)
493                 goto out;
494         
495         pending = &xsegbd_dev->blk_req_pending[blkreq_idx];
496         pending->dev = xsegbd_dev;
497         pending->request = NULL;
498         pending->comp = &comp;
499
500         
501         xreq->priv = (uint64_t) blkreq_idx;
502
503         target = xseg_get_target(xsegbd_dev->xseg, xreq);
504         strncpy(target, xsegbd_dev->target, xsegbd_dev->targetlen);
505         xreq->size = datalen;
506         xreq->offset = 0;
507         xreq->op = X_INFO;
508
509         xseg_prepare_wait(xsegbd_dev->xseg, xsegbd_dev->src_portno);
510         p = xseg_submit(xsegbd_dev->xseg, xreq, 
511                                 xsegbd_dev->src_portno, X_ALLOC);
512         if ( p == NoPort) {
513                 XSEGLOG("couldn't submit request");
514                 BUG_ON(1);
515                 goto out_queue;
516         }
517         WARN_ON(xseg_signal(xsegbd_dev->xseg, p) < 0);
518         XSEGLOG("Before wait for completion, xreq %lx", (unsigned long) xreq);
519         wait_for_completion_interruptible(&comp);
520         XSEGLOG("Woken up after wait_for_completion_interruptible(), xreq: %lx", (unsigned long) xreq);
521         ret = update_dev_sectors_from_request(xsegbd_dev, xreq);
522         //XSEGLOG("get_size: sectors = %ld\n", (long)xsegbd_dev->sectors);
523 out:
524         BUG_ON(xseg_put_request(xsegbd_dev->xseg, xreq, xsegbd_dev->src_portno) == -1);
525         return ret;
526
527 out_queue:
528         xq_append_head(&xsegbd_dev->blk_queue_pending, blkreq_idx, 1);
529         
530         goto out;
531 }
532
533 static void xseg_callback(xport portno)
534 {
535         struct xsegbd_device *xsegbd_dev;
536         struct xseg_request *xreq;
537         struct request *blkreq;
538         struct xsegbd_pending *pending;
539         unsigned long flags;
540         xqindex blkreq_idx, ridx;
541         int err;
542         void *data;
543
544         xsegbd_dev  = __xsegbd_get_dev(portno);
545         if (!xsegbd_dev) {
546                 XSEGLOG("portno: %u has no xsegbd device assigned", portno);
547                 WARN_ON(1);
548                 return;
549         }
550
551         for (;;) {
552                 xseg_prepare_wait(xsegbd_dev->xseg, xsegbd_dev->src_portno);
553                 xreq = xseg_receive(xsegbd_dev->xseg, portno);
554                 if (!xreq)
555                         break;
556
557                 xseg_cancel_wait(xsegbd_dev->xseg, xsegbd_dev->src_portno);
558
559                 blkreq_idx = (xqindex) xreq->priv;
560                 if (blkreq_idx >= xsegbd_dev->nr_requests) {
561                         WARN_ON(1);
562                         //FIXME maybe put request?
563                         continue;
564                 }
565
566                 pending = &xsegbd_dev->blk_req_pending[blkreq_idx];
567                 if (pending->comp) {
568                         /* someone is blocking on this request
569                            and will handle it when we wake them up. */
570                         complete(pending->comp);
571                         /* the request is blocker's responsibility so
572                            we will not put_request(); */
573                         continue;
574                 }
575
576                 /* this is now treated as a block I/O request to end */
577                 blkreq = pending->request;
578                 pending->request = NULL;
579                 if (xsegbd_dev != pending->dev) {
580                         //FIXME maybe put request?
581                         XSEGLOG("xsegbd_dev != pending->dev");
582                         BUG_ON(1);
583                         continue;
584                 }
585                 pending->dev = NULL;
586                 if (!blkreq){
587                         //FIXME maybe put request?
588                         XSEGLOG("blkreq does not exist");
589                         BUG_ON(1);
590                         continue;
591                 }
592
593                 err = -EIO;
594                 if (!(xreq->state & XS_SERVED))
595                         goto blk_end;
596
597                 if (xreq->serviced != blk_rq_bytes(blkreq))
598                         goto blk_end;
599
600                 err = 0;
601                 /* unlock for data transfer? */
602                 if (!rq_data_dir(blkreq)){
603                         xseg_to_blk(xsegbd_dev->xseg, xreq, blkreq);
604                 }       
605 blk_end:
606                 blk_end_request_all(blkreq, err);
607                 
608                 ridx = xq_append_head(&xsegbd_dev->blk_queue_pending, 
609                                         blkreq_idx, xsegbd_dev->src_portno);
610                 if (ridx == Noneidx) {
611                         XSEGLOG("couldnt append blkreq_idx");
612                         WARN_ON(1);
613                 }
614
615                 if (xseg_put_request(xsegbd_dev->xseg, xreq, 
616                                                 xsegbd_dev->src_portno) < 0){
617                         XSEGLOG("couldn't put req");
618                         BUG_ON(1);
619                 }
620         }
621
622         if (xsegbd_dev) {
623                 spin_lock_irqsave(&xsegbd_dev->rqlock, flags);
624                 xseg_request_fn(xsegbd_dev->blk_queue);
625                 spin_unlock_irqrestore(&xsegbd_dev->rqlock, flags);
626         }
627 }
628
629
630 /* sysfs interface */
631
/* Virtual bus on which every xsegbd device is registered. */
static struct bus_type xsegbd_bus_type = {
        .name   = "xsegbd",
};
635
636 static ssize_t xsegbd_size_show(struct device *dev,
637                                         struct device_attribute *attr, char *buf)
638 {
639         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
640
641         return sprintf(buf, "%llu\n", (unsigned long long) xsegbd_dev->sectors * 512ULL);
642 }
643
644 static ssize_t xsegbd_major_show(struct device *dev,
645                                         struct device_attribute *attr, char *buf)
646 {
647         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
648
649         return sprintf(buf, "%d\n", xsegbd_dev->major);
650 }
651
652 static ssize_t xsegbd_srcport_show(struct device *dev,
653                                         struct device_attribute *attr, char *buf)
654 {
655         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
656
657         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->src_portno);
658 }
659
660 static ssize_t xsegbd_dstport_show(struct device *dev,
661                                         struct device_attribute *attr, char *buf)
662 {
663         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
664
665         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->dst_portno);
666 }
667
668 static ssize_t xsegbd_id_show(struct device *dev,
669                                         struct device_attribute *attr, char *buf)
670 {
671         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
672
673         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->id);
674 }
675
676 static ssize_t xsegbd_reqs_show(struct device *dev,
677                                         struct device_attribute *attr, char *buf)
678 {
679         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
680
681         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->nr_requests);
682 }
683
684 static ssize_t xsegbd_target_show(struct device *dev,
685                                         struct device_attribute *attr, char *buf)
686 {
687         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
688
689         return sprintf(buf, "%s\n", xsegbd_dev->target);
690 }
691
692 static ssize_t xsegbd_image_refresh(struct device *dev,
693                                         struct device_attribute *attr,
694                                         const char *buf,
695                                         size_t size)
696 {
697         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
698         int rc, ret = size;
699
700         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
701
702         rc = xsegbd_get_size(xsegbd_dev);
703         if (rc < 0) {
704                 ret = rc;
705                 goto out;
706         }
707
708         set_capacity(xsegbd_dev->gd, xsegbd_dev->sectors);
709
710 out:
711         mutex_unlock(&xsegbd_mutex);
712         return ret;
713 }
714
/*
 * sysfs store: force-complete every in-flight request of this device.
 * A pending-table slot is considered busy when __xq_check() reports its
 * index is NOT in the free queue; for each busy slot, end its block
 * request with -EIO and/or wake any blocked waiter.  Intended to unwedge
 * a device whose peer stopped responding.
 */
static ssize_t xsegbd_cleanup(struct device *dev,
                                        struct device_attribute *attr,
                                        const char *buf,
                                        size_t size)
{
        struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
        int ret = size, i;
        struct request *blkreq = NULL;
        struct xsegbd_pending *pending = NULL;
        struct completion *comp = NULL;

        mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
        for (i = 0; i < xsegbd_dev->nr_requests; i++) {
                /* take the free-queue lock so the slot cannot change state
                 * between the check and the teardown below */
                xlock_acquire(&xsegbd_dev->blk_queue_pending.lock,
                                xsegbd_dev->src_portno);
                if (!__xq_check(&xsegbd_dev->blk_queue_pending, i)) {
                        pending = &xsegbd_dev->blk_req_pending[i];
                        blkreq = pending->request;
                        pending->request = NULL;
                        comp = pending->comp;
                        pending->comp = NULL;
                        if (blkreq)
                                blk_end_request_all(blkreq, -EIO);
                        if (comp)
                                complete(comp);
                }
                xlock_release(&xsegbd_dev->blk_queue_pending.lock);
        }

        mutex_unlock(&xsegbd_mutex);
        return ret;
}
747
/* sysfs device attributes (show/store handlers defined above). */
static DEVICE_ATTR(size, S_IRUGO, xsegbd_size_show, NULL);
static DEVICE_ATTR(major, S_IRUGO, xsegbd_major_show, NULL);
static DEVICE_ATTR(srcport, S_IRUGO, xsegbd_srcport_show, NULL);
static DEVICE_ATTR(dstport, S_IRUGO, xsegbd_dstport_show, NULL);
static DEVICE_ATTR(id , S_IRUGO, xsegbd_id_show, NULL);
static DEVICE_ATTR(reqs , S_IRUGO, xsegbd_reqs_show, NULL);
static DEVICE_ATTR(target, S_IRUGO, xsegbd_target_show, NULL);
static DEVICE_ATTR(refresh , S_IWUSR, NULL, xsegbd_image_refresh);
static DEVICE_ATTR(cleanup , S_IWUSR, NULL, xsegbd_cleanup);
757
/* All attributes exposed for every xsegbd device, grouped for device_type. */
static struct attribute *xsegbd_attrs[] = {
        &dev_attr_size.attr,
        &dev_attr_major.attr,
        &dev_attr_srcport.attr,
        &dev_attr_dstport.attr,
        &dev_attr_id.attr,
        &dev_attr_reqs.attr,
        &dev_attr_target.attr,
        &dev_attr_refresh.attr,
        &dev_attr_cleanup.attr,
        NULL
};

static struct attribute_group xsegbd_attr_group = {
        .attrs = xsegbd_attrs,
};

static const struct attribute_group *xsegbd_attr_groups[] = {
        &xsegbd_attr_group,
        NULL
};
779
/* Intentionally empty: the per-device release is installed directly as
 * dev->release (xsegbd_dev_release) in xsegbd_bus_add_dev(). */
static void xsegbd_sysfs_dev_release(struct device *dev)
{
}

static struct device_type xsegbd_device_type = {
        .name           = "xsegbd",
        .groups         = xsegbd_attr_groups,
        .release        = xsegbd_sysfs_dev_release,
};
789
/* Intentionally empty: the root device is a static object, nothing to free. */
static void xsegbd_root_dev_release(struct device *dev)
{
}

/* Parent device for all xsegbd devices (shows up as /sys/devices/xsegbd). */
static struct device xsegbd_root_dev = {
        .init_name      = "xsegbd",
        .release        = xsegbd_root_dev_release,
};
798
799 static int xsegbd_bus_add_dev(struct xsegbd_device *xsegbd_dev)
800 {
801         int ret = -ENOMEM;
802         struct device *dev;
803
804         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
805         dev = &xsegbd_dev->dev;
806
807         dev->bus = &xsegbd_bus_type;
808         dev->type = &xsegbd_device_type;
809         dev->parent = &xsegbd_root_dev;
810         dev->release = xsegbd_dev_release;
811         dev_set_name(dev, "%d", xsegbd_dev->id);
812
813         ret = device_register(dev);
814
815         mutex_unlock(&xsegbd_mutex);
816         return ret;
817 }
818
819 static void xsegbd_bus_del_dev(struct xsegbd_device *xsegbd_dev)
820 {
821         device_unregister(&xsegbd_dev->dev);
822 }
823
824 static ssize_t xsegbd_add(struct bus_type *bus, const char *buf, size_t count)
825 {
826         struct xsegbd_device *xsegbd_dev;
827         struct xseg_port *port;
828         ssize_t ret = -ENOMEM;
829
830         if (!try_module_get(THIS_MODULE))
831                 return -ENODEV;
832
833         xsegbd_dev = kzalloc(sizeof(*xsegbd_dev), GFP_KERNEL);
834         if (!xsegbd_dev)
835                 goto out;
836
837         spin_lock_init(&xsegbd_dev->rqlock);
838         INIT_LIST_HEAD(&xsegbd_dev->node);
839
840         /* parse cmd */
841         if (sscanf(buf, "%" __stringify(XSEGBD_TARGET_NAMELEN) "s "
842                         "%d:%d:%d", xsegbd_dev->target, &xsegbd_dev->src_portno,
843                         &xsegbd_dev->dst_portno, &xsegbd_dev->nr_requests) < 3) {
844                 ret = -EINVAL;
845                 goto out_dev;
846         }
847         xsegbd_dev->targetlen = strlen(xsegbd_dev->target);
848
849         spin_lock(&xsegbd_devices_lock);
850         if (xsegbd_devices[xsegbd_dev->src_portno] != NULL) {
851                 ret = -EINVAL;
852                 goto out_unlock;
853         }
854         xsegbd_devices[xsegbd_dev->src_portno] = xsegbd_dev;
855         xsegbd_dev->id = xsegbd_dev->src_portno;
856         spin_unlock(&xsegbd_devices_lock);
857
858         XSEGLOG("registering block device major %d", major);
859         ret = register_blkdev(major, XSEGBD_NAME);
860         if (ret < 0) {
861                 XSEGLOG("cannot register block device!");
862                 ret = -EBUSY;
863                 goto out_delentry;
864         }
865         xsegbd_dev->major = ret;
866         XSEGLOG("registered block device major %d", xsegbd_dev->major);
867
868         ret = xsegbd_bus_add_dev(xsegbd_dev);
869         if (ret)
870                 goto out_blkdev;
871
872         if (!xq_alloc_seq(&xsegbd_dev->blk_queue_pending, 
873                                 xsegbd_dev->nr_requests,
874                                 xsegbd_dev->nr_requests))
875                 goto out_bus;
876
877         xsegbd_dev->blk_req_pending = kzalloc(
878                         xsegbd_dev->nr_requests *sizeof(struct xsegbd_pending),
879                                    GFP_KERNEL);
880         if (!xsegbd_dev->blk_req_pending)
881                 goto out_freeq;
882
883         
884         XSEGLOG("joining segment");
885         //FIXME use xsebd module config for now
886         xsegbd_dev->xseg = xseg_join(   xsegbd.config.type,
887                                         xsegbd.config.name,
888                                         "segdev",
889                                         xseg_callback           );
890         if (!xsegbd_dev->xseg)
891                 goto out_freepending;
892         
893
894         XSEGLOG("binding to source port %u (destination %u)",
895                         xsegbd_dev->src_portno, xsegbd_dev->dst_portno);
896         port = xseg_bind_port(xsegbd_dev->xseg, xsegbd_dev->src_portno);
897         if (!port) {
898                 XSEGLOG("cannot bind to port");
899                 ret = -EFAULT;
900
901                 goto out_xseg;
902         }
903         
904         if (xsegbd_dev->src_portno != xseg_portno(xsegbd_dev->xseg, port)) {
905                 XSEGLOG("portno != xsegbd_dev->src_portno");
906                 BUG_ON(1);
907                 ret = -EFAULT;
908                 goto out_xseg;
909         }
910         
911         /* make sure we don't get any requests until we're ready to handle them */
912         xseg_cancel_wait(xsegbd_dev->xseg, xseg_portno(xsegbd_dev->xseg, port));
913
914         ret = xsegbd_dev_init(xsegbd_dev);
915         if (ret)
916                 goto out_xseg;
917
918         xseg_prepare_wait(xsegbd_dev->xseg, xseg_portno(xsegbd_dev->xseg, port));
919         return count;
920
921 out_xseg:
922         xseg_leave(xsegbd_dev->xseg);
923         
924 out_freepending:
925         kfree(xsegbd_dev->blk_req_pending);
926
927 out_freeq:
928         xq_free(&xsegbd_dev->blk_queue_pending);
929
930 out_bus:
931         xsegbd_bus_del_dev(xsegbd_dev);
932         return ret;
933
934 out_blkdev:
935         unregister_blkdev(xsegbd_dev->major, XSEGBD_NAME);
936
937 out_delentry:
938         spin_lock(&xsegbd_devices_lock);
939         xsegbd_devices[xsegbd_dev->src_portno] = NULL;
940
941 out_unlock:
942         spin_unlock(&xsegbd_devices_lock);
943
944 out_dev:
945         kfree(xsegbd_dev);
946
947 out:
948         return ret;
949 }
950
951 static ssize_t xsegbd_remove(struct bus_type *bus, const char *buf, size_t count)
952 {
953         struct xsegbd_device *xsegbd_dev = NULL;
954         int id, ret;
955         unsigned long ul_id;
956
957         ret = strict_strtoul(buf, 10, &ul_id);
958         if (ret)
959                 return ret;
960
961         id = (int) ul_id;
962         if (id != ul_id)
963                 return -EINVAL;
964
965         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
966
967         ret = count;
968         xsegbd_dev = __xsegbd_get_dev(id);
969         if (!xsegbd_dev) {
970                 ret = -ENOENT;
971                 goto out_unlock;
972         }
973         xsegbd_bus_del_dev(xsegbd_dev);
974
975 out_unlock:
976         mutex_unlock(&xsegbd_mutex);
977         return ret;
978 }
979
/* Bus-level sysfs attributes: writing to "add" creates a new xsegbd device,
 * writing to "remove" tears one down.  Write-only, owner (root) only. */
static struct bus_attribute xsegbd_bus_attrs[] = {
        __ATTR(add, S_IWUSR, NULL, xsegbd_add),
        __ATTR(remove, S_IWUSR, NULL, xsegbd_remove),
        __ATTR_NULL
};
985
986 static int xsegbd_sysfs_init(void)
987 {
988         int ret;
989
990         ret = device_register(&xsegbd_root_dev);
991         if (ret < 0)
992                 return ret;
993
994         xsegbd_bus_type.bus_attrs = xsegbd_bus_attrs;
995         ret = bus_register(&xsegbd_bus_type);
996         if (ret < 0)
997                 device_unregister(&xsegbd_root_dev);
998
999         return ret;
1000 }
1001
/* Undo xsegbd_sysfs_init(): unregister the bus and the root device,
 * in reverse order of their registration. */
static void xsegbd_sysfs_cleanup(void)
{
        bus_unregister(&xsegbd_bus_type);
        device_unregister(&xsegbd_root_dev);
}
1007
1008 /* *************************** */
1009 /* ** Module Initialization ** */
1010 /* *************************** */
1011
1012 static int __init xsegbd_init(void)
1013 {
1014         int ret = -ENOMEM;
1015         xsegbd_devices = kzalloc(max_dev * sizeof(struct xsegbd_devices *), GFP_KERNEL);
1016         if (!xsegbd_devices)
1017                 goto out;
1018
1019         spin_lock_init(&xsegbd_devices_lock);
1020
1021         ret = -ENOSYS;
1022         ret = xsegbd_xseg_init();
1023         if (ret)
1024                 goto out_free;
1025
1026         ret = xsegbd_sysfs_init();
1027         if (ret)
1028                 goto out_xseg;
1029
1030         XSEGLOG("initialization complete");
1031
1032 out:
1033         return ret;
1034
1035 out_xseg:
1036         xsegbd_xseg_quit();
1037         
1038 out_free:
1039         kfree(xsegbd_devices);
1040
1041         goto out;
1042 }
1043
1044 static void __exit xsegbd_exit(void)
1045 {
1046         xsegbd_sysfs_cleanup();
1047         xsegbd_xseg_quit();
1048 }
1049
/* Register module entry/exit handlers with the kernel. */
module_init(xsegbd_init);
module_exit(xsegbd_exit);
1052