add one more check in xsegbd
[archipelago] / xseg / peers / kernel / xsegbd.c
1 /* xsegbd.c
2  *
3  */
4
5 #include <linux/module.h>
6 #include <linux/moduleparam.h>
7 #include <linux/init.h>
8 #include <linux/sched.h>
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
11 #include <linux/fs.h>
12 #include <linux/errno.h>
13 #include <linux/timer.h>
14 #include <linux/types.h>
15 #include <linux/vmalloc.h>
16 #include <linux/genhd.h>
17 #include <linux/blkdev.h>
18 #include <linux/bio.h>
19 #include <linux/device.h>
20 #include <linux/completion.h>
21
22 #include <sys/kernel/segdev.h>
23 #include "xsegbd.h"
24
25 #define XSEGBD_MINORS 1
26 /* define max request size to be used in xsegbd */
27 //FIXME should we make this 4MB instead of 256KB ?
28 #define XSEGBD_MAX_REQUEST_SIZE 262144U
29
30 MODULE_DESCRIPTION("xsegbd");
31 MODULE_AUTHOR("XSEG");
32 MODULE_LICENSE("GPL");
33
34 static long sector_size = 0;
35 static long blksize = 512;
36 static int major = 0;
37 static int max_dev = 1024;
38 static char name[XSEGBD_SEGMENT_NAMELEN] = "xsegbd";
39 static char spec[256] = "segdev:xsegbd:4:1024:12";
40
41 module_param(sector_size, long, 0644);
42 module_param(blksize, long, 0644);
43 module_param(max_dev, int, 0644);
44 module_param(major, int, 0644);
45 module_param_string(name, name, sizeof(name), 0644);
46 module_param_string(spec, spec, sizeof(spec), 0644);
47
48 static struct xsegbd xsegbd;
49 static struct xsegbd_device **xsegbd_devices; /* indexed by portno */
50 static DEFINE_MUTEX(xsegbd_mutex);
51 static DEFINE_SPINLOCK(xsegbd_devices_lock);
52
53
54
55 static struct xsegbd_device *__xsegbd_get_dev(unsigned long id)
56 {
57         struct xsegbd_device *xsegbd_dev = NULL;
58
59         spin_lock(&xsegbd_devices_lock);
60         xsegbd_dev = xsegbd_devices[id];
61         spin_unlock(&xsegbd_devices_lock);
62
63         return xsegbd_dev;
64 }
65
66 /* ************************* */
67 /* ***** sysfs helpers ***** */
68 /* ************************* */
69
/* Map the embedded struct device back to its containing xsegbd_device. */
static struct xsegbd_device *dev_to_xsegbd(struct device *dev)
{
	return container_of(dev, struct xsegbd_device, dev);
}
74
/* Take a reference on the device's embedded struct device. */
static struct device *xsegbd_get_dev(struct xsegbd_device *xsegbd_dev)
{
	/* FIXME */
	return get_device(&xsegbd_dev->dev);
}
80
/* Drop a reference previously taken with xsegbd_get_dev(). */
static void xsegbd_put_dev(struct xsegbd_device *xsegbd_dev)
{
	put_device(&xsegbd_dev->dev);
}
85
86 /* ************************* */
87 /* ** XSEG Initialization ** */
88 /* ************************* */
89
90 static void xseg_callback(uint32_t portno);
91
92 int xsegbd_xseg_init(void)
93 {
94         int r;
95
96         if (!xsegbd.name[0])
97                 strncpy(xsegbd.name, name, XSEGBD_SEGMENT_NAMELEN);
98
99         r = xseg_initialize();
100         if (r) {
101                 XSEGLOG("cannot initialize 'segdev' peer");
102                 goto err;
103         }
104
105         r = xseg_parse_spec(spec, &xsegbd.config);
106         if (r)
107                 goto err;
108
109         if (strncmp(xsegbd.config.type, "segdev", 16))
110                 XSEGLOG("WARNING: unexpected segment type '%s' vs 'segdev'",
111                          xsegbd.config.type);
112
113         /* leave it here for now */
114         XSEGLOG("joining segment");
115         xsegbd.xseg = xseg_join(        xsegbd.config.type,
116                                         xsegbd.config.name,
117                                         "segdev",
118                                         xseg_callback           );
119         if (!xsegbd.xseg) {
120                 XSEGLOG("cannot find segment");
121                 r = -ENODEV;
122                 goto err;
123         }
124
125         return 0;
126 err:
127         return r;
128
129 }
130
/*
 * Detach from the shared segment: clear the segdev reservation and unmap
 * the whole segment.  Always returns 0.
 * NOTE(review): assumes segdev_get(0) cannot fail/return an error pointer
 * here — confirm against the segdev implementation.
 */
int xsegbd_xseg_quit(void)
{
	struct segdev *segdev;

	/* make sure to unmap the segment first */
	segdev = segdev_get(0);
	clear_bit(SEGDEV_RESERVED, &segdev->flags);
	xsegbd.xseg->priv->segment_type.ops.unmap(xsegbd.xseg, xsegbd.xseg->segment_size);
	segdev_put(segdev);

	return 0;
}
143
144
145 /* ***************************** */
146 /* ** Block Device Operations ** */
147 /* ***************************** */
148
149 static int xsegbd_open(struct block_device *bdev, fmode_t mode)
150 {
151         struct gendisk *disk = bdev->bd_disk;
152         struct xsegbd_device *xsegbd_dev = disk->private_data;
153
154         xsegbd_get_dev(xsegbd_dev);
155
156         return 0;
157 }
158
159 static int xsegbd_release(struct gendisk *gd, fmode_t mode)
160 {
161         struct xsegbd_device *xsegbd_dev = gd->private_data;
162
163         xsegbd_put_dev(xsegbd_dev);
164
165         return 0;
166 }
167
/* No ioctls are supported; always report ENOTTY. */
static int xsegbd_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	return -ENOTTY;
}
173
/* Block device operations: open/release refcounting only; no real ioctls. */
static const struct block_device_operations xsegbd_ops = {
	.owner		= THIS_MODULE,
	.open		= xsegbd_open,
	.release	= xsegbd_release,
	.ioctl		= xsegbd_ioctl
};
180
181
182 /* *************************** */
183 /* ** Device Initialization ** */
184 /* *************************** */
185
186 static void xseg_request_fn(struct request_queue *rq);
187 static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev);
188
189 static int xsegbd_dev_init(struct xsegbd_device *xsegbd_dev)
190 {
191         int ret = -ENOMEM;
192         struct gendisk *disk;
193         unsigned int max_request_size_bytes;
194
195         spin_lock_init(&xsegbd_dev->rqlock);
196
197         xsegbd_dev->xsegbd = &xsegbd;
198
199         xsegbd_dev->blk_queue = blk_alloc_queue(GFP_KERNEL);
200         if (!xsegbd_dev->blk_queue)
201                 goto out;
202
203         if (!blk_init_allocated_queue(xsegbd_dev->blk_queue, 
204                         xseg_request_fn, &xsegbd_dev->rqlock))
205                 goto outqueue;
206
207         xsegbd_dev->blk_queue->queuedata = xsegbd_dev;
208
209         blk_queue_flush(xsegbd_dev->blk_queue, REQ_FLUSH | REQ_FUA);
210         blk_queue_logical_block_size(xsegbd_dev->blk_queue, 512);
211         blk_queue_physical_block_size(xsegbd_dev->blk_queue, blksize);
212         blk_queue_bounce_limit(xsegbd_dev->blk_queue, BLK_BOUNCE_ANY);
213         
214         //blk_queue_max_segments(dev->blk_queue, 512);
215
216         max_request_size_bytes = XSEGBD_MAX_REQUEST_SIZE;
217         blk_queue_max_hw_sectors(xsegbd_dev->blk_queue, max_request_size_bytes >> 9);
218         blk_queue_max_segment_size(xsegbd_dev->blk_queue, max_request_size_bytes);
219         blk_queue_io_min(xsegbd_dev->blk_queue, max_request_size_bytes);
220         blk_queue_io_opt(xsegbd_dev->blk_queue, max_request_size_bytes);
221
222         queue_flag_set_unlocked(QUEUE_FLAG_NONROT, xsegbd_dev->blk_queue);
223
224         /* vkoukis says we don't need partitions */
225         xsegbd_dev->gd = disk = alloc_disk(1);
226         if (!disk)
227                 goto outqueue;
228
229         disk->major = xsegbd_dev->major;
230         disk->first_minor = 0; // id * XSEGBD_MINORS;
231         disk->fops = &xsegbd_ops;
232         disk->queue = xsegbd_dev->blk_queue;
233         disk->private_data = xsegbd_dev;
234         disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
235         snprintf(disk->disk_name, 32, "xsegbd%u", xsegbd_dev->id);
236
237         ret = 0;
238         
239         /* allow a non-zero sector_size parameter to override the disk size */
240         if (sector_size)
241                 xsegbd_dev->sectors = sector_size;
242         else {
243                 ret = xsegbd_get_size(xsegbd_dev);
244                 if (ret)
245                         goto outdisk;
246         }
247
248         set_capacity(disk, xsegbd_dev->sectors);
249         XSEGLOG("xsegbd active...");
250         add_disk(disk); /* immediately activates the device */
251
252         return 0;
253
254
255 outdisk:
256         put_disk(xsegbd_dev->gd);
257 outqueue:
258         blk_cleanup_queue(xsegbd_dev->blk_queue);
259 out:
260         xsegbd_dev->gd = NULL;
261         return ret;
262 }
263
264 static void xsegbd_dev_release(struct device *dev)
265 {
266         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
267         
268         xseg_cancel_wait(xsegbd_dev->xseg, xsegbd_dev->src_portno);
269
270         /* cleanup gendisk and blk_queue the right way */
271         if (xsegbd_dev->gd) {
272                 if (xsegbd_dev->gd->flags & GENHD_FL_UP)
273                         del_gendisk(xsegbd_dev->gd);
274
275                 blk_cleanup_queue(xsegbd_dev->blk_queue);
276                 put_disk(xsegbd_dev->gd);
277         }
278
279 //      if (xseg_free_requests(xsegbd_dev->xseg, 
280 //                      xsegbd_dev->src_portno, xsegbd_dev->nr_requests) < 0)
281 //              XSEGLOG("Error trying to free requests!\n");
282
283
284         unregister_blkdev(xsegbd_dev->major, XSEGBD_NAME);
285
286         spin_lock(&xsegbd_devices_lock);
287         BUG_ON(xsegbd_devices[xsegbd_dev->src_portno] != xsegbd_dev);
288         xsegbd_devices[xsegbd_dev->src_portno] = NULL;
289         spin_unlock(&xsegbd_devices_lock);
290
291         if (xsegbd_dev->blk_req_pending)
292                 kfree(xsegbd_dev->blk_req_pending);
293         xq_free(&xsegbd_dev->blk_queue_pending);
294
295         kfree(xsegbd_dev);
296
297         module_put(THIS_MODULE);
298 }
299
300 /* ******************* */
301 /* ** Critical Path ** */
302 /* ******************* */
303
304 static void blk_to_xseg(struct xseg *xseg, struct xseg_request *xreq,
305                         struct request *blkreq)
306 {
307         struct bio_vec *bvec;
308         struct req_iterator iter;
309         uint64_t off = 0;
310         char *data = xseg_get_data(xseg, xreq);
311         rq_for_each_segment(bvec, blkreq, iter) {
312                 char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
313                 memcpy(data + off, bdata, bvec->bv_len);
314                 off += bvec->bv_len;
315                 kunmap_atomic(bdata);
316         }
317 }
318
319 static void xseg_to_blk(struct xseg *xseg, struct xseg_request *xreq,
320                         struct request *blkreq)
321 {
322         struct bio_vec *bvec;
323         struct req_iterator iter;
324         uint64_t off = 0;
325         char *data = xseg_get_data(xseg, xreq);
326         rq_for_each_segment(bvec, blkreq, iter) {
327                 char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
328                 memcpy(bdata, data + off, bvec->bv_len);
329                 off += bvec->bv_len;
330                 kunmap_atomic(bdata);
331         }
332 }
333
334 static void xseg_request_fn(struct request_queue *rq)
335 {
336         struct xseg_request *xreq;
337         struct xsegbd_device *xsegbd_dev = rq->queuedata;
338         struct request *blkreq;
339         struct xsegbd_pending *pending;
340         xqindex blkreq_idx;
341         char *target;
342         uint64_t datalen;
343         xport p;
344         int r;
345
346         for (;;) {
347                 blkreq_idx = Noneidx;
348                 xreq = xseg_get_request(xsegbd_dev->xseg, xsegbd_dev->src_portno, 
349                                 xsegbd_dev->dst_portno, X_ALLOC);
350                 if (!xreq)
351                         break;
352
353                 blkreq_idx = xq_pop_head(&xsegbd_dev->blk_queue_pending, 
354                                                 xsegbd_dev->src_portno);
355                 if (blkreq_idx == Noneidx)
356                         break;
357                 
358                 if (blkreq_idx >= xsegbd_dev->nr_requests) {
359                         XSEGLOG("blkreq_idx >= xsegbd_dev->nr_requests");
360                         BUG_ON(1);
361                         break;
362                 }
363
364                 blkreq = blk_fetch_request(rq);
365                 if (!blkreq)
366                         break;
367
368                 if (blkreq->cmd_type != REQ_TYPE_FS) {
369                         //we lose xreq here
370                         XSEGLOG("non-fs cmd_type: %u. *shrug*", blkreq->cmd_type);
371                         __blk_end_request_all(blkreq, 0);
372                         continue;
373                 }
374
375                 datalen = blk_rq_bytes(blkreq);
376                 r = xseg_prep_request(xsegbd_dev->xseg, xreq, 
377                                         xsegbd_dev->targetlen, datalen);
378                 if (r < 0) {
379                         XSEGLOG("couldn't prep request");
380                         __blk_end_request_err(blkreq, r);
381                         BUG_ON(1);
382                         break;
383                 }
384                 r = -ENOMEM;
385                 if (xreq->bufferlen - xsegbd_dev->targetlen < datalen){
386                         XSEGLOG("malformed req buffers");
387                         __blk_end_request_err(blkreq, r);
388                         BUG_ON(1);
389                         break;
390                 }
391
392                 target = xseg_get_target(xsegbd_dev->xseg, xreq);
393                 strncpy(target, xsegbd_dev->target, xsegbd_dev->targetlen);
394
395                 pending = &xsegbd_dev->blk_req_pending[blkreq_idx];
396                 pending->dev = xsegbd_dev;
397                 pending->request = blkreq;
398                 pending->comp = NULL;
399                 
400                 xreq->size = datalen;
401                 xreq->offset = blk_rq_pos(blkreq) << 9;
402                 xreq->priv = (uint64_t) blkreq_idx;
403
404                 /*
405                 if (xreq->offset >= (sector_size << 9))
406                         XSEGLOG("sector offset: %lu > %lu, flush:%u, fua:%u",
407                                  blk_rq_pos(blkreq), sector_size,
408                                  blkreq->cmd_flags & REQ_FLUSH,
409                                  blkreq->cmd_flags & REQ_FUA);
410                 */
411
412                 if (blkreq->cmd_flags & REQ_FLUSH)
413                         xreq->flags |= XF_FLUSH;
414
415                 if (blkreq->cmd_flags & REQ_FUA)
416                         xreq->flags |= XF_FUA;
417
418                 if (rq_data_dir(blkreq)) {
419                         /* unlock for data transfers? */
420                         blk_to_xseg(xsegbd_dev->xseg, xreq, blkreq);
421                         xreq->op = X_WRITE;
422                 } else {
423                         xreq->op = X_READ;
424                 }
425
426
427                 r = -EIO;
428                 p = xseg_submit(xsegbd_dev->xseg, xreq, 
429                                         xsegbd_dev->src_portno, X_ALLOC);
430                 if (p == NoPort) {
431                         XSEGLOG("coundn't submit req");
432                         BUG_ON(1);
433                         __blk_end_request_err(blkreq, r);
434                         break;
435                 }
436                 WARN_ON(xseg_signal(xsegbd_dev->xsegbd->xseg, p) < 0);
437         }
438         if (xreq)
439                 BUG_ON(xseg_put_request(xsegbd_dev->xsegbd->xseg, xreq, 
440                                         xsegbd_dev->src_portno) == -1);
441         if (blkreq_idx != Noneidx)
442                 BUG_ON(xq_append_head(&xsegbd_dev->blk_queue_pending, 
443                                 blkreq_idx, xsegbd_dev->src_portno) == Noneidx);
444 }
445
446 int update_dev_sectors_from_request(    struct xsegbd_device *xsegbd_dev,
447                                         struct xseg_request *xreq       )
448 {
449         void *data;
450         if (!xreq) {
451                 XSEGLOG("Invalid xreq");
452                 return -EIO;
453         }
454
455         if (xreq->state & XS_FAILED)
456                 return -ENOENT;
457
458         if (!(xreq->state & XS_SERVED))
459                 return -EIO;
460
461         data = xseg_get_data(xsegbd_dev->xseg, xreq);
462         if (!data) {
463                 XSEGLOG("Invalid req data");
464                 return -EIO;
465         }
466         if (!xsegbd_dev) {
467                 XSEGLOG("Invalid xsegbd_dev");
468                 return -ENOENT;
469         }
470         xsegbd_dev->sectors = *((uint64_t *) data) / 512ULL;
471         return 0;
472 }
473
474 static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev)
475 {
476         struct xseg_request *xreq;
477         char *target;
478         uint64_t datalen;
479         xqindex blkreq_idx;
480         struct xsegbd_pending *pending;
481         struct completion comp;
482         xport p;
483         void *data;
484         int ret = -EBUSY, r;
485         xreq = xseg_get_request(xsegbd_dev->xseg, xsegbd_dev->src_portno,
486                         xsegbd_dev->dst_portno, X_ALLOC);
487         if (!xreq)
488                 goto out;
489
490         datalen = sizeof(uint64_t);
491         BUG_ON(xseg_prep_request(xsegbd_dev->xseg, xreq, xsegbd_dev->targetlen, datalen));
492         BUG_ON(xreq->bufferlen - xsegbd_dev->targetlen < datalen);
493
494         init_completion(&comp);
495         blkreq_idx = xq_pop_head(&xsegbd_dev->blk_queue_pending, 1);
496         if (blkreq_idx == Noneidx)
497                 goto out;
498         
499         pending = &xsegbd_dev->blk_req_pending[blkreq_idx];
500         pending->dev = xsegbd_dev;
501         pending->request = NULL;
502         pending->comp = &comp;
503
504         
505         xreq->priv = (uint64_t) blkreq_idx;
506
507         target = xseg_get_target(xsegbd_dev->xseg, xreq);
508         strncpy(target, xsegbd_dev->target, xsegbd_dev->targetlen);
509         xreq->size = datalen;
510         xreq->offset = 0;
511         xreq->op = X_INFO;
512
513         xseg_prepare_wait(xsegbd_dev->xseg, xsegbd_dev->src_portno);
514         p = xseg_submit(xsegbd_dev->xseg, xreq, 
515                                 xsegbd_dev->src_portno, X_ALLOC);
516         if ( p == NoPort) {
517                 XSEGLOG("couldn't submit request");
518                 BUG_ON(1);
519                 goto out_queue;
520         }
521         WARN_ON(xseg_signal(xsegbd_dev->xseg, p) < 0);
522         XSEGLOG("Before wait for completion, comp %lx [%llu]", (unsigned long) pending->comp, (unsigned long long) blkreq_idx);
523         wait_for_completion_interruptible(&comp);
524         XSEGLOG("Woken up after wait_for_completion_interruptible(), comp: %lx [%llu]", (unsigned long) pending->comp, (unsigned long long) blkreq_idx);
525         ret = update_dev_sectors_from_request(xsegbd_dev, xreq);
526         //XSEGLOG("get_size: sectors = %ld\n", (long)xsegbd_dev->sectors);
527 out:
528         pending->dev = NULL;
529         pending->comp = NULL;
530         BUG_ON(xseg_put_request(xsegbd_dev->xseg, xreq, xsegbd_dev->src_portno) == -1);
531         return ret;
532
533 out_queue:
534         xq_append_head(&xsegbd_dev->blk_queue_pending, blkreq_idx, 1);
535         
536         goto out;
537 }
538
539 static void xseg_callback(xport portno)
540 {
541         struct xsegbd_device *xsegbd_dev;
542         struct xseg_request *xreq;
543         struct request *blkreq;
544         struct xsegbd_pending *pending;
545         unsigned long flags;
546         xqindex blkreq_idx, ridx;
547         int err;
548         void *data;
549
550         xsegbd_dev  = __xsegbd_get_dev(portno);
551         if (!xsegbd_dev) {
552                 XSEGLOG("portno: %u has no xsegbd device assigned", portno);
553                 WARN_ON(1);
554                 return;
555         }
556
557         for (;;) {
558                 xseg_prepare_wait(xsegbd_dev->xseg, xsegbd_dev->src_portno);
559                 xreq = xseg_receive(xsegbd_dev->xseg, portno);
560                 if (!xreq)
561                         break;
562
563                 xseg_cancel_wait(xsegbd_dev->xseg, xsegbd_dev->src_portno);
564
565                 blkreq_idx = (xqindex) xreq->priv;
566                 if (blkreq_idx >= xsegbd_dev->nr_requests) {
567                         WARN_ON(1);
568                         //FIXME maybe put request?
569                         continue;
570                 }
571
572                 pending = &xsegbd_dev->blk_req_pending[blkreq_idx];
573                 if (pending->comp) {
574                         /* someone is blocking on this request
575                            and will handle it when we wake them up. */
576                         complete(pending->comp);
577                         /* the request is blocker's responsibility so
578                            we will not put_request(); */
579                         continue;
580                 }
581
582                 /* this is now treated as a block I/O request to end */
583                 blkreq = pending->request;
584                 pending->request = NULL;
585                 if (xsegbd_dev != pending->dev) {
586                         //FIXME maybe put request?
587                         XSEGLOG("xsegbd_dev != pending->dev");
588                         BUG_ON(1);
589                         continue;
590                 }
591                 pending->dev = NULL;
592                 if (!blkreq){
593                         //FIXME maybe put request?
594                         XSEGLOG("blkreq does not exist");
595                         BUG_ON(1);
596                         continue;
597                 }
598
599                 err = -EIO;
600                 if (!(xreq->state & XS_SERVED))
601                         goto blk_end;
602
603                 if (xreq->serviced != blk_rq_bytes(blkreq))
604                         goto blk_end;
605
606                 err = 0;
607                 /* unlock for data transfer? */
608                 if (!rq_data_dir(blkreq)){
609                         xseg_to_blk(xsegbd_dev->xseg, xreq, blkreq);
610                 }       
611 blk_end:
612                 blk_end_request_all(blkreq, err);
613                 
614                 ridx = xq_append_head(&xsegbd_dev->blk_queue_pending, 
615                                         blkreq_idx, xsegbd_dev->src_portno);
616                 if (ridx == Noneidx) {
617                         XSEGLOG("couldnt append blkreq_idx");
618                         WARN_ON(1);
619                 }
620
621                 if (xseg_put_request(xsegbd_dev->xseg, xreq, 
622                                                 xsegbd_dev->src_portno) < 0){
623                         XSEGLOG("couldn't put req");
624                         BUG_ON(1);
625                 }
626         }
627
628         if (xsegbd_dev) {
629                 spin_lock_irqsave(&xsegbd_dev->rqlock, flags);
630                 xseg_request_fn(xsegbd_dev->blk_queue);
631                 spin_unlock_irqrestore(&xsegbd_dev->rqlock, flags);
632         }
633 }
634
635
636 /* sysfs interface */
637
/* Bus that all xsegbd devices hang off of in sysfs. */
static struct bus_type xsegbd_bus_type = {
	.name	= "xsegbd",
};
641
642 static ssize_t xsegbd_size_show(struct device *dev,
643                                         struct device_attribute *attr, char *buf)
644 {
645         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
646
647         return sprintf(buf, "%llu\n", (unsigned long long) xsegbd_dev->sectors * 512ULL);
648 }
649
650 static ssize_t xsegbd_major_show(struct device *dev,
651                                         struct device_attribute *attr, char *buf)
652 {
653         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
654
655         return sprintf(buf, "%d\n", xsegbd_dev->major);
656 }
657
658 static ssize_t xsegbd_srcport_show(struct device *dev,
659                                         struct device_attribute *attr, char *buf)
660 {
661         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
662
663         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->src_portno);
664 }
665
666 static ssize_t xsegbd_dstport_show(struct device *dev,
667                                         struct device_attribute *attr, char *buf)
668 {
669         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
670
671         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->dst_portno);
672 }
673
674 static ssize_t xsegbd_id_show(struct device *dev,
675                                         struct device_attribute *attr, char *buf)
676 {
677         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
678
679         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->id);
680 }
681
682 static ssize_t xsegbd_reqs_show(struct device *dev,
683                                         struct device_attribute *attr, char *buf)
684 {
685         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
686
687         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->nr_requests);
688 }
689
690 static ssize_t xsegbd_target_show(struct device *dev,
691                                         struct device_attribute *attr, char *buf)
692 {
693         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
694
695         return sprintf(buf, "%s\n", xsegbd_dev->target);
696 }
697
698 static ssize_t xsegbd_image_refresh(struct device *dev,
699                                         struct device_attribute *attr,
700                                         const char *buf,
701                                         size_t size)
702 {
703         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
704         int rc, ret = size;
705
706         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
707
708         rc = xsegbd_get_size(xsegbd_dev);
709         if (rc < 0) {
710                 ret = rc;
711                 goto out;
712         }
713
714         set_capacity(xsegbd_dev->gd, xsegbd_dev->sectors);
715
716 out:
717         mutex_unlock(&xsegbd_mutex);
718         return ret;
719 }
720
/*
 * sysfs 'cleanup' (write-only): walk every pending slot and forcibly
 * complete any block request or synchronous waiter still outstanding,
 * returning the slot to the free queue.  Used to unwedge the device when
 * the peer has stopped responding.
 */
static ssize_t xsegbd_cleanup(struct device *dev,
					struct device_attribute *attr,
					const char *buf,
					size_t size)
{
	struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
	int ret = size, i;
	struct request *blkreq = NULL;
	struct xsegbd_pending *pending = NULL;
	struct completion *comp = NULL;

	mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
	xlock_acquire(&xsegbd_dev->blk_queue_pending.lock,
				xsegbd_dev->src_portno);
	for (i = 0; i < xsegbd_dev->nr_requests; i++) {
		/* a slot absent from the free queue is still in flight */
		if (!__xq_check(&xsegbd_dev->blk_queue_pending, i)) {
			pending = &xsegbd_dev->blk_req_pending[i];
			blkreq = pending->request;
			pending->request = NULL;
			comp = pending->comp;
			pending->comp = NULL;
			if (blkreq){
				XSEGLOG("Cleaning up blkreq %lx [%d]", (unsigned long) blkreq, i);
				blk_end_request_all(blkreq, -EIO);
			}
			if (comp){
				XSEGLOG("Cleaning up comp %lx [%d]", (unsigned long) comp, i);
				complete(comp);
			}
			__xq_append_tail(&xsegbd_dev->blk_queue_pending, i);
		}
	}
	xlock_release(&xsegbd_dev->blk_queue_pending.lock);

	mutex_unlock(&xsegbd_mutex);
	return ret;
}
758
/* Per-device sysfs attributes: read-only state, plus the write-only
 * 'refresh' (re-read size) and 'cleanup' (fail stuck requests) hooks. */
static DEVICE_ATTR(size, S_IRUGO, xsegbd_size_show, NULL);
static DEVICE_ATTR(major, S_IRUGO, xsegbd_major_show, NULL);
static DEVICE_ATTR(srcport, S_IRUGO, xsegbd_srcport_show, NULL);
static DEVICE_ATTR(dstport, S_IRUGO, xsegbd_dstport_show, NULL);
static DEVICE_ATTR(id , S_IRUGO, xsegbd_id_show, NULL);
static DEVICE_ATTR(reqs , S_IRUGO, xsegbd_reqs_show, NULL);
static DEVICE_ATTR(target, S_IRUGO, xsegbd_target_show, NULL);
static DEVICE_ATTR(refresh , S_IWUSR, NULL, xsegbd_image_refresh);
static DEVICE_ATTR(cleanup , S_IWUSR, NULL, xsegbd_cleanup);
768
/* Attribute list and group wiring consumed by xsegbd_device_type below. */
static struct attribute *xsegbd_attrs[] = {
	&dev_attr_size.attr,
	&dev_attr_major.attr,
	&dev_attr_srcport.attr,
	&dev_attr_dstport.attr,
	&dev_attr_id.attr,
	&dev_attr_reqs.attr,
	&dev_attr_target.attr,
	&dev_attr_refresh.attr,
	&dev_attr_cleanup.attr,
	NULL
};

static struct attribute_group xsegbd_attr_group = {
	.attrs = xsegbd_attrs,
};

static const struct attribute_group *xsegbd_attr_groups[] = {
	&xsegbd_attr_group,
	NULL
};
790
/* Per-device teardown happens in the dev->release callback set by
 * xsegbd_bus_add_dev(), so the type-level release is a no-op. */
static void xsegbd_sysfs_dev_release(struct device *dev)
{
}

static struct device_type xsegbd_device_type = {
	.name		= "xsegbd",
	.groups		= xsegbd_attr_groups,
	.release	= xsegbd_sysfs_dev_release,
};

/* Static parent device for all xsegbd devices; nothing to free. */
static void xsegbd_root_dev_release(struct device *dev)
{
}

static struct device xsegbd_root_dev = {
	.init_name	= "xsegbd",
	.release	= xsegbd_root_dev_release,
};
809
810 static int xsegbd_bus_add_dev(struct xsegbd_device *xsegbd_dev)
811 {
812         int ret = -ENOMEM;
813         struct device *dev;
814
815         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
816         dev = &xsegbd_dev->dev;
817
818         dev->bus = &xsegbd_bus_type;
819         dev->type = &xsegbd_device_type;
820         dev->parent = &xsegbd_root_dev;
821         dev->release = xsegbd_dev_release;
822         dev_set_name(dev, "%d", xsegbd_dev->id);
823
824         ret = device_register(dev);
825
826         mutex_unlock(&xsegbd_mutex);
827         return ret;
828 }
829
/* Unregister the device; teardown runs via xsegbd_dev_release() when the
 * last reference is dropped. */
static void xsegbd_bus_del_dev(struct xsegbd_device *xsegbd_dev)
{
	device_unregister(&xsegbd_dev->dev);
}
834
835 static ssize_t xsegbd_add(struct bus_type *bus, const char *buf, size_t count)
836 {
837         struct xsegbd_device *xsegbd_dev;
838         struct xseg_port *port;
839         ssize_t ret = -ENOMEM;
840
841         if (!try_module_get(THIS_MODULE))
842                 return -ENODEV;
843
844         xsegbd_dev = kzalloc(sizeof(*xsegbd_dev), GFP_KERNEL);
845         if (!xsegbd_dev)
846                 goto out;
847
848         spin_lock_init(&xsegbd_dev->rqlock);
849         INIT_LIST_HEAD(&xsegbd_dev->node);
850
851         /* parse cmd */
852         if (sscanf(buf, "%" __stringify(XSEGBD_TARGET_NAMELEN) "s "
853                         "%d:%d:%d", xsegbd_dev->target, &xsegbd_dev->src_portno,
854                         &xsegbd_dev->dst_portno, &xsegbd_dev->nr_requests) < 3) {
855                 ret = -EINVAL;
856                 goto out_dev;
857         }
858         xsegbd_dev->targetlen = strlen(xsegbd_dev->target);
859
860         spin_lock(&xsegbd_devices_lock);
861         if (xsegbd_devices[xsegbd_dev->src_portno] != NULL) {
862                 ret = -EINVAL;
863                 goto out_unlock;
864         }
865         xsegbd_devices[xsegbd_dev->src_portno] = xsegbd_dev;
866         xsegbd_dev->id = xsegbd_dev->src_portno;
867         spin_unlock(&xsegbd_devices_lock);
868
869         XSEGLOG("registering block device major %d", major);
870         ret = register_blkdev(major, XSEGBD_NAME);
871         if (ret < 0) {
872                 XSEGLOG("cannot register block device!");
873                 ret = -EBUSY;
874                 goto out_delentry;
875         }
876         xsegbd_dev->major = ret;
877         XSEGLOG("registered block device major %d", xsegbd_dev->major);
878
879         ret = xsegbd_bus_add_dev(xsegbd_dev);
880         if (ret)
881                 goto out_blkdev;
882
883         if (!xq_alloc_seq(&xsegbd_dev->blk_queue_pending, 
884                                 xsegbd_dev->nr_requests,
885                                 xsegbd_dev->nr_requests))
886                 goto out_bus;
887
888         xsegbd_dev->blk_req_pending = kzalloc(
889                         xsegbd_dev->nr_requests *sizeof(struct xsegbd_pending),
890                                    GFP_KERNEL);
891         if (!xsegbd_dev->blk_req_pending)
892                 goto out_freeq;
893
894         
895         XSEGLOG("joining segment");
896         //FIXME use xsebd module config for now
897         xsegbd_dev->xseg = xseg_join(   xsegbd.config.type,
898                                         xsegbd.config.name,
899                                         "segdev",
900                                         xseg_callback           );
901         if (!xsegbd_dev->xseg)
902                 goto out_freepending;
903         
904
905         XSEGLOG("binding to source port %u (destination %u)",
906                         xsegbd_dev->src_portno, xsegbd_dev->dst_portno);
907         port = xseg_bind_port(xsegbd_dev->xseg, xsegbd_dev->src_portno);
908         if (!port) {
909                 XSEGLOG("cannot bind to port");
910                 ret = -EFAULT;
911
912                 goto out_xseg;
913         }
914         
915         if (xsegbd_dev->src_portno != xseg_portno(xsegbd_dev->xseg, port)) {
916                 XSEGLOG("portno != xsegbd_dev->src_portno");
917                 BUG_ON(1);
918                 ret = -EFAULT;
919                 goto out_xseg;
920         }
921         
922         /* make sure we don't get any requests until we're ready to handle them */
923         xseg_cancel_wait(xsegbd_dev->xseg, xseg_portno(xsegbd_dev->xseg, port));
924
925         ret = xsegbd_dev_init(xsegbd_dev);
926         if (ret)
927                 goto out_xseg;
928
929         xseg_prepare_wait(xsegbd_dev->xseg, xseg_portno(xsegbd_dev->xseg, port));
930         return count;
931
932 out_xseg:
933         xseg_leave(xsegbd_dev->xseg);
934         
935 out_freepending:
936         kfree(xsegbd_dev->blk_req_pending);
937
938 out_freeq:
939         xq_free(&xsegbd_dev->blk_queue_pending);
940
941 out_bus:
942         xsegbd_bus_del_dev(xsegbd_dev);
943         return ret;
944
945 out_blkdev:
946         unregister_blkdev(xsegbd_dev->major, XSEGBD_NAME);
947
948 out_delentry:
949         spin_lock(&xsegbd_devices_lock);
950         xsegbd_devices[xsegbd_dev->src_portno] = NULL;
951
952 out_unlock:
953         spin_unlock(&xsegbd_devices_lock);
954
955 out_dev:
956         kfree(xsegbd_dev);
957
958 out:
959         return ret;
960 }
961
962 static ssize_t xsegbd_remove(struct bus_type *bus, const char *buf, size_t count)
963 {
964         struct xsegbd_device *xsegbd_dev = NULL;
965         int id, ret;
966         unsigned long ul_id;
967
968         ret = strict_strtoul(buf, 10, &ul_id);
969         if (ret)
970                 return ret;
971
972         id = (int) ul_id;
973         if (id != ul_id)
974                 return -EINVAL;
975
976         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
977
978         ret = count;
979         xsegbd_dev = __xsegbd_get_dev(id);
980         if (!xsegbd_dev) {
981                 ret = -ENOENT;
982                 goto out_unlock;
983         }
984         xsegbd_bus_del_dev(xsegbd_dev);
985
986 out_unlock:
987         mutex_unlock(&xsegbd_mutex);
988         return ret;
989 }
990
/* sysfs attributes exposed on the xsegbd bus: write-only 'add' and
 * 'remove' files handled by xsegbd_add()/xsegbd_remove(). */
static struct bus_attribute xsegbd_bus_attrs[] = {
	__ATTR(add, S_IWUSR, NULL, xsegbd_add),
	__ATTR(remove, S_IWUSR, NULL, xsegbd_remove),
	__ATTR_NULL
};
996
997 static int xsegbd_sysfs_init(void)
998 {
999         int ret;
1000
1001         ret = device_register(&xsegbd_root_dev);
1002         if (ret < 0)
1003                 return ret;
1004
1005         xsegbd_bus_type.bus_attrs = xsegbd_bus_attrs;
1006         ret = bus_register(&xsegbd_bus_type);
1007         if (ret < 0)
1008                 device_unregister(&xsegbd_root_dev);
1009
1010         return ret;
1011 }
1012
/* Tear down sysfs state in reverse order of xsegbd_sysfs_init(). */
static void xsegbd_sysfs_cleanup(void)
{
	bus_unregister(&xsegbd_bus_type);
	device_unregister(&xsegbd_root_dev);
}
1018
1019 /* *************************** */
1020 /* ** Module Initialization ** */
1021 /* *************************** */
1022
1023 static int __init xsegbd_init(void)
1024 {
1025         int ret = -ENOMEM;
1026         xsegbd_devices = kzalloc(max_dev * sizeof(struct xsegbd_devices *), GFP_KERNEL);
1027         if (!xsegbd_devices)
1028                 goto out;
1029
1030         spin_lock_init(&xsegbd_devices_lock);
1031
1032         ret = -ENOSYS;
1033         ret = xsegbd_xseg_init();
1034         if (ret)
1035                 goto out_free;
1036
1037         ret = xsegbd_sysfs_init();
1038         if (ret)
1039                 goto out_xseg;
1040
1041         XSEGLOG("initialization complete");
1042
1043 out:
1044         return ret;
1045
1046 out_xseg:
1047         xsegbd_xseg_quit();
1048         
1049 out_free:
1050         kfree(xsegbd_devices);
1051
1052         goto out;
1053 }
1054
1055 static void __exit xsegbd_exit(void)
1056 {
1057         xsegbd_sysfs_cleanup();
1058         xsegbd_xseg_quit();
1059 }
1060
/* Kernel module entry/exit registration. */
module_init(xsegbd_init);
module_exit(xsegbd_exit);
1063