fix xsegbd to use the req->priv pointer instead of xseg_{get,set}_req_data
[archipelago] / xseg / peers / kernel / xsegbd.c
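
The pending-request index used to be attached to each in-flight xseg request
with the xseg_set_req_data()/xseg_get_req_data() helpers and looked up again
on completion; it now travels in the request's own priv pointer. A minimal
before/after sketch of the two styles, taken from the paths below (error
handling elided):

    /* before: the index is stored and recovered through the req-data helpers */
    r = xseg_set_req_data(xseg, xreq, (void *) blkreq_idx);   /* submit side */
    r = xseg_get_req_data(xseg, xreq, &data);                 /* completion side */
    blkreq_idx = (xqindex) data;

    /* after: the index rides in the request itself */
    xreq->priv = (void *) blkreq_idx;                         /* submit side */
    blkreq_idx = (xqindex) xreq->priv;                        /* completion side */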
1 /* xsegbd.c
2  *
3  */
4
5 #include <linux/module.h>
6 #include <linux/moduleparam.h>
7 #include <linux/init.h>
8 #include <linux/sched.h>
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
11 #include <linux/fs.h>
12 #include <linux/errno.h>
13 #include <linux/timer.h>
14 #include <linux/types.h>
15 #include <linux/vmalloc.h>
16 #include <linux/genhd.h>
17 #include <linux/blkdev.h>
18 #include <linux/bio.h>
19 #include <linux/device.h>
20 #include <linux/completion.h>
21
22 #include <sys/kernel/segdev.h>
23 #include "xsegbd.h"
24
25 #define XSEGBD_MINORS 1
26 /* define max request size to be used in xsegbd */
27 //FIXME should we make this 4MB instead of 256KB ?
28 #define XSEGBD_MAX_REQUEST_SIZE 262144U
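/* 262144 bytes = 256 KiB, i.e. 512 sectors of 512 bytes */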
29
30 MODULE_DESCRIPTION("xsegbd");
31 MODULE_AUTHOR("XSEG");
32 MODULE_LICENSE("GPL");
33
34 static long sector_size = 0;
35 static long blksize = 512;
36 static int major = 0;
37 static int max_dev = 1024;
38 static char name[XSEGBD_SEGMENT_NAMELEN] = "xsegbd";
39 static char spec[256] = "segdev:xsegbd:4:1024:12";
40
41 module_param(sector_size, long, 0644);
42 module_param(blksize, long, 0644);
43 module_param(max_dev, int, 0644);
44 module_param(major, int, 0644);
45 module_param_string(name, name, sizeof(name), 0644);
46 module_param_string(spec, spec, sizeof(spec), 0644);
47
48 //static spinlock_t __lock;
49 static struct xsegbd xsegbd;
50 static struct xsegbd_device **xsegbd_devices; /* indexed by portno */
51 static DEFINE_MUTEX(xsegbd_mutex);
52 static DEFINE_SPINLOCK(xsegbd_devices_lock);
53
54
55
56 static struct xsegbd_device *__xsegbd_get_dev(unsigned long id)
57 {
58         struct xsegbd_device *xsegbd_dev = NULL;
59
60         spin_lock(&xsegbd_devices_lock);
61         xsegbd_dev = xsegbd_devices[id];
62         spin_unlock(&xsegbd_devices_lock);
63
64         return xsegbd_dev;
65 }
66
67 /* ************************* */
68 /* ***** sysfs helpers ***** */
69 /* ************************* */
70
71 static struct xsegbd_device *dev_to_xsegbd(struct device *dev)
72 {
73         return container_of(dev, struct xsegbd_device, dev);
74 }
75
76 static struct device *xsegbd_get_dev(struct xsegbd_device *xsegbd_dev)
77 {
78         /* FIXME */
79         return get_device(&xsegbd_dev->dev);
80 }
81
82 static void xsegbd_put_dev(struct xsegbd_device *xsegbd_dev)
83 {
84         put_device(&xsegbd_dev->dev);
85 }
86
87 /* ************************* */
88 /* ** XSEG Initialization ** */
89 /* ************************* */
90
91 static void xseg_callback(struct xseg *xseg, xport portno);
92
93 int xsegbd_xseg_init(void)
94 {
95         int r;
96
97         if (!xsegbd.name[0])
98                 strncpy(xsegbd.name, name, XSEGBD_SEGMENT_NAMELEN);
99
100         r = xseg_initialize();
101         if (r) {
102                 XSEGLOG("cannot initialize 'segdev' peer");
103                 goto err;
104         }
105
106         r = xseg_parse_spec(spec, &xsegbd.config);
107         if (r)
108                 goto err;
109
110         if (strncmp(xsegbd.config.type, "segdev", 16))
111                 XSEGLOG("WARNING: unexpected segment type '%s' vs 'segdev'",
112                          xsegbd.config.type);
113
114         /* leave it here for now */
115         XSEGLOG("joining segment");
116         xsegbd.xseg = xseg_join(        xsegbd.config.type,
117                                         xsegbd.config.name,
118                                         "segdev",
119                                         xseg_callback           );
120         if (!xsegbd.xseg) {
121                 XSEGLOG("cannot find segment");
122                 r = -ENODEV;
123                 goto err;
124         }
125
126         return 0;
127 err:
128         return r;
129
130 }
131
132 int xsegbd_xseg_quit(void)
133 {
134         struct segdev *segdev;
135
136         /* make sure to unmap the segment first */
137         segdev = segdev_get(0);
138         clear_bit(SEGDEV_RESERVED, &segdev->flags);
139         xsegbd.xseg->priv->segment_type.ops.unmap(xsegbd.xseg, xsegbd.xseg->segment_size);
140         segdev_put(segdev);
141
142         return 0;
143 }
144
145
146 /* ***************************** */
147 /* ** Block Device Operations ** */
148 /* ***************************** */
149
150 static int xsegbd_open(struct block_device *bdev, fmode_t mode)
151 {
152         struct gendisk *disk = bdev->bd_disk;
153         struct xsegbd_device *xsegbd_dev = disk->private_data;
154
155         xsegbd_get_dev(xsegbd_dev);
156
157         return 0;
158 }
159
160 static int xsegbd_release(struct gendisk *gd, fmode_t mode)
161 {
162         struct xsegbd_device *xsegbd_dev = gd->private_data;
163
164         xsegbd_put_dev(xsegbd_dev);
165
166         return 0;
167 }
168
169 static int xsegbd_ioctl(struct block_device *bdev, fmode_t mode,
170                         unsigned int cmd, unsigned long arg)
171 {
172         return -ENOTTY;
173 }
174
175 static const struct block_device_operations xsegbd_ops = {
176         .owner          = THIS_MODULE,
177         .open           = xsegbd_open,
178         .release        = xsegbd_release,
179         .ioctl          = xsegbd_ioctl 
180 };
181
182
183 /* *************************** */
184 /* ** Device Initialization ** */
185 /* *************************** */
186
187 static void xseg_request_fn(struct request_queue *rq);
188 static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev);
189
190 static int xsegbd_dev_init(struct xsegbd_device *xsegbd_dev)
191 {
192         int ret = -ENOMEM;
193         struct gendisk *disk;
194         unsigned int max_request_size_bytes;
195
196         spin_lock_init(&xsegbd_dev->rqlock);
197
198         xsegbd_dev->xsegbd = &xsegbd;
199
200         xsegbd_dev->blk_queue = blk_alloc_queue(GFP_KERNEL);
201         if (!xsegbd_dev->blk_queue)
202                 goto out;
203
204         blk_init_allocated_queue(xsegbd_dev->blk_queue, xseg_request_fn, &xsegbd_dev->rqlock);
205         xsegbd_dev->blk_queue->queuedata = xsegbd_dev;
206
207         blk_queue_flush(xsegbd_dev->blk_queue, REQ_FLUSH | REQ_FUA);
208         blk_queue_logical_block_size(xsegbd_dev->blk_queue, 512);
209         blk_queue_physical_block_size(xsegbd_dev->blk_queue, blksize);
210         blk_queue_bounce_limit(xsegbd_dev->blk_queue, BLK_BOUNCE_ANY);
211         
212         //blk_queue_max_segments(dev->blk_queue, 512);
213
214         max_request_size_bytes = XSEGBD_MAX_REQUEST_SIZE;
215         blk_queue_max_hw_sectors(xsegbd_dev->blk_queue, max_request_size_bytes >> 9);
216         blk_queue_max_segment_size(xsegbd_dev->blk_queue, max_request_size_bytes);
217         blk_queue_io_min(xsegbd_dev->blk_queue, max_request_size_bytes);
218         blk_queue_io_opt(xsegbd_dev->blk_queue, max_request_size_bytes);
219
220         queue_flag_set_unlocked(QUEUE_FLAG_NONROT, xsegbd_dev->blk_queue);
221
222         /* vkoukis says we don't need partitions */
223         xsegbd_dev->gd = disk = alloc_disk(1);
224         /* FIXME: We call xsegbd_dev_release if something goes wrong, to clean up
225          * disks/queues/etc.
226          * Would it be better to do the cleanup here, and conditionally clean up
227          * in dev_release?
228          */
229         if (!disk)
230                 goto out;
231
232         disk->major = xsegbd_dev->major;
233         disk->first_minor = 0; // id * XSEGBD_MINORS;
234         disk->fops = &xsegbd_ops;
235         disk->queue = xsegbd_dev->blk_queue;
236         disk->private_data = xsegbd_dev;
237         disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
238         snprintf(disk->disk_name, 32, "xsegbd%u", xsegbd_dev->id);
239
240         ret = 0;
241         
242         /* allow a non-zero sector_size parameter to override the disk size */
243         if (sector_size)
244                 xsegbd_dev->sectors = sector_size;
245         else {
246                 ret = xsegbd_get_size(xsegbd_dev);
247                 if (ret)
248                         goto out;
249         }
250
251         set_capacity(disk, xsegbd_dev->sectors);
252         XSEGLOG("xsegbd active...");
253         add_disk(disk); /* immediately activates the device */
254
255         return 0;
256
257 out:
258         return ret;
259 }
260
261 static void xsegbd_dev_release(struct device *dev)
262 {
263         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
264
265         /* cleanup gendisk and blk_queue the right way */
266         if (xsegbd_dev->gd) {
267                 if (xsegbd_dev->gd->flags & GENHD_FL_UP)
268                         del_gendisk(xsegbd_dev->gd);
269
270                 blk_cleanup_queue(xsegbd_dev->blk_queue);
271                 put_disk(xsegbd_dev->gd);
272         }
273
274         /* xsegbd does not actually need to wait here.
275          * using xseg_cancel_wait with the xseg_segdev
276          * kernel driver would make this a no-op, and
277          * might be clearer.
278          */
279         xseg_cancel_wait(xsegbd_dev->xseg, xsegbd_dev->src_portno);
280
281         if (xseg_free_requests(xsegbd_dev->xseg, 
282                         xsegbd_dev->src_portno, xsegbd_dev->nr_requests) < 0)
283                 XSEGLOG("Error trying to free requests!\n");
284
285
286         unregister_blkdev(xsegbd_dev->major, XSEGBD_NAME);
287
288         spin_lock(&xsegbd_devices_lock);
289         BUG_ON(xsegbd_devices[xsegbd_dev->src_portno] != xsegbd_dev);
290         xsegbd_devices[xsegbd_dev->src_portno] = NULL;
291         spin_unlock(&xsegbd_devices_lock);
292
293         if (xsegbd_dev->blk_req_pending)
294                 kfree(xsegbd_dev->blk_req_pending);
295         xq_free(&xsegbd_dev->blk_queue_pending);
296
297         kfree(xsegbd_dev);
298
299         module_put(THIS_MODULE);
300 }
301
302 /* ******************* */
303 /* ** Critical Path ** */
304 /* ******************* */
305
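/* copy helpers between the bio pages and the xseg request buffer:
 * blk_to_xseg fills the xseg buffer for writes, xseg_to_blk copies
 * reply data back into the bio pages for reads.
 */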
306 static void blk_to_xseg(struct xseg *xseg, struct xseg_request *xreq,
307                         struct request *blkreq)
308 {
309         struct bio_vec *bvec;
310         struct req_iterator iter;
311         uint64_t off = 0;
312         char *data = xseg_get_data(xseg, xreq);
313         rq_for_each_segment(bvec, blkreq, iter) {
314                 char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
315                 memcpy(data + off, bdata, bvec->bv_len);
316                 off += bvec->bv_len;
317                 kunmap_atomic(bdata);
318         }
319 }
320
321 static void xseg_to_blk(struct xseg *xseg, struct xseg_request *xreq,
322                         struct request *blkreq)
323 {
324         struct bio_vec *bvec;
325         struct req_iterator iter;
326         uint64_t off = 0;
327         char *data = xseg_get_data(xseg, xreq);
328         rq_for_each_segment(bvec, blkreq, iter) {
329                 char *bdata = kmap_atomic(bvec->bv_page) + bvec->bv_offset;
330                 memcpy(bdata, data + off, bvec->bv_len);
331                 off += bvec->bv_len;
332                 kunmap_atomic(bdata);
333         }
334 }
335
336 static void xseg_request_fn(struct request_queue *rq)
337 {
338         struct xseg_request *xreq;
339         struct xsegbd_device *xsegbd_dev = rq->queuedata;
340         struct request *blkreq;
341         struct xsegbd_pending *pending;
342         xqindex blkreq_idx;
343         char *target;
344         uint64_t datalen;
345         xport p;
346         int r;
347
348         for (;;) {
349                 blkreq_idx = Noneidx;
350                 xreq = xseg_get_request(xsegbd_dev->xseg, xsegbd_dev->src_portno, 
351                                 xsegbd_dev->dst_portno, X_ALLOC);
352                 if (!xreq)
353                         break;
354
355                 blkreq_idx = xq_pop_head(&xsegbd_dev->blk_queue_pending, 
356                                                 xsegbd_dev->src_portno);
357                 if (blkreq_idx == Noneidx)
358                         break;
359
360                 blkreq = blk_fetch_request(rq);
361                 if (!blkreq)
362                         break;
363
364                 if (blkreq->cmd_type != REQ_TYPE_FS) {
365                         /* FIXME: xreq and blkreq_idx leak here */
366                         XSEGLOG("non-fs cmd_type: %u. *shrug*", blkreq->cmd_type);
367                         __blk_end_request_all(blkreq, -EIO);
368                         continue;
369                 }
370
371                 datalen = blk_rq_bytes(blkreq);
372                 r = xseg_prep_request(xsegbd_dev->xseg, xreq, 
373                                         xsegbd_dev->targetlen, datalen);
374                 if (r < 0) {
375                         XSEGLOG("couldn't prep request");
376                         __blk_end_request_err(blkreq, r);
377                         BUG_ON(1);
378                         break;
379                 }
380                 if (xreq->bufferlen - xsegbd_dev->targetlen < datalen){
381                         XSEGLOG("malformed req buffers");
382                         __blk_end_request_err(blkreq, -EIO);
383                         BUG_ON(1);
384                         break;
385                 }
386
387                 target = xseg_get_target(xsegbd_dev->xseg, xreq);
388                 strncpy(target, xsegbd_dev->target, xsegbd_dev->targetlen);
389                 if (blkreq_idx >= xsegbd_dev->nr_requests) {
390                         XSEGLOG("blkreq_idx >= xsegbd_dev->nr_requests");
391                         BUG_ON(1);
392                         __blk_end_request_err(blkreq, -EIO);
393                         break;
394                 }
395                 pending = &xsegbd_dev->blk_req_pending[blkreq_idx];
396                 pending->dev = xsegbd_dev;
397                 pending->request = blkreq;
398                 pending->comp = NULL;
399                 xreq->size = datalen;
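                /* blk_rq_pos() counts 512-byte sectors; shift to a byte offset */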
400                 xreq->offset = blk_rq_pos(blkreq) << 9;
401                 /*
402                 if (xreq->offset >= (sector_size << 9))
403                         XSEGLOG("sector offset: %lu > %lu, flush:%u, fua:%u",
404                                  blk_rq_pos(blkreq), sector_size,
405                                  blkreq->cmd_flags & REQ_FLUSH,
406                                  blkreq->cmd_flags & REQ_FUA);
407                 */
408
409                 if (blkreq->cmd_flags & REQ_FLUSH)
410                         xreq->flags |= XF_FLUSH;
411
412                 if (blkreq->cmd_flags & REQ_FUA)
413                         xreq->flags |= XF_FUA;
414
415                 //XSEGLOG("xreq: %lx size: %llu offset: %llu, blkreq_idx: %llu", 
416                 //              xreq, xreq->size, xreq->offset, blkreq_idx);
417
418                 if (rq_data_dir(blkreq)) {
419                         /* unlock for data transfers? */
420                         blk_to_xseg(xsegbd_dev->xseg, xreq, blkreq);
421                         //XSEGLOG("xreq: %lx size: %llu offset: %llu, blkreq_idx: %llu completed blk_to_xseg", 
422                         //      xreq, xreq->size, xreq->offset, blkreq_idx);
423                         xreq->op = X_WRITE;
424                 } else {
425                         xreq->op = X_READ;
426                 }
427
428                 /* carry the pending-queue index in the request itself;
429                  * the old xseg_set_req_data() round trip is gone, so
430                  * there is nothing to unset on error paths either.
431                  */
432                 xreq->priv = (void *) blkreq_idx;
433                 //XSEGLOG("xreq: %lx size: %llu offset: %llu, blkreq_idx: %llu set req data", 
434                 //              xreq, xreq->size, xreq->offset, blkreq_idx);
435
436                 p = xseg_submit(xsegbd_dev->xseg, xreq, 
437                                         xsegbd_dev->src_portno, X_ALLOC);
438                 if (p == NoPort) {
439                         /* no req data to unset */
440                         XSEGLOG("couldn't submit req");
441                         BUG_ON(1);
442                         __blk_end_request_err(blkreq, -EIO);
443                         break;
444                 }
445                 //XSEGLOG("xreq: %lx size: %llu offset: %llu, blkreq_idx: %llu submitted", 
446                 //              xreq, xreq->size, xreq->offset, blkreq_idx);
447                 WARN_ON(xseg_signal(xsegbd_dev->xsegbd->xseg, p) < 0);
448         }
449         if (xreq)
450                 BUG_ON(xseg_put_request(xsegbd_dev->xsegbd->xseg, xreq, 
451                                         xsegbd_dev->src_portno) == -1);
452         if (blkreq_idx != Noneidx)
453                 BUG_ON(xq_append_head(&xsegbd_dev->blk_queue_pending, 
454                                         blkreq_idx, xsegbd_dev->src_portno) == Noneidx);
455 }
456
457 int update_dev_sectors_from_request(    struct xsegbd_device *xsegbd_dev,
458                                         struct xseg_request *xreq       )
459 {
460         void *data;
461
462         if (xreq->state & XS_FAILED)
463                 return -ENOENT;
464
465         if (!(xreq->state & XS_SERVED))
466                 return -EIO;
467
468         data = xseg_get_data(xsegbd_dev->xseg, xreq);
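        /* the X_INFO reply carries the target size in bytes; convert to 512-byte sectors */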
469         xsegbd_dev->sectors = *((uint64_t *) data) / 512ULL;
470         return 0;
471 }
472
473 static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev)
474 {
475         struct xseg_request *xreq;
476         char *target;
477         uint64_t datalen;
478         xqindex blkreq_idx;
479         struct xsegbd_pending *pending;
480         struct completion comp;
481         xport p;
482         int ret = -EBUSY;
484         xreq = xseg_get_request(xsegbd_dev->xseg, xsegbd_dev->src_portno,
485                         xsegbd_dev->dst_portno, X_ALLOC);
486         if (!xreq)
487                 goto out;
488
489         datalen = sizeof(uint64_t);
490         BUG_ON(xseg_prep_request(xsegbd_dev->xseg, xreq, xsegbd_dev->targetlen, datalen));
491         BUG_ON(xreq->bufferlen - xsegbd_dev->targetlen < datalen);
492
493         init_completion(&comp);
494         blkreq_idx = xq_pop_head(&xsegbd_dev->blk_queue_pending, 1);
495         if (blkreq_idx == Noneidx)
496                 goto out;
497         
498         pending = &xsegbd_dev->blk_req_pending[blkreq_idx];
499         pending->dev = xsegbd_dev;
500         pending->request = NULL;
501         pending->comp = &comp;
502
503
504         /* carry the pending index in the request; no xseg_set_req_data() needed */
505         xreq->priv = (void *) blkreq_idx;
509
510         target = xseg_get_target(xsegbd_dev->xseg, xreq);
511         strncpy(target, xsegbd_dev->target, xsegbd_dev->targetlen);
512         xreq->size = datalen;
513         xreq->offset = 0;
514
515         xreq->op = X_INFO;
516
517         /* waiting is not strictly needed here, but using
518          * xseg_prepare_wait with the xseg_segdev kernel driver
519          * would be clearer, and it amounts to a no-op.
520          */
521
522         xseg_prepare_wait(xsegbd_dev->xseg, xsegbd_dev->src_portno);
523         p = xseg_submit(xsegbd_dev->xseg, xreq, 
524                                 xsegbd_dev->src_portno, X_ALLOC);
525         if (p == NoPort)
526                 goto out_queue;
530         WARN_ON(xseg_signal(xsegbd_dev->xseg, p) < 0);
531
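        /* FIXME: if the wait is interrupted by a signal, the xseg request may
         * still be in flight when it is put below.
         */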
532         wait_for_completion_interruptible(&comp);
533         //XSEGLOG("Woken up after wait_for_completion_interruptible()\n");
534         ret = update_dev_sectors_from_request(xsegbd_dev, xreq);
535         //XSEGLOG("get_size: sectors = %ld\n", (long)xsegbd_dev->sectors);
536 out:
537         BUG_ON(xseg_put_request(xsegbd_dev->xseg, xreq, xsegbd_dev->src_portno) < 0);
538         return ret;
539
542 out_queue:
543         xq_append_head(&xsegbd_dev->blk_queue_pending, blkreq_idx, 1);
544         goto out;
546 }
547
548 static void xseg_callback(struct xseg *xseg, xport portno)
549 {
550         struct xsegbd_device *xsegbd_dev;
551         struct xseg_request *xreq;
552         struct request *blkreq;
553         struct xsegbd_pending *pending;
554         unsigned long flags;
555         xqindex blkreq_idx, ridx;
556         int err;
558
559         xsegbd_dev  = __xsegbd_get_dev(portno);
560         if (!xsegbd_dev) {
561                 WARN_ON(1);
562                 return;
563         }
564
565         for (;;) {
566                 xseg_prepare_wait(xsegbd_dev->xseg, xsegbd_dev->src_portno);
567                 xreq = xseg_receive(xsegbd_dev->xseg, portno);
568                 if (!xreq)
569                         break;
570
571                 xseg_cancel_wait(xsegbd_dev->xseg, xsegbd_dev->src_portno);
572
573                 /* the pending-queue index was stored in xreq->priv at
574                  * submit time; no xseg_get_req_data() lookup is needed.
575                  */
581                 blkreq_idx = (xqindex) xreq->priv;
582                 if (blkreq_idx >= xsegbd_dev->nr_requests) {
583                         WARN_ON(1);
584                         //maybe put request?
585                         continue;
586                 }
587
588                 pending = &xsegbd_dev->blk_req_pending[blkreq_idx];
589                 if (pending->comp) {
590                         /* someone is blocking on this request
591                            and will handle it when we wake them up. */
592                         complete(pending->comp);
593                         /* the request is blocker's responsibility so
594                            we will not put_request(); */
595                         continue;
596                 }
597
598                 /* this is now treated as a block I/O request to end */
599                 blkreq = pending->request;
600                 pending->request = NULL;
601                 //xsegbd_dev = pending->dev;
602                 if (xsegbd_dev != pending->dev) {
603                         XSEGLOG("xsegbd_dev != pending->dev");
604                         BUG_ON(1);
605                         continue;
606                 }
607                 pending->dev = NULL;
608                 if (!blkreq){
609                         //FIXME
610                         XSEGLOG("blkreq does not exist");
611                         BUG_ON(1);
612                         continue;
613                 }
614
615                 err = -EIO;
616                 if (!(xreq->state & XS_SERVED))
617                         goto blk_end;
618
619                 if (xreq->serviced != blk_rq_bytes(blkreq))
620                         goto blk_end;
621
622                 /* unlock for data transfer? */
623                 if (!rq_data_dir(blkreq)){
624                         xseg_to_blk(xsegbd_dev->xseg, xreq, blkreq);
625                         //XSEGLOG("for req: %lx, completed xseg_to_blk", xreq);
626                 }       
627
628                 err = 0;
629 blk_end:
630                 blk_end_request_all(blkreq, err);
631                 //XSEGLOG("for req: %lx, completed", xreq);
632                 ridx = xq_append_head(&xsegbd_dev->blk_queue_pending, blkreq_idx, 1);
633                 if (ridx == Noneidx) {
634                         XSEGLOG("couldn't append blkreq_idx");
635                         WARN_ON(1);
636                 }
637
638                 err = xseg_put_request(xsegbd_dev->xseg, xreq, xsegbd_dev->src_portno);
639                 if (err < 0) {
640                         XSEGLOG("couldn't put req");
641                         BUG_ON(1);
642                 }
643         }
644
645         spin_lock_irqsave(&xsegbd_dev->rqlock, flags);
646         xseg_request_fn(xsegbd_dev->blk_queue);
647         spin_unlock_irqrestore(&xsegbd_dev->rqlock, flags);
650 }
651
652
653 /* sysfs interface */
654
655 static struct bus_type xsegbd_bus_type = {
656         .name   = "xsegbd",
657 };
658
659 static ssize_t xsegbd_size_show(struct device *dev,
660                                         struct device_attribute *attr, char *buf)
661 {
662         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
663
664         return sprintf(buf, "%llu\n", (unsigned long long) xsegbd_dev->sectors * 512ULL);
665 }
666
667 static ssize_t xsegbd_major_show(struct device *dev,
668                                         struct device_attribute *attr, char *buf)
669 {
670         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
671
672         return sprintf(buf, "%d\n", xsegbd_dev->major);
673 }
674
675 static ssize_t xsegbd_srcport_show(struct device *dev,
676                                         struct device_attribute *attr, char *buf)
677 {
678         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
679
680         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->src_portno);
681 }
682
683 static ssize_t xsegbd_dstport_show(struct device *dev,
684                                         struct device_attribute *attr, char *buf)
685 {
686         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
687
688         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->dst_portno);
689 }
690
691 static ssize_t xsegbd_id_show(struct device *dev,
692                                         struct device_attribute *attr, char *buf)
693 {
694         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
695
696         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->id);
697 }
698
699 static ssize_t xsegbd_reqs_show(struct device *dev,
700                                         struct device_attribute *attr, char *buf)
701 {
702         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
703
704         return sprintf(buf, "%u\n", (unsigned) xsegbd_dev->nr_requests);
705 }
706
707 static ssize_t xsegbd_target_show(struct device *dev,
708                                         struct device_attribute *attr, char *buf)
709 {
710         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
711
712         return sprintf(buf, "%s\n", xsegbd_dev->target);
713 }
714
715 static ssize_t xsegbd_image_refresh(struct device *dev,
716                                         struct device_attribute *attr,
717                                         const char *buf,
718                                         size_t size)
719 {
720         struct xsegbd_device *xsegbd_dev = dev_to_xsegbd(dev);
721         int rc, ret = size;
722
723         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
724
725         rc = xsegbd_get_size(xsegbd_dev);
726         if (rc < 0) {
727                 ret = rc;
728                 goto out;
729         }
730
731         set_capacity(xsegbd_dev->gd, xsegbd_dev->sectors);
732
733 out:
734         mutex_unlock(&xsegbd_mutex);
735         return ret;
736 }
737
738 static DEVICE_ATTR(size, S_IRUGO, xsegbd_size_show, NULL);
739 static DEVICE_ATTR(major, S_IRUGO, xsegbd_major_show, NULL);
740 static DEVICE_ATTR(srcport, S_IRUGO, xsegbd_srcport_show, NULL);
741 static DEVICE_ATTR(dstport, S_IRUGO, xsegbd_dstport_show, NULL);
742 static DEVICE_ATTR(id, S_IRUGO, xsegbd_id_show, NULL);
743 static DEVICE_ATTR(reqs, S_IRUGO, xsegbd_reqs_show, NULL);
744 static DEVICE_ATTR(target, S_IRUGO, xsegbd_target_show, NULL);
745 static DEVICE_ATTR(refresh, S_IWUSR, NULL, xsegbd_image_refresh);
746
747 static struct attribute *xsegbd_attrs[] = {
748         &dev_attr_size.attr,
749         &dev_attr_major.attr,
750         &dev_attr_srcport.attr,
751         &dev_attr_dstport.attr,
752         &dev_attr_id.attr,
753         &dev_attr_reqs.attr,
754         &dev_attr_target.attr,
755         &dev_attr_refresh.attr,
756         NULL
757 };
758
759 static struct attribute_group xsegbd_attr_group = {
760         .attrs = xsegbd_attrs,
761 };
762
763 static const struct attribute_group *xsegbd_attr_groups[] = {
764         &xsegbd_attr_group,
765         NULL
766 };
767
768 static void xsegbd_sysfs_dev_release(struct device *dev)
769 {
770 }
771
772 static struct device_type xsegbd_device_type = {
773         .name           = "xsegbd",
774         .groups         = xsegbd_attr_groups,
775         .release        = xsegbd_sysfs_dev_release,
776 };
777
778 static void xsegbd_root_dev_release(struct device *dev)
779 {
780 }
781
782 static struct device xsegbd_root_dev = {
783         .init_name      = "xsegbd",
784         .release        = xsegbd_root_dev_release,
785 };
786
787 static int xsegbd_bus_add_dev(struct xsegbd_device *xsegbd_dev)
788 {
789         int ret = -ENOMEM;
790         struct device *dev;
791
792         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
793         dev = &xsegbd_dev->dev;
794
795         dev->bus = &xsegbd_bus_type;
796         dev->type = &xsegbd_device_type;
797         dev->parent = &xsegbd_root_dev;
798         dev->release = xsegbd_dev_release;
799         dev_set_name(dev, "%d", xsegbd_dev->id);
800
801         ret = device_register(dev);
802
803         mutex_unlock(&xsegbd_mutex);
804         return ret;
805 }
806
807 static void xsegbd_bus_del_dev(struct xsegbd_device *xsegbd_dev)
808 {
809         device_unregister(&xsegbd_dev->dev);
810 }
811
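/* the "add" bus attribute expects "<target> <src_port>:<dst_port>:<nr_requests>";
 * e.g. (hypothetical values): echo "myvolume 2:1:128" > /sys/bus/xsegbd/add
 */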
812 static ssize_t xsegbd_add(struct bus_type *bus, const char *buf, size_t count)
813 {
814         struct xsegbd_device *xsegbd_dev;
815         struct xseg_port *port;
816         ssize_t ret = -ENOMEM;
817
818         if (!try_module_get(THIS_MODULE))
819                 return -ENODEV;
820
821         xsegbd_dev = kzalloc(sizeof(*xsegbd_dev), GFP_KERNEL);
822         if (!xsegbd_dev)
823                 goto out;
824
825         spin_lock_init(&xsegbd_dev->rqlock);
826         INIT_LIST_HEAD(&xsegbd_dev->node);
827
828         /* parse cmd */
829         if (sscanf(buf, "%" __stringify(XSEGBD_TARGET_NAMELEN) "s "
830                         "%d:%d:%d", xsegbd_dev->target, &xsegbd_dev->src_portno,
831                         &xsegbd_dev->dst_portno, &xsegbd_dev->nr_requests) < 4) {
832                 ret = -EINVAL;
833                 goto out_dev;
834         }
835         xsegbd_dev->targetlen = strlen(xsegbd_dev->target);
836
837         spin_lock(&xsegbd_devices_lock);
838         if (xsegbd_devices[xsegbd_dev->src_portno] != NULL) {
839                 ret = -EINVAL;
840                 goto out_unlock;
841         }
842         xsegbd_devices[xsegbd_dev->src_portno] = xsegbd_dev;
843         xsegbd_dev->id = xsegbd_dev->src_portno;
844         spin_unlock(&xsegbd_devices_lock);
845
846         XSEGLOG("registering block device major %d", major);
847         ret = register_blkdev(major, XSEGBD_NAME);
848         if (ret < 0) {
849                 XSEGLOG("cannot register block device!");
850                 ret = -EBUSY;
851                 goto out_delentry;
852         }
853         xsegbd_dev->major = ret;
854         XSEGLOG("registered block device major %d", xsegbd_dev->major);
855
856         ret = xsegbd_bus_add_dev(xsegbd_dev);
857         if (ret)
858                 goto out_blkdev;
859
860         ret = -ENOMEM;
861         if (!xq_alloc_seq(&xsegbd_dev->blk_queue_pending,
862                         xsegbd_dev->nr_requests, xsegbd_dev->nr_requests))
863                 goto out_bus;
864
865         xsegbd_dev->blk_req_pending = kzalloc(
866                         xsegbd_dev->nr_requests * sizeof(struct xsegbd_pending),
867                         GFP_KERNEL);
868         if (!xsegbd_dev->blk_req_pending)
869                 goto out_freeq;
870
871         
872         XSEGLOG("joining segment");
873         //FIXME use xsegbd module config for now
874         xsegbd_dev->xseg = xseg_join(   xsegbd.config.type,
875                                         xsegbd.config.name,
876                                         "segdev",
877                                         xseg_callback           );
878         if (!xsegbd_dev->xseg)
879                 goto out_freepending;
880         
881
882         XSEGLOG("binding to source port %u (destination %u)",
883                         xsegbd_dev->src_portno, xsegbd_dev->dst_portno);
884         port = xseg_bind_port(xsegbd_dev->xseg, xsegbd_dev->src_portno);
885         if (!port) {
886                 XSEGLOG("cannot bind to port");
887                 ret = -EFAULT;
888
889                 goto out_xseg;
890         }
891         //FIXME rollback here
892         BUG_ON(xsegbd_dev->src_portno != xseg_portno(xsegbd_dev->xseg, port));
893         
894         /* make sure we don't get any requests until we're ready to handle them */
895         xseg_cancel_wait(xsegbd_dev->xseg, xseg_portno(xsegbd_dev->xseg, port));
896
897         ret = xsegbd_dev_init(xsegbd_dev);
898         if (ret)
899                 goto out_xseg;
900
901         xseg_prepare_wait(xsegbd_dev->xseg, xseg_portno(xsegbd_dev->xseg, port));
902         return count;
903
904 out_xseg:
905         xseg_leave(xsegbd_dev->xseg);
906         
907 out_freepending:
908         kfree(xsegbd_dev->blk_req_pending);
909         xsegbd_dev->blk_req_pending = NULL; /* dev_release() would free it again */
910 out_freeq:
911         xq_free(&xsegbd_dev->blk_queue_pending);
912
913 out_bus:
914         xsegbd_bus_del_dev(xsegbd_dev);
915
916         return ret;
917
918 out_blkdev:
919         unregister_blkdev(xsegbd_dev->major, XSEGBD_NAME);
920
921 out_delentry:
922         spin_lock(&xsegbd_devices_lock);
923         xsegbd_devices[xsegbd_dev->src_portno] = NULL;
924
925 out_unlock:
926         spin_unlock(&xsegbd_devices_lock);
927
928 out_dev:
929         kfree(xsegbd_dev);
930
931 out:
932         return ret;
933 }
934
935 static ssize_t xsegbd_remove(struct bus_type *bus, const char *buf, size_t count)
936 {
937         struct xsegbd_device *xsegbd_dev = NULL;
938         int id, ret;
939         unsigned long ul_id;
940
941         ret = strict_strtoul(buf, 10, &ul_id);
942         if (ret)
943                 return ret;
944
945         id = (int) ul_id;
946         if (id != ul_id)
947                 return -EINVAL;
948
949         mutex_lock_nested(&xsegbd_mutex, SINGLE_DEPTH_NESTING);
950
951         ret = count;
952         xsegbd_dev = __xsegbd_get_dev(id);
953         if (!xsegbd_dev) {
954                 ret = -ENOENT;
955                 goto out_unlock;
956         }
957         xsegbd_bus_del_dev(xsegbd_dev);
958
959 out_unlock:
960         mutex_unlock(&xsegbd_mutex);
961         return ret;
962 }
963
964 static struct bus_attribute xsegbd_bus_attrs[] = {
965         __ATTR(add, S_IWUSR, NULL, xsegbd_add),
966         __ATTR(remove, S_IWUSR, NULL, xsegbd_remove),
967         __ATTR_NULL
968 };
969
970 static int xsegbd_sysfs_init(void)
971 {
972         int ret;
973
974         ret = device_register(&xsegbd_root_dev);
975         if (ret < 0)
976                 return ret;
977
978         xsegbd_bus_type.bus_attrs = xsegbd_bus_attrs;
979         ret = bus_register(&xsegbd_bus_type);
980         if (ret < 0)
981                 device_unregister(&xsegbd_root_dev);
982
983         return ret;
984 }
985
986 static void xsegbd_sysfs_cleanup(void)
987 {
988         bus_unregister(&xsegbd_bus_type);
989         device_unregister(&xsegbd_root_dev);
990 }
991
992 /* *************************** */
993 /* ** Module Initialization ** */
994 /* *************************** */
995
996 static int __init xsegbd_init(void)
997 {
998         int ret = -ENOMEM;
999         xsegbd_devices = kzalloc(max_dev * sizeof(struct xsegbd_device *), GFP_KERNEL);
1000         if (!xsegbd_devices)
1001                 goto out;
1002
1003         spin_lock_init(&xsegbd_devices_lock);
1004
1005         ret = xsegbd_xseg_init();
1007         if (ret)
1008                 goto out_free;
1009
1010         ret = xsegbd_sysfs_init();
1011         if (ret)
1012                 goto out_xseg;
1013
1014         XSEGLOG("initialization complete");
1015
1016 out:
1017         return ret;
1018
1019 out_xseg:
1020         xsegbd_xseg_quit();
1021         
1022 out_free:
1023         kfree(xsegbd_devices);
1024
1025         goto out;
1026 }
1027
1028 static void __exit xsegbd_exit(void)
1029 {
1030         xsegbd_sysfs_cleanup();
1031         xsegbd_xseg_quit();
1032         kfree(xsegbd_devices);
1033 }
1034
1035 module_init(xsegbd_init);
1036 module_exit(xsegbd_exit);