Merge wip-sysfs into xseg, with amendments
author gtsouk <gtsouk@cslab.ece.ntua.gr>
Wed, 4 Jan 2012 19:18:14 +0000 (21:18 +0200)
committer gtsouk <gtsouk@cslab.ece.ntua.gr>
Wed, 4 Jan 2012 19:18:14 +0000 (21:18 +0200)
Conflicts:
xseg/sys/xsegbd.c

Amendments:
struct completion is now per-request, not per-device.
The xsegbd callback decides heuristically whether xreq->priv
holds the index of a pending I/O request or a pointer to a
completion struct (any value >= nr_requests is treated as a
pointer), and branches accordingly.

The X_INFO (size) request is now handled by the blocked caller
waiting on the completion, not by the callback, and a failure in
xsegbd_get_size() now causes device initialization to fail.

Untested.

1  2 
xseg/sys/xsegbd.c

@@@ -17,6 -17,6 +17,7 @@@
  #include <linux/blkdev.h>
  #include <linux/bio.h>
  #include <linux/device.h>
++#include <linux/completion.h>
  
  #include "xsegdev.h"
  #include "xsegbd.h"
@@@ -428,59 -428,50 +429,9 @@@ static const struct block_device_operat
  /* *************************** */
  
  static void xseg_request_fn(struct request_queue *rq);
 -
 -static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev)
 -{
 -      struct xseg_request *xreq;
 -      struct xseg_port *port;
 -      char *name;
 -      uint64_t datasize;
 -      struct completion comp;
 -
 -      xreq = xseg_get_request(xsegbd.xseg, xsegbd_dev->src_portno);
 -      if (!xreq)
 -              return -EINVAL;
 -
 -      datasize = sizeof(loff_t);
 -      BUG_ON(xreq->buffersize - xsegbd.namesize < datasize);
 -      BUG_ON(xseg_prep_request(xreq, xsegbd.namesize, datasize));
 -
 -      name = XSEG_TAKE_PTR(xreq->name, xsegbd.xseg->segment);
 -      strncpy(name, xsegbd.name, xsegbd.namesize);
 -      xreq->size = datasize;
 -      xreq->offset = 0;
 -
 -      xreq->op = X_INFO;
 -
 -      port = &xsegbd.xseg->ports[xsegbd_dev->src_portno];
 -      port->waitcue = (long) xsegbd_dev;
 -
 -      init_completion(&comp);
 -      xsegbd_dev->comp = &comp;
 -
 -      BUG_ON(xseg_submit(xsegbd.xseg, xsegbd_dev->dst_portno, xreq) == NoSerial);
 -
 -      xseg_signal(xsegbd.xseg, xsegbd_dev->dst_portno);
 -
 -      xseg_put_request(xsegbd.xseg, xsegbd_dev->src_portno, xreq);
 -      xseg_free_requests(xsegbd.xseg, xsegbd_dev->src_portno, 1);
 -
 -      wait_for_completion_interruptible(&comp);
 -
 -      return 0;
 -}
 -
++static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev);
  
- static loff_t xsegbd_get_size(struct xsegbd_device *xsegbd_dev)
- {
-       struct xseg_request *xreq;
-       char *name, *data;
-       uint64_t datasize, counter = 0;
-       loff_t size;
-       xreq = xseg_get_request(xsegbd.xseg, xsegbd_dev->src_portno);
-       if (!xreq)
-               return -EINVAL;
-       datasize = sizeof(loff_t);
-       BUG_ON(xreq->buffersize - xsegbd.namesize < datasize);
-       BUG_ON(xseg_prep_request(xreq, xsegbd.namesize, datasize));
-       name = XSEG_TAKE_PTR(xreq->name, xsegbd.xseg->segment);
-       strncpy(name, xsegbd.name, xsegbd.namesize);
-       xreq->size = datasize;
-       xreq->offset = 0;
-       xreq->op = X_INFO;
-       BUG_ON(xseg_submit(xsegbd.xseg, xsegbd_dev->dst_portno, xreq) == NoSerial);
-       xseg_signal(xsegbd.xseg, xsegbd_dev->dst_portno);
-       /* callback_fn doesn't handle X_INFO reqs atm, and more importantly we
-        * cannot use an async operation to learn the disk size. Currently, this
-        * behaves like a busy-wait loop and makes insmod block until a peer
-        * responds to our X_INFO req. This will change when the sysfs interface is
-        * implemented, to handle disk operations.
-        */
-       while (!(xreq = xseg_receive(xsegbd.xseg, xsegbd_dev->src_portno))) {
-               counter ++;
-               if (counter > 10000000)
-                       return 0;
-       }
-       while (!(xreq->state & XS_SERVED)) ;
-       data = XSEG_TAKE_PTR(xreq->data, xsegbd.xseg->segment);
-       /* TODO: make sure we use consistent types accross peers */
-       size = *((off_t *) data);
-       if (xreq)
-               xseg_put_request(xsegbd.xseg, xsegbd_dev->src_portno, xreq);
-       return size;
- }
- static int xsegbd_dev_init(struct xsegbd_device *xsegbd_dev, sector_t size)
+ static int xsegbd_dev_init(struct xsegbd_device *xsegbd_dev)
  {
        int ret = -ENOMEM;
        struct gendisk *disk;
        blk_queue_logical_block_size(xsegbd_dev->blk_queue, 512);
        blk_queue_physical_block_size(xsegbd_dev->blk_queue, blksize);
        blk_queue_bounce_limit(xsegbd_dev->blk_queue, BLK_BOUNCE_ANY);
 -      /* we can handle any number of segments, BUT
 -       * parts of the request may be available far sooner than others
 -       * but we cannot complete them (unless we handle their bios directly).
 +      
 +      //blk_queue_max_segments(dev->blk_queue, 512);
 +      /* calculate maximum block request size
 +       * request size in pages * page_size
 +       * leave one page in buffer for name
         */
 -      blk_queue_max_segments(xsegbd_dev->blk_queue, 1);
 +      max_request_size_bytes =
 +               (unsigned int) (xsegbd.config.request_size - 1) *
 +                              ( 1 << xsegbd.config.page_shift) ;
 +      blk_queue_max_hw_sectors(xsegbd_dev->blk_queue, max_request_size_bytes >> 9);
 +      blk_queue_max_segment_size(xsegbd_dev->blk_queue, max_request_size_bytes);
 +      blk_queue_io_min(xsegbd_dev->blk_queue, max_request_size_bytes);
 +      blk_queue_io_opt(xsegbd_dev->blk_queue, max_request_size_bytes);
 +
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, xsegbd_dev->blk_queue);
  
-       XSEGLOG("TWO");
        /* vkoukis says we don't need partitions */
        xsegbd_dev->gd = disk = alloc_disk(1);
        if (!disk)
        if (!xsegbd_dev->blk_req_pending)
                goto out_free_pending;
  
-       XSEGLOG("FOUR");
        /* allow a non-zero sector_size parameter to override the disk size */
-       xsegbd_dev->sectors = sector_size ? sector_size : xsegbd_get_size(xsegbd_dev) / 512ULL;
-       set_capacity(disk, xsegbd_dev->sectors);
+       if (sector_size)
+               xsegbd_dev->sectors = sector_size;
 -      else
 -              xsegbd_get_size(xsegbd_dev);
++      else {
++              ret = xsegbd_get_size(xsegbd_dev);
++              if (ret)
++                      goto out_free_pending;
++      }
  
+       set_capacity(disk, xsegbd_dev->sectors);
 -
 +      XSEGLOG("xsegbd active...");
        add_disk(disk); /* immediately activates the device */
--
--      return 0;
--
++      ret = 0;
  out:
        return ret;
  
@@@ -645,9 -624,9 +597,9 @@@ static void xseg_request_fn(struct requ
                strncpy(name, xsegbd.name, xsegbd.namesize);
                blkreq_idx = xq_pop_head(&xsegbd_dev->blk_queue_pending);
                BUG_ON(blkreq_idx == None);
-               /* WARN_ON(dev->blk_req_pending[blkreq_idx] */
 -              /* WARN_ON(xsegbd_dev->blk_req_pending[blkreq_idx] */
++              /* WARN_ON(xsebd_dev->blk_req_pending[blkreq_idx] */
                xsegbd_dev->blk_req_pending[blkreq_idx] = blkreq;
 -              xreq->priv = (void *)(unsigned long)blkreq_idx;
 +              xreq->priv = (uint64_t)blkreq_idx;
                xreq->size = datasize;
                xreq->offset = blk_rq_pos(blkreq) << 9;
                /*
  
                BUG_ON(xseg_submit(xsegbd.xseg, xsegbd_dev->dst_portno, xreq) == NoSerial);
        }
 -
 +      //This is going to happen at least once.
 +      //TODO find out why it happens more than once.
 +      WARN_ON(xseg_signal(xsegbd_dev->xsegbd->xseg, xsegbd_dev->dst_portno));
        if (xreq)
 -              xseg_put_request(xsegbd.xseg, xsegbd_dev->src_portno, xreq);
 +              xseg_put_request(xsegbd_dev->xsegbd->xseg, xsegbd_dev->src_portno, xreq);
 +}
 +
++int update_dev_sectors_from_request(  struct xsegbd_device *xsegbd_dev,
++                                      struct xseg_request *xreq       )
++{
++      void *data;
++      if (!(xreq->state & XS_SERVED))
++              return -EIO;
++
++      data = XSEG_TAKE_PTR(xreq->data, xsegbd.xseg->segment);
++      xsegbd_dev->sectors = *((uint64_t *) data) / 512ULL;
++      return 0;
++}
++
++static int xsegbd_get_size(struct xsegbd_device *xsegbd_dev)
++{
++      struct xseg_request *xreq;
++      struct xseg_port *port;
++      char *name;
++      uint64_t datasize;
++      struct completion comp;
++      int ret = -EBUSY;
++
++      xreq = xseg_get_request(xsegbd.xseg, xsegbd_dev->src_portno);
++      if (!xreq)
++              goto out;
++
++      datasize = sizeof(uint64_t);
++      BUG_ON((uint64_t)&comp < xsegbd_dev->nr_requests);
++      BUG_ON(xreq->buffersize - xsegbd.namesize < datasize);
++      BUG_ON(xseg_prep_request(xreq, xsegbd.namesize, datasize));
++
++      init_completion(&comp);
++      xreq->priv = (uint64_t)(long)&comp;
++
++      name = XSEG_TAKE_PTR(xreq->name, xsegbd.xseg->segment);
++      strncpy(name, xsegbd.name, xsegbd.namesize);
++      xreq->size = datasize;
++      xreq->offset = 0;
++
++      xreq->op = X_INFO;
++
++      port = &xsegbd.xseg->ports[xsegbd_dev->src_portno];
++      port->waitcue = (uint64_t)(long)xsegbd_dev;
++
++      BUG_ON(xseg_submit(xsegbd.xseg, xsegbd_dev->dst_portno, xreq) == NoSerial);
++      xseg_signal(xsegbd.xseg, xsegbd_dev->dst_portno);
++
++      wait_for_completion_interruptible(&comp);
++      ret = update_dev_sectors_from_request(xsegbd_dev, xreq);
++out:
++      xseg_put_request(xsegbd.xseg, xsegbd_dev->src_portno, xreq);
++      return ret;
+ }
  static long xseg_callback(void *arg)
  {
        struct xsegbd_device *xsegbd_dev = NULL;
        struct xseg_port *port;
        struct request *blkreq;
        unsigned long flags;
--      xqindex blkreq_idx;
++      uint64_t blkreq_idx;
        int err;
 -      char *data;
  
        port = XSEG_TAKE_PTR(arg, xsegbd.xseg->segment);
        xsegbd_dev = (struct xsegbd_device *) port->waitcue;
                if (!xreq)
                        break;
  
 -              if (xreq->op == X_INFO) {
 -                      if (!(xreq->state & XS_SERVED)) {
 -                              xseg_put_request(xsegbd.xseg, xreq->portno, xreq);
 -                              return -EIO;
 -                      }
 -
 -                      data = XSEG_TAKE_PTR(xreq->data, xsegbd.xseg->segment);
 -                      /* TODO: make sure we use consistent types accross peers */
 -                      xsegbd_dev->sectors = *((off_t *) data) / 512ULL;
 -
 -                      complete(xsegbd_dev->comp);
 -
 -                      xseg_put_request(xsegbd.xseg, xreq->portno, xreq);
 -
 -                      return 0;
 -               }
 -
                /* we rely upon our peers to not have touched ->priv */
--              blkreq_idx = (xqindex)(unsigned long)xreq->priv;
--              if (blkreq_idx < 0 || blkreq_idx >= xsegbd_dev->nr_requests) {
--                      XSEGLOG("invalid request index: %u! Ignoring.", blkreq_idx);
--                      goto xseg_put;
++              blkreq_idx = (uint64_t)xreq->priv;
++              if (blkreq_idx >= xsegbd_dev->nr_requests) {
++                      /* someone is blocking on this request
++                         and will handle it when we wake them up. */
++                      complete((void *)(long)xreq->priv);
++                      /* the request is blocker's responsibility so
++                         we will not put_request(); */
++                      continue;
                }
  
++              /* this is now treated as a block I/O request to end */
                blkreq = xsegbd_dev->blk_req_pending[blkreq_idx];
                /* WARN_ON(!blkreq); */
                err = -EIO;
  blk_end:
                blk_end_request_all(blkreq, err);
                xq_append_head(&xsegbd_dev->blk_queue_pending, blkreq_idx);
--xseg_put:
                xseg_put_request(xsegbd.xseg, xreq->portno, xreq);
        }
  
  
  /* sysfs interface */
  
--
  static struct bus_type xsegbd_bus_type = {
        .name   = "xsegbd",
  };