Revision 0c97ba3e xseg/peers/user/cached.c

--- a/xseg/peers/user/cached.c
+++ b/xseg/peers/user/cached.c
@@ -56 +56 @@
 #define DIRTY 3
 #define WRITING 4
 
-#define bucket_readable(__status) \
-	(__status == VALID || __status == DIRTY || __status == WRITING)
+#define bucket_readable(__bucket_status) \
+	(__bucket_status == VALID || __bucket_status == DIRTY || __bucket_status == WRITING)
 
 /* write policies */
 #define WRITETHROUGH 1
 #define WRITEBACK 2
 
-#define WRITE_POLICY(__wcp) \
-	__wcp == WRITETHROUGH ? "writethrough" : \
-	__wcp == WRITEBACK ? "writeback" : \
+#define WRITE_POLICY(__wcp) \
+	__wcp == WRITETHROUGH ? "writethrough" : \
+	__wcp == WRITEBACK ? "writeback" : \
 		"undefined"
 
 
-
-/* object states */
-#define INVALIDATED (1 << 0)
-
-
 /* cio states */
 #define CIO_FAILED 1
 #define CIO_ACCEPTED 2
 #define CIO_READING 3
 
+/* ce states */
+#define CE_READY 1
+#define CE_WRITING 2
+#define CE_FAILED 3
+#define CE_INVALIDATED 4
+#define CE_NOT_READY 5
+
 #define BUCKET_SIZE_QUANTUM 4096
 
 struct cache_io {
@@ -102 +104 @@
 
 struct ce {
 	unsigned char *data;
-	uint32_t *status;
-	struct xwaitq *waitq;
-	uint32_t flags;
-	struct xlock lock;
-	struct xworkq workq;
+	uint32_t *bucket_status; /* bucket_status of each bucket */
+	struct xwaitq *bucket_waitq; /* waitq of each bucket */
+	uint32_t status; /* cache entry status */
+	struct xlock lock; /* cache entry lock */
+	struct xworkq workq; /* workq of the cache entry */
+	struct xwaitq waitq; /* waitq of the cache entry */
 	struct peer_req pr;
 };
 
+struct req_completion{
+	struct peer_req *pr;
+	struct xseg_request *req;
+};
+
+struct eviction_arg {
+	struct peerd *peer;
+	struct ce *evicted;
+	struct ce *new_entry;
+};
+
+
 
 /*
  * Helper functions
@@ -150 +165 @@
 
 static int is_not_loading(void *arg)
 {
-	uint32_t *status = (uint32_t *)arg;
-	return (*status != LOADING);
+	uint32_t *bucket_status = (uint32_t *)arg;
+	return (*bucket_status != LOADING);
 }
 
+static int cache_entry_ready(void *arg)
+{
+	struct ce *ce = (struct ce *)arg;
+	return (ce->status == CE_READY);
+}
 
 static void print_cached(struct cached *cached)
 {
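Note: cache_entry_ready() above, like is_not_loading(), is a waitq condition function. The standalone sketch below (illustration only, not part of this revision, and not the real xwaitq implementation) shows the pattern these functions serve: a deferred job guarded by a condition that is checked again when the queue is signalled. The LOADING/VALID values are made up for the demo.

/*
 * Toy stand-in for the condition-gated wait queue pattern used in cached.c.
 */
#include <stdio.h>
#include <stdint.h>

struct toy_waitq {
	int (*condition)(void *arg);	/* when true, queued work may run */
	void *arg;			/* argument passed to the condition */
	void (*job_fn)(void *job);	/* at most one deferred job in this toy */
	void *job;
};

static void toy_enqueue(struct toy_waitq *wq, void (*fn)(void *), void *job)
{
	if (wq->condition(wq->arg)) {
		fn(job);		/* condition already holds: run now */
		return;
	}
	wq->job_fn = fn;		/* otherwise defer until toy_signal() */
	wq->job = job;
}

static void toy_signal(struct toy_waitq *wq)
{
	if (wq->job_fn && wq->condition(wq->arg)) {
		wq->job_fn(wq->job);	/* condition became true: run deferred job */
		wq->job_fn = NULL;
	}
}

#define LOADING 2	/* hypothetical values, just for this demo */
#define VALID   1

static int is_not_loading(void *arg)
{
	uint32_t *bucket_status = (uint32_t *)arg;
	return (*bucket_status != LOADING);
}

static void resume_read(void *job)
{
	printf("bucket ready, resuming read of request %s\n", (char *)job);
}

int main(void)
{
	uint32_t bucket_status = LOADING;
	struct toy_waitq wq = { is_not_loading, &bucket_status, NULL, NULL };

	toy_enqueue(&wq, resume_read, "pr0");	/* deferred: bucket still LOADING */
	bucket_status = VALID;			/* e.g. a read reply arrived */
	toy_signal(&wq);			/* now the deferred job runs */
	return 0;
}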
@@ -288 +308 @@
  * //Paste data
  * req_data = xseg_get_data(xseg, req);
  * memcpy(req_data, req_old->data, size);
+ * for (i=start; i<=end; i++){
+ * 	ce->bucket_status[i] = WRITING;
+ * }
  */
 	} else {
 		for (i=start; i<=end; i++){
-			ce->status[i] = LOADING;
+			ce->bucket_status[i] = LOADING;
 		}
 	}
 
@@ -325 +348 @@
 	struct peerd *peer = (struct peerd *)c;
 	struct cached *cached = peer->priv;
 	struct ce *ce = (struct ce *)e;
-	ce->flags = 0;
+	struct cache_io *ce_cio = ce->pr.priv;
+	ce->status = CE_NOT_READY;
 	memset(ce->data, 0, cached->object_size);
 	for (i = 0; i < cached->buckets_per_object; i++) {
-		ce->status[i] = INVALID;
+		ce->bucket_status[i] = INVALID;
 	}
-	ce->pr.cio->h = NoEntry;
-	ce->pr.cio->pending_reqs;
+	ce_cio->h = NoEntry;
+	ce_cio->pending_reqs = 0;
 	xlock_release(&ce->lock);
 	return 0;
 }
 
-void on_put(void *c, void *e)
+void eviction_work(void *arg)
 {
-	struct peerd *peer = (struct peerd *)c;
+	/*
+	 * In this context we hold a reference to the evicted cache entry and
+	 * the new cache entry.
+	 * Evicted cache entry lock is also held.
+	 */
+	struct eviction_arg *earg = (struct eviction_arg *)arg;
+	struct peerd *peer = earg->peer;;
 	struct cached *cached = peer->priv;
-	struct ce *ce = (struct ce *)e;
-	//since we are the last referrer to the cache entry
-	//no lock is needed.
-
-	XSEGLOG2(&lc, D, "Putting cache entry %p", e);
-
+	struct ce *ce_evicted = earg->evicted;
+	struct ce *ce_new = earg->new_entry;
+	struct cache_io *ce_cio = ce_evicted->pr.priv;
 	uint32_t start, end, i = 0;
-	if (cached->write_policy == WRITETHROUGH || ce->flags & INVALIDATED)
+
+	/* recheck status here, in case there was a race */
+	if (ce_evicted->status & CE_INVALIDATED){
+		ce_new->status = CE_READY;
+		xwaitq_signal(&ce_new->waitq);
 		return;
-	//write all dirty buckets.
+	}
+
 	while(i < cached->buckets_per_object){
-		if (ce->status[i] != DIRTY){
+		if (ce_evicted->bucket_status[i] != DIRTY){
 			i++;
 			continue;
 		}
 		start = i;
 		while (i < cached->buckets_per_object &&
 			(i-start)*cached->bucket_size < cached->max_req_size &&
-			ce->status[i] == DIRTY){
+			ce_evicted->bucket_status[i] == DIRTY){
 			i++;
 		}
 		end = i;
-		rw_range(cached, &ce->pr, 1, start, end);
-		ce->pr.cio->pending_reqs++;
+		rw_range(peer, &ce_evicted->pr, 1, start, end);
+		ce_cio->pending_reqs++;
+	}
+	if (!ce_cio->pending_reqs){
+		xcache_put(cached->cache, ce_cio->h);
+		ce_cio->h = NoEntry;
+		ce_new->status = CE_READY;
+		free(earg);
+		xwaitq_signal(&ce_new->waitq);
 	}
+
+}
+
+int on_evict(void *c, void *evicted, void *new_entry)
+{
+	struct peerd *peer = (struct peerd *)c;
+	struct cached *cached = peer->priv;
+	struct ce *ce_evicted = (struct ce *)evicted;
+	struct ce *ce_new = (struct ce *)new_entry;
+	struct cache_io *ce_cio = (struct cache_io *)ce_evicted->pr.priv;
+	struct eviction_arg *earg;
+
+	/*
+	 * Since write policy doesn't change and after a cache entry gets
+	 * invalidated, it cannot never be valid again, it is safe to procceed
+	 * without the cache entry lock.
+	 */
+	if (cached->write_policy != WRITEBACK ||
+		ce_evicted->status & CE_INVALIDATED){
+		xcache_put(cached->cache, ce_cio->h);
+		ce_cio->h = NoEntry;
+		ce_new->status = CE_READY;
+		xwaitq_signal(&ce_new->waitq);
+		return 0;
+	}
+
+	earg = malloc(sizeof(struct eviction_arg));
+	if (!earg)
+		return -1;
+	earg->peer = (struct peerd *)peer;
+	earg->evicted = ce_evicted;
+	earg->new_entry = ce_new;
+
+	/* In all other cases, we should have the cache entry lock */
+	if (xworkq_enqueue(&ce_evicted->workq, eviction_work, (void *)earg) < 0){
+		return -1;
+	}
+	return 1;
+}
+
+void on_put(void *c, void *e)
+{
+	//since we are the last referrer to the cache entry
+	//no lock is needed.
+
+	XSEGLOG2(&lc, D, "Putting cache entry %p", e);
 }
 
 void * init_node(void *c)
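The dirty-bucket flush in eviction_work() batches runs of consecutive DIRTY buckets so that each rw_range() call covers at most max_req_size bytes. The following standalone program illustrates just that grouping logic (illustration only; the bucket_size, max_req_size and dirty map are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t bucket_size = 4096, max_req_size = 16384;
	uint32_t buckets_per_object = 16;
	int dirty[16] = {0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0};
	uint32_t start, end, i = 0;

	while (i < buckets_per_object) {
		if (!dirty[i]) { i++; continue; }
		start = i;
		/* advance over a dirty run, capped by max_req_size */
		while (i < buckets_per_object &&
			(i - start) * bucket_size < max_req_size &&
			dirty[i])
			i++;
		end = i;
		/* in cached.c this is where rw_range(..., 1, start, end) is issued */
		printf("rw_range over dirty run: start=%u end=%u\n", start, end);
	}
	return 0;
}

With these values the dirty run 3..12 is walked in three groups, so no single flush request exceeds max_req_size.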
@@ -379 +464 @@
 	xlock_release(&ce->lock);
 
 	ce->data = malloc(sizeof(unsigned char) * cached->object_size);
-	ce->status = malloc(sizeof(uint32_t) * cached->buckets_per_object);
-	ce->waitq = malloc(sizeof(struct xwaitq) * cached->buckets_per_object);
-	ce->pr.priv = malloc(sizeof(struct cio));
-	if (!ce->data || !ce->status || !ce->waitq || !ce->pr.priv)
+	ce->bucket_status = malloc(sizeof(uint32_t) * cached->buckets_per_object);
+	ce->bucket_waitq = malloc(sizeof(struct xwaitq) * cached->buckets_per_object);
+	ce->pr.priv = malloc(sizeof(struct cache_io));
+	if (!ce->data || !ce->bucket_status || !ce->bucket_waitq || !ce->pr.priv)
 		goto ce_fields_fail;
 
 	ce->pr.peer = peer;
 	for (i = 0; i < cached->buckets_per_object; i++) {
-		xwaitq_init(&ce->waitq[i], is_not_loading, &ce->status[i],
-				XWAIT_SIGNAL_ONE);
+		xwaitq_init(&ce->bucket_waitq[i], is_not_loading,
+				&ce->bucket_status[i], XWAIT_SIGNAL_ONE);
 	}
+	xwaitq_init(&ce->waitq, cache_entry_ready, ce, XWAIT_SIGNAL_ONE);
 	xworkq_init(&ce->workq, &ce->lock, 0);
 	return ce;
 
 ce_fields_fail:
 	free(ce->data);
-	free(ce->status);
-	free(ce->waitq);
+	free(ce->bucket_status);
+	free(ce->bucket_waitq);
 	free(ce);
 ce_fail:
 	perror("malloc");
@@ -405 +491 @@
 
 struct xcache_ops c_ops = {
 	.on_init = on_init,
+	.on_evict = on_evict,
 	.on_put = on_put,
 	.on_node_init = init_node
 };
@@ -413 +500 @@
 		uint32_t limit)
 {
 	uint32_t end = start + 1;
-	while (end <= limit && ce->status[end] == INVALID)
+	while (end <= limit && ce->bucket_status[end] == INVALID)
 		end++;
 	return (end - 1);
 }
@@ -442 +529 @@
 
 static void handle_read(void *arg);
 //is this necessary?
-static void status_changed(void *arg)
+static void bucket_status_changed(void *arg)
 {
 	/*
 	 * In this context we hold a reference to the cache entry.
@@ -516 +603 @@
 
 	XSEGLOG2(&lc, D, "Start: %lu, Limit %lu", b, limit );
 	for (i = b; i <= limit; i++) {
-		if (bucket_readable(ce->status[i]))
+		if (bucket_readable(ce->bucket_status[i]))
 			continue;
-		if (ce->status[i] != LOADING){
+		if (ce->bucket_status[i] != LOADING){
 			XSEGLOG2(&lc, D, "Found invalid bucket %lu\n", i);
 			start_bucket = i;
 			end_bucket = __get_last_invalid(ce, start_bucket, limit);
@@ -539 +626 @@
 		cio->work.job_fn = handle_read;
 		cio->work.job = pr;
 		/* wait on the last bucket */
-		xwaitq_enqueue(&ce->waitq[end_bucket], &cio->work);
+		xwaitq_enqueue(&ce->bucket_waitq[end_bucket], &cio->work);
 		return;
 	}
 
@@ -570 +657 @@
 	struct cached *cached = __get_cached(peer);
 	struct cache_io *cio = __get_cache_io(pr);
 	struct ce *ce = get_cache_entry(cached->cache, cio->h);
-	(void)ce;
+	struct xseg_request *req = pr->req;
 
 	char *req_data;
 	uint32_t start_bucket, end_bucket, last_read_bucket = -1;
 	uint32_t i;
+	uint64_t data_len, data_start;
 	uint64_t first_bucket_offset = req->offset % cached->bucket_size;
 
 
 	//what about FUA?
 
-	uint32_t pending_buckets = 0;
-	start = __get_bucket(cached, req->offset);
-	end = __get_bucket(cached, req->offset + req->size - 1);
+	start_bucket = __get_bucket(cached, req->offset);
+	end_bucket = __get_bucket(cached, req->offset + req->size - 1);
 	/*
 	 * In case of a misaligned write, if the start, end buckets of the write
 	 * are invalid, we have to read them before continue with the write.
 	 */
-	if (ce->status[start] == INVALID && first_bucket_offset){
-		if (rw_range(peer, pr, 0, start, start) < 0){
+	if (ce->bucket_status[start_bucket] == INVALID && first_bucket_offset){
+		if (rw_range(peer, pr, 0, start_bucket, start_bucket) < 0){
 			cio->state = CIO_FAILED;
 			goto out;
 		}
 		cio->pending_reqs++;
-		last_read_bucket = start;
+		last_read_bucket = start_bucket;
 	}
-	if (start != end && ce->status[end] == INVALID &&
+	if (start_bucket != end_bucket && ce->bucket_status[end_bucket] == INVALID &&
 		(req->offset + req->size -1) % cached->bucket_size){
-		if (rw_range(peer, pr, 0, end, end) < 0){
+		if (rw_range(peer, pr, 0, end_bucket, end_bucket) < 0){
 			cio->state = CIO_FAILED;
 			goto out;
 		}
 		cio->pending_reqs++;
-		last_read_bucket = end;
+		last_read_bucket = end_bucket;
 	}
 
 	if (last_read_bucket != -1){
@@ -610 +697 @@
 		cio->work.job = pr;
 		/* wait on the last read bucket */
 		XSEGLOG2(&lc, I, "Enqueuing cio %p in waitq (fn: handle_write).\n", cio);
-		xwaitq_enqueue(&ce->waitq[last_read_bucket], &cio->work);
+		xwaitq_enqueue(&ce->bucket_waitq[last_read_bucket], &cio->work);
 		XSEGLOG2(&lc, I, "Handle_write returned after enqueuing cio %p in waitq.\n", cio);
 		return;
 	}
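For the misaligned-write checks in handle_write above, the bucket arithmetic works out as in the following standalone example (illustration only; it assumes __get_bucket() is a plain division by bucket_size, and the numbers are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t bucket_size = 4096;
	uint64_t offset = 6000, size = 10000;

	uint64_t start_bucket = offset / bucket_size;			/* 1 */
	uint64_t end_bucket = (offset + size - 1) / bucket_size;	/* 3 */
	uint64_t first_bucket_offset = offset % bucket_size;		/* 1904 */
	uint64_t last_bucket_cut = (offset + size - 1) % bucket_size + 1; /* 3712 */

	printf("write touches buckets %lu..%lu\n",
		(unsigned long)start_bucket, (unsigned long)end_bucket);
	if (first_bucket_offset)
		printf("bucket %lu starts %lu bytes in: read-fill it if INVALID\n",
			(unsigned long)start_bucket,
			(unsigned long)first_bucket_offset);
	if (last_bucket_cut != bucket_size)
		printf("bucket %lu covered only up to byte %lu: read-fill it if INVALID\n",
			(unsigned long)end_bucket,
			(unsigned long)last_bucket_cut);
	return 0;
}

Both edge buckets are only partially covered by the write, which is why handle_write issues a read for each of them before copying the new data in.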
@@ -626 +713 @@
 	 * mark them as dirty
 	 */
 	/* special care for the first bucket */
-		data_start = cached->bucket_size * start + first_bucket_offset;
+		data_start = cached->bucket_size * start_bucket + first_bucket_offset;
 		data_len = cached->bucket_size - first_bucket_offset;
-		memcpy(ce->data[data_start], req_data, datalen);
-		ce->status[start] = DIRTY;
+		memcpy(&ce->data[data_start], req_data, data_len);
+		ce->bucket_status[start_bucket] = DIRTY;
 	/* and the rest */
-		for (i = start+1; i <= end; i++) {
-			data_start = cached->bucket_size * (i-start) +
+		for (i = start_bucket + 1; i <= end_bucket; i++) {
+			data_start = cached->bucket_size * (i-start_bucket) +
 				first_bucket_offset;
 			if (data_start + cached->bucket_size <= req->size)
 				data_len = cached->bucket_size;
 			else
 				data_len = req->size - data_start;
-			memcpy(ce->data[i * cached->bucket_size],
-				req_data[data_start], data_len);
-			ce->status[i] = DIRTY;
+			memcpy(&ce->data[i * cached->bucket_size],
+				&req_data[data_start], data_len);
+			ce->bucket_status[i] = DIRTY;
 		}
 	} else {
 		cio->state = CIO_FAILED;
@@ -659 +746 @@
 }
 
 /*
+ * Assert cache entry is ready.
+ *
+ * Depending on the op type, a handler function is enqueued in the workq of the
+ * target's cache_entry.
+ */
+static void handle_readwrite_post(void *arg)
+{
+	/*
+	 * In this context, we hold a reference to the associated cache entry.
+	 */
+	struct ce *ce;
+	struct peer_req *pr = (struct peer_req *)arg;
+	struct peerd *peer = pr->peer;
+//	struct cached *cached = __get_cached(peer);
+//	struct cache_io *ce_cio, *cio = __get_cache_io(pr);
+	struct xseg_request *req = pr->req;
+	int r = 0;
+
+	if (ce->status != CE_READY){
+		XSEGLOG2(&lc, E, "Cache entry %p has status %u", ce, ce->status);
+		r = -1;
+		//FIXME defer request ?
+		goto out;
+	}
+	if (req->op == X_WRITE)
+		r = xworkq_enqueue(&ce->workq, handle_write, (void *)pr);
+	else if (req->op == X_READ)
+		r = xworkq_enqueue(&ce->workq, handle_read, (void *)pr);
+	else {
+		r = -1;
+		XSEGLOG2(&lc, E, "Invalid op %u", req->op);
+	}
+
+out:
+	if (r < 0){
+		XSEGLOG2(&lc, E, "Failing pr %p", pr);
+		cached_fail(peer, pr);
+	}
+}
+
+/*
  * handle_readwrite is called when we accept a request.
  * Its purpose is to find a handler associated with the request's target
  * object (partial cache hit), or to allocate a new one (cache_miss) and insert
  * it in xcache.
  *
- * Depending on the op type, a handler function is enqueued in the workq of the
- * target's cache_entry.
+ * Then, we wait until the returned cache entry is ready.
  */
 static int handle_readwrite(struct peerd *peer, struct peer_req *pr)
 {
 	struct ce *ce;
 	struct cached *cached = __get_cached(peer);
-	struct cache_io *cio = __get_cache_io(pr);
+	struct cache_io *ce_cio, *cio = __get_cache_io(pr);
 	struct xseg_request *req = pr->req;
 	char name[XSEG_MAX_TARGETLEN + 1];
 	char *target;
@@ -722 +849 @@
 		XSEGLOG2(&lc, E, "Received cache entry handler %lu but no cache entry", h);
 		goto out;
 	}
+
 	cio->h = h;
 
-	if (req->op == X_WRITE)
-		r = xworkq_enqueue(&ce->workq, handle_write, (void *)pr);
-	else if (req->op == X_READ)
-		r = xworkq_enqueue(&ce->workq, handle_read, (void *)pr);
-	else {
-		r = -1;
-		XSEGLOG2(&lc, E, "Invalid op %u", req->op);
-		goto out;
-	}
+	ce_cio = ce->pr.priv;
+	ce_cio->h = h;
+
+	/* wait for the cache_entry to be ready */
+	cio->work.job_fn = handle_readwrite_post;
+	cio->work.job = pr;
+	r = xwaitq_enqueue(&ce->waitq, &cio->work);
 
 out:
 	if (r < 0){
@@ -743 +869 @@
 
 }
 
-struct req_completion{
-	struct peer_req *pr;
-	struct xseg_request *req;
-};
-
 /*
  * complete_read is called when we receive a reply from a request issued by
  * rw_range. The process mentioned below applies only to buckets previously
@@ -757 +878 @@
  * If not, we mark serviced buckets as VALID, non-serviced buckets as INVALID
  * and the cio is failed
  *
- * At any point when a bucket status changes, we signal the respective waitq.
+ * At any point when a bucket bucket_status changes, we signal the respective waitq.
  */
 static void complete_read(void *arg)
 {
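As a concrete illustration of the comment above (illustration only, not part of this revision; it assumes __get_bucket() is a plain division by bucket_size and the numbers are made up), this is how a partially serviced read splits the affected buckets:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t bucket_size = 4096;
	uint64_t offset = 8192, size = 16384, serviced = 8192;

	uint64_t start = offset / bucket_size;				/* 2 */
	uint64_t end_serviced = (offset + serviced - 1) / bucket_size;	/* 3 */
	uint64_t end_size = (offset + size - 1) / bucket_size;		/* 5 */
	uint64_t i;

	/* buckets fully covered by the serviced part become VALID */
	for (i = start; i <= end_serviced && serviced; i++)
		printf("bucket %lu: LOADING -> VALID\n", (unsigned long)i);
	/* the remaining requested buckets fall back to INVALID and fail the cio */
	for (; i <= end_size; i++)
		printf("bucket %lu: LOADING -> INVALID (cio fails)\n",
			(unsigned long)i);
	return 0;
}

Each of these transitions is followed by a signal on the corresponding bucket waitq, so any work queued on those buckets gets a chance to run.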
@@ -810 +931 @@
 
 	/* Check serviced buckets */
 	for (i = start; i <= end_serviced && req->serviced; i++) {
-		if (ce->status[i] != LOADING)
+		if (ce->bucket_status[i] != LOADING)
 			continue;
 
 		XSEGLOG2(&lc, D, "Bucket %lu loading and reception successful\n",i);
 		memcpy(ce->data + (i * cached->bucket_size), data, cached->bucket_size);
-		ce->status[i] = VALID;
-		xwaitq_signal(&ce->waitq[i]);
+		ce->bucket_status[i] = VALID;
+		xwaitq_signal(&ce->bucket_waitq[i]);
 	}
 
 	/* Check non-serviced buckets */
 	for (; i <= end_size; i++) {
-		if (ce->status[i] != LOADING)
+		if (ce->bucket_status[i] != LOADING)
 			continue;
 
 		XSEGLOG2(&lc, D, "Bucket %lu loading but reception unsuccessful\n", i);
-		ce->status[i] = INVALID;
+		ce->bucket_status[i] = INVALID;
 		cio->state = CIO_FAILED;
-		xwaitq_signal(&ce->waitq[i]);
+		xwaitq_signal(&ce->bucket_waitq[i]);
 	}
 
 	xseg_put_request(peer->xseg, rc->req, pr->portno);
@@ -850 +971 @@
 	struct cached *cached = __get_cached(peer);
 	struct cache_io *cio = __get_cache_io(pr);
 	struct ce *ce;
-	uint32_t start, end_serviced, end_requested, i, first_bucket_offset;
-	uint64_t data_start, data_len;
+	uint32_t start, end_serviced, end_requested, i;
+	uint64_t data_start, data_len, first_bucket_offset;
 	char *req_data;
 	int success;
 
@@ -880 +1001 @@
 	end_serviced = __get_bucket(cached, req->offset + req->serviced - 1);
 	end_requested = __get_bucket(cached, req->offset + req->size - 1);
 
-	uint64_t first_bucket_offset = req->offset % cached->bucket_size;
+	first_bucket_offset = req->offset % cached->bucket_size;
 	req_data = xseg_get_data(peer->xseg, req);
 
 	/*
@@ -889 +1010 @@
 	 * 	copy data to bucket
 	 * 	mark as valid
 	 * else if WRITEBACK
-	 * 	if status writing
+	 * 	if bucket_status writing
 	 * 		mark as valid
 	 *
 	 * No need to signal anything!
@@ -897 +1018 @@
 	if (cached->write_policy == WRITETHROUGH){
 		data_start = start * cached->bucket_size + first_bucket_offset;
 		data_len = cached->bucket_size - first_bucket_offset;
-		memcpy(ce->data[data_start], req_data, data_len);
+		memcpy(&ce->data[data_start], req_data, data_len);
 	} else if (cached->write_policy == WRITEBACK) {
 		;
 	}
-	ce->status[start] = VALID;
+	ce->bucket_status[start] = VALID;
@@ -905 +1026 @@
 	for (i = start+1; i <= end_serviced; i++) {
 		if (cached->write_policy == WRITETHROUGH){
 			data_start = cached->bucket_size * (i - start) +
@@ -910 +1031 @@
 				data_len = cached->bucket_size;
 			else
 				data_len = req->size - data_start;
-			memcpy(ce->data[cached->bucket_size * i],
-				req_data[data_start], data_len);
-			ce->status[i] = VALID;
+			memcpy(&ce->data[cached->bucket_size * i],
+				&req_data[data_start], data_len);
+			ce->bucket_status[i] = VALID;
 		} else if (cached->write_policy == WRITEBACK) {
-			if (ce->status[i] == WRITING)
-				ce->status[i] = VALID;
+			if (ce->bucket_status[i] == WRITING)
+				ce->bucket_status[i] = VALID;
 		}
 	}
 out:
@@ -970 +1091 @@
 	return 0;
 }
 
-static int handle_receive_write(struct peerd *peer, struct peer_req *pr)
+static int handle_receive_write(struct peerd *peer, struct peer_req *pr,
+		struct xseg_request *req)
 {
 	XSEGLOG2(&lc, I, "Started\n");
 	/*