|
1 |
/*
|
|
2 |
* Copyright 2013 GRNET S.A. All rights reserved.
|
|
3 |
*
|
|
4 |
* Redistribution and use in source and binary forms, with or
|
|
5 |
* without modification, are permitted provided that the following
|
|
6 |
* conditions are met:
|
|
7 |
*
|
|
8 |
* 1. Redistributions of source code must retain the above
|
|
9 |
* copyright notice, this list of conditions and the following
|
|
10 |
* disclaimer.
|
|
11 |
* 2. Redistributions in binary form must reproduce the above
|
|
12 |
* copyright notice, this list of conditions and the following
|
|
13 |
* disclaimer in the documentation and/or other materials
|
|
14 |
* provided with the distribution.
|
|
15 |
*
|
|
16 |
* THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
|
|
17 |
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
18 |
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
19 |
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
|
|
20 |
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
21 |
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
22 |
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
|
23 |
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
|
24 |
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
25 |
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
26 |
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
27 |
* POSSIBILITY OF SUCH DAMAGE.
|
|
28 |
*
|
|
29 |
* The views and conclusions contained in the software and
|
|
30 |
* documentation are those of the authors and should not be
|
|
31 |
* interpreted as representing official policies, either expressed
|
|
32 |
* or implied, of GRNET S.A.
|
|
33 |
*/
|
|
34 |
|
|
35 |
#include <errno.h>
|
|
36 |
#include <stdio.h>
|
|
37 |
#include <stdlib.h>
|
|
38 |
#include <string.h>
|
|
39 |
#include <unistd.h>
|
|
40 |
#include <pthread.h>
|
|
41 |
|
|
42 |
#include <xseg/xseg.h>
|
|
43 |
#include <xseg/protocol.h>
|
|
44 |
|
|
45 |
#include "list.h"
|
|
46 |
#include "tapdisk.h"
|
|
47 |
#include "tapdisk-driver.h"
|
|
48 |
#include "tapdisk-interface.h"
|
|
49 |
#include "tapdisk-server.h"
|
|
50 |
|
|
51 |
/* Fixed typo: was HACE_CONFIG_H, so config.h was never included. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
|
|
54 |
|
|
55 |
/* Upper bound on simultaneously outstanding merged requests. */
#define MAX_ARCHIPELAGO_REQS TAPDISK_DATA_REQUESTS
/* Max number of td_request_t entries coalesced into one merged request. */
#define MAX_ARCHIPELAGO_MERGED_REQS 32
/* Number of threads polling the xseg port for completions. */
#define NUM_XSEG_THREADS 2
|
|
58 |
|
|
59 |
/*
 * One (possibly merged) I/O request handed to Archipelago.  Up to
 * MAX_ARCHIPELAGO_MERGED_REQS contiguous tapdisk requests may be
 * coalesced into a single request before submission.
 */
struct tdarchipelago_request {
    td_request_t treq[MAX_ARCHIPELAGO_MERGED_REQS]; /* merged tapdisk requests */
    int treq_count;          /* number of valid entries in treq[] */
    int op;                  /* TD_OP_READ or TD_OP_WRITE */
    uint64_t offset;         /* byte offset of the merged span */
    uint64_t size;           /* total byte length of the merged span */
    void *buf;               /* data buffer (contiguous across merged treqs) */
    ssize_t result;          /* bytes serviced, or negative on error */
    struct list_head queue;  /* linkage on reqs_free / reqs_inflight */
};
|
|
69 |
|
|
70 |
/*
 * Per-xseg-request context attached via xseg_set_req_data(); carries
 * what the completion threads need to finish a request.  Allocated by
 * archipelago_aio_read/write(), freed by tdarchipelago_finish_aiocb().
 */
typedef struct AIORequestData {
    char *volname;   /* target Archipelago volume name */
    off_t offset;    /* byte offset of the I/O */
    ssize_t size;    /* byte length of the I/O */
    char *buf;       /* caller buffer (read payloads are copied into it) */
    int ret;         /* not read or written anywhere visible here -- TODO confirm unused */
    int op;          /* TD_OP_READ or TD_OP_WRITE */
    struct tdarchipelago_request *tdreq;  /* owning merged request */
} AIORequestData;
|
|
79 |
|
|
80 |
|
|
81 |
/* Process-wide xseg state: one joined segment and one bound port. */
struct xseg *xseg = NULL;   /* joined xseg segment (set in tdarchipelago_open) */
xport srcport = NoPort;     /* our dynamically bound port number */
struct xseg_port *port;     /* our bound xseg port */
xport mportno = NoPort;     /* mapperd port (default 1001, URI-overridable) */
xport vportno = NoPort;     /* vlmcd port (default 501, URI-overridable) */
|
|
86 |
|
|
87 |
/*
 * Layout of the posixfd peer's signal descriptor, passed to
 * xseg_wait_signal().  NOTE(review): signal_file is sized by
 * sizeof(void *) -- this must mirror the xseg posixfd driver's own
 * struct; confirm against the xseg headers.
 */
struct posixfd_signal_desc {
    char signal_file[sizeof(void *)];
    int fd;
    int flag;
};
|
|
92 |
|
|
93 |
/* Per-open-image driver state (td_driver_t private data). */
struct tdarchipelago_data {
    /* Archipelago Volume Name and Size */
    char *volname;
    ssize_t size;    /* volume size in bytes, from get_image_info() */

    /* Requests Queue */
    struct list_head reqs_inflight;  /* requests submitted to Archipelago */
    struct list_head reqs_free;      /* available request slots */
    struct tdarchipelago_request *req_deferred;  /* request held back for merging */
    struct tdarchipelago_request reqs[MAX_ARCHIPELAGO_REQS];
    int reqs_free_count;

    /* Flush event: -1 when no merge-flush timeout is armed. */
    int timeout_event_id;

    /* Inter-thread pipe: completion threads write request pointers to
     * pipe_fds[1]; the tapdisk event loop reads them from pipe_fds[0]. */
    int pipe_fds[2];
    int pipe_event_id;

    /* Driver Stats */
    struct {
        int req_total;              /* tapdisk requests queued */

        int req_issued;             /* merged requests submitted */
        int req_issued_no_merge;    /* deferred req flushed by unmergeable req */
        int req_issued_forced;      /* deferred req flushed by merge limits */
        int req_issued_direct;      /* submitted without deferral */
        int req_issued_timeout;     /* deferred req flushed by the timeout */

        int req_miss;               /* merge attempts that failed */
        int req_miss_op;            /* ... due to differing op */
        int req_miss_ofs;           /* ... due to non-contiguous offset */
        int req_miss_buf;           /* ... due to non-contiguous buffer */
    } stat;
};
|
|
128 |
|
|
129 |
/* State of one xseg completion-polling thread. */
typedef struct ArchipelagoThread {
    pthread_t request_th;
    pthread_cond_t request_cond;    /* signalled by the thread just before exit */
    pthread_mutex_t request_mutex;  /* protects the is_signaled handshake */
    int is_signaled;                /* set by the thread on exit */
    int is_running;                 /* cleared by close() to request exit */
} ArchipelagoThread;

ArchipelagoThread archipelago_th[NUM_XSEG_THREADS];
|
|
138 |
|
|
139 |
static void tdarchipelago_finish_aiocb(void *arg, ssize_t c, AIORequestData *reqdata);
|
|
140 |
static int tdarchipelago_close(td_driver_t *driver);
|
|
141 |
static void tdarchipelago_pipe_read_cb(event_id_t eb, char mode, void *data);
|
|
142 |
|
|
143 |
static int wait_reply(struct xseg_request *expected_req)
|
|
144 |
{
|
|
145 |
struct xseg_request *rec;
|
|
146 |
xseg_prepare_wait(xseg, srcport);
|
|
147 |
struct posixfd_signal_desc *psd = xseg_get_signal_desc(xseg, port);
|
|
148 |
while(1) {
|
|
149 |
rec = xseg_receive(xseg, srcport, 0);
|
|
150 |
if(rec) {
|
|
151 |
if( rec != expected_req) {
|
|
152 |
DPRINTF("wait_reply(): Unknown request.\n");
|
|
153 |
xseg_put_request(xseg, rec, srcport);
|
|
154 |
} else if(!(rec->state & XS_SERVED)) {
|
|
155 |
DPRINTF("wait_reply(): Failed request.\n");
|
|
156 |
return -1;
|
|
157 |
} else {
|
|
158 |
break;
|
|
159 |
}
|
|
160 |
}
|
|
161 |
xseg_wait_signal(xseg, psd, 1000000UL);
|
|
162 |
}
|
|
163 |
xseg_cancel_wait(xseg, srcport);
|
|
164 |
return 0;
|
|
165 |
}
|
|
166 |
|
|
167 |
static void xseg_request_handler(void *arthd)
|
|
168 |
{
|
|
169 |
struct posixfd_signal_desc *psd = xseg_get_signal_desc(xseg, port);
|
|
170 |
ArchipelagoThread *th = (ArchipelagoThread *) arthd;
|
|
171 |
while(th->is_running) {
|
|
172 |
struct xseg_request *req;
|
|
173 |
xseg_prepare_wait(xseg, srcport);
|
|
174 |
req = xseg_receive(xseg, srcport, 0);
|
|
175 |
if(req) {
|
|
176 |
AIORequestData *reqdata;
|
|
177 |
xseg_get_req_data(xseg, req, (void **)&reqdata);
|
|
178 |
if(reqdata->op == TD_OP_READ)
|
|
179 |
{
|
|
180 |
char *data = xseg_get_data(xseg, req);
|
|
181 |
memcpy(reqdata->buf, data, req->serviced);
|
|
182 |
int serviced = req->serviced;
|
|
183 |
tdarchipelago_finish_aiocb(reqdata->tdreq, serviced, reqdata);
|
|
184 |
xseg_put_request(xseg, req, srcport);
|
|
185 |
} else if(reqdata->op == TD_OP_WRITE) {
|
|
186 |
int serviced = req->serviced;
|
|
187 |
tdarchipelago_finish_aiocb(reqdata->tdreq, serviced, reqdata);
|
|
188 |
xseg_put_request(xseg, req, srcport);
|
|
189 |
}
|
|
190 |
} else {
|
|
191 |
xseg_wait_signal(xseg, psd, 1000000UL);
|
|
192 |
}
|
|
193 |
xseg_cancel_wait(xseg, srcport);
|
|
194 |
}
|
|
195 |
th->is_signaled = 1;
|
|
196 |
pthread_cond_signal(&th->request_cond);
|
|
197 |
pthread_exit(NULL);
|
|
198 |
}
|
|
199 |
|
|
200 |
static uint64_t get_image_info(char *volname)
|
|
201 |
{
|
|
202 |
uint64_t size;
|
|
203 |
int r;
|
|
204 |
|
|
205 |
int targetlen = strlen(volname);
|
|
206 |
struct xseg_request *req = xseg_get_request(xseg, srcport, mportno, X_ALLOC);
|
|
207 |
r = xseg_prep_request(xseg, req, targetlen, sizeof(struct xseg_reply_info));
|
|
208 |
if(r < 0) {
|
|
209 |
xseg_put_request(xseg, req, srcport);
|
|
210 |
DPRINTF("get_image_info(): Cannot prepare request. Aborting...");
|
|
211 |
exit(-1);
|
|
212 |
}
|
|
213 |
|
|
214 |
char *target = xseg_get_target(xseg, req);
|
|
215 |
strncpy(target, volname, targetlen);
|
|
216 |
req->size = req->datalen;
|
|
217 |
req->offset = 0;
|
|
218 |
req->op = X_INFO;
|
|
219 |
|
|
220 |
xport p = xseg_submit(xseg, req, srcport, X_ALLOC);
|
|
221 |
if(p == NoPort) {
|
|
222 |
xseg_put_request(xseg, req, srcport);
|
|
223 |
DPRINTF("get_image_info(): Cannot submit request. Aborting...");
|
|
224 |
exit(-1);
|
|
225 |
}
|
|
226 |
xseg_signal(xseg, p);
|
|
227 |
r = wait_reply(req);
|
|
228 |
if(r) {
|
|
229 |
xseg_put_request(xseg, req, srcport);
|
|
230 |
DPRINTF("get_image_info(): wait_reply() error. Aborting...");
|
|
231 |
exit(-1);
|
|
232 |
}
|
|
233 |
struct xseg_reply_info *xinfo = (struct xseg_reply_info *) xseg_get_data(xseg, req);
|
|
234 |
size = xinfo->size;
|
|
235 |
xseg_put_request(xseg, req, srcport);
|
|
236 |
return size;
|
|
237 |
}
|
|
238 |
|
|
239 |
/*
 * Extract the numeric port from a "<needle><number>" token, e.g.
 * "mport=1001" with needle "mport=".
 *
 * Fix: the original abused strtok() with needle as a delimiter *set*
 * (on a strdup'd copy), which worked only because digits never appear
 * in the needles, and could pass NULL to atoi() on malformed input.
 * Parse the suffix after the needle directly instead.
 */
static void xseg_find_port(char *pstr, const char *needle, xport *port)
{
    char *match = strstr(pstr, needle);
    if (!match)
        return;  /* malformed token: leave *port untouched */
    *port = (xport) atoi(match + strlen(needle));
}
|
|
247 |
|
|
248 |
static void parse_uri(char **volname, const char *s)
|
|
249 |
{
|
|
250 |
int n=0, nn, i;
|
|
251 |
char *tokens[4];
|
|
252 |
|
|
253 |
char *ds = strdup(s);
|
|
254 |
tokens[n] = strtok(ds, ":");
|
|
255 |
*volname = malloc(strlen(tokens[n]) + 1);
|
|
256 |
strcpy(*volname, tokens[n]);
|
|
257 |
|
|
258 |
for(i = 0, nn = 0; s[i]; i++)
|
|
259 |
nn += (s[i] == ':');
|
|
260 |
/* FIXME: Protect tokens array overflow */
|
|
261 |
if( nn > 3)
|
|
262 |
i = 3;
|
|
263 |
else
|
|
264 |
i = nn;
|
|
265 |
|
|
266 |
while(tokens[n] && n <= i) tokens[++n] = strtok(NULL, ":");
|
|
267 |
|
|
268 |
for(nn = 0; nn <= i; nn++) {
|
|
269 |
if(strstr(tokens[nn], "mport="))
|
|
270 |
xseg_find_port(tokens[nn], "mport=", &mportno);
|
|
271 |
if(strstr(tokens[nn], "vport="))
|
|
272 |
xseg_find_port(tokens[nn], "vport=", &vportno);
|
|
273 |
}
|
|
274 |
}
|
|
275 |
|
|
276 |
/*
 * Open an Archipelago volume.  name is a URI of the form
 * "volume[:mport=N][:vport=N]".  Initializes the request pool and the
 * completion pipe, joins the xseg segment, binds a dynamic port, reads
 * the volume size and starts the completion threads.
 *
 * Fix: after a successful pipe(), retval was 0, so the xseg failure
 * paths below used to `goto err_exit` and return 0 (success); they now
 * set an explicit error code.
 */
static int tdarchipelago_open(td_driver_t *driver, const char *name, td_flag_t flags)
{
    struct tdarchipelago_data *prv = driver->data;
    uint64_t size; /* Archipelago volume size in bytes */
    int i, retval;

    /* Init private structure */
    memset(prv, 0x00, sizeof(struct tdarchipelago_data));

    /* Default mapperd and vlmcd ports, overridable via the URI. */
    vportno = 501;
    mportno = 1001;

    INIT_LIST_HEAD(&prv->reqs_inflight);
    INIT_LIST_HEAD(&prv->reqs_free);

    for (i = 0; i < MAX_ARCHIPELAGO_REQS; i++) {
        INIT_LIST_HEAD(&prv->reqs[i].queue);
        list_add(&prv->reqs[i].queue, &prv->reqs_free);
    }
    prv->reqs_free_count = MAX_ARCHIPELAGO_REQS;

    prv->pipe_fds[0] = prv->pipe_fds[1] = prv->pipe_event_id = -1;
    prv->timeout_event_id = -1;

    /* Parse Archipelago volume name and XSEG mportno, vportno. */
    parse_uri(&prv->volname, name);

    /* Inter-thread completion pipe. */
    retval = pipe(prv->pipe_fds);
    if (retval) {
        DPRINTF("tdarchipelago_open(): Failed to create inter-thread pipe (%d)\n", retval);
        goto err_exit;
    }
    prv->pipe_event_id = tapdisk_server_register_event(
            SCHEDULER_POLL_READ_FD,
            prv->pipe_fds[0],
            0,
            tdarchipelago_pipe_read_cb,
            prv);

    /* Archipelago context. */
    if (!xseg) {
        if (xseg_initialize()) {
            DPRINTF("tdarchipelago_open(): Cannot initialize xseg.\n");
            retval = -EIO;
            goto err_exit;
        }
    }
    xseg = xseg_join((char *)"segdev", (char *)"xsegbd", (char *)"posixfd", NULL);
    if (!xseg) {
        DPRINTF("tdarchipelago_open(): Cannot join segment.\n");
        retval = -EIO;
        goto err_exit;
    }

    port = xseg_bind_dynport(xseg);
    if (!port) {
        DPRINTF("tdarchipelago_open(): Failed to bind port.\n");
        retval = -EIO;
        goto err_exit;
    }
    srcport = port->portno;
    xseg_init_local_signal(xseg, srcport);

    prv->size = get_image_info(prv->volname);
    size = prv->size;

    driver->info.sector_size = DEFAULT_SECTOR_SIZE;
    driver->info.size = size >> SECTOR_SHIFT;
    driver->info.info = 0;

    /* Start XSEG request handler threads (detached; close() synchronizes
     * via the cond/flag handshake rather than pthread_join). */
    pthread_attr_t attr;
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
    for (i = 0; i < NUM_XSEG_THREADS; i++) {
        pthread_cond_init(&archipelago_th[i].request_cond, NULL);
        pthread_mutex_init(&archipelago_th[i].request_mutex, NULL);
        archipelago_th[i].is_signaled = 0;
        archipelago_th[i].is_running = 1;
        pthread_create(&archipelago_th[i].request_th, &attr,
                (void *) xseg_request_handler,
                (void *)&archipelago_th[i]);
    }
    return 0;

err_exit:
    tdarchipelago_close(driver);
    return retval;
}
|
|
366 |
|
|
367 |
/*
 * Tear down the driver: stop the completion threads (waiting for each
 * one's exit handshake), release the xseg port and segment, and close
 * the completion pipe.  Also invoked on tdarchipelago_open() failure.
 *
 * Fixes: pthread_cond_wait is wrapped in a while loop (spurious
 * wakeups), and the xseg teardown is guarded so the open() early-error
 * path cannot pass NULL to xseg_leave_dynport()/xseg_leave().
 */
static int tdarchipelago_close(td_driver_t *driver)
{
    struct tdarchipelago_data *prv = driver->data;
    int i;

    for (i = 0; i < NUM_XSEG_THREADS; i++) {
        if (archipelago_th[i].is_running) {
            archipelago_th[i].is_running = 0;
            pthread_mutex_lock(&archipelago_th[i].request_mutex);
            /* Loop, not `if`: condition waits can wake spuriously. */
            while (!archipelago_th[i].is_signaled)
                pthread_cond_wait(&archipelago_th[i].request_cond,
                                  &archipelago_th[i].request_mutex);
            pthread_mutex_unlock(&archipelago_th[i].request_mutex);
            pthread_cond_destroy(&archipelago_th[i].request_cond);
            pthread_mutex_destroy(&archipelago_th[i].request_mutex);
        }
    }

    /* May be NULL if open() failed before joining/binding. */
    if (xseg) {
        if (port)
            xseg_leave_dynport(xseg, port);
        xseg_leave(xseg);
    }

    if (prv->pipe_fds[0] >= 0) {
        close(prv->pipe_fds[0]);
        close(prv->pipe_fds[1]);
    }

    if (prv->pipe_event_id >= 0)
        tapdisk_server_unregister_event(prv->pipe_event_id);

    return 0;
}
|
|
396 |
|
|
397 |
/*
 * Completion callback, run on an xseg handler thread: record the result
 * on the merged request and hand its pointer to the tapdisk event loop
 * through the completion pipe.  Frees reqdata.
 */
static void tdarchipelago_finish_aiocb(void *arg, ssize_t c, AIORequestData *reqdata)
{
    struct tdarchipelago_request *req = arg;
    struct tdarchipelago_data *prv = req->treq[0].image->driver->data;
    int rv;

    req->result = c;

    /* Retry the pipe write only on transient errors (EAGAIN/EINTR);
     * a pointer-sized write to a pipe is atomic. */
    do {
        rv = write(prv->pipe_fds[1], (void *)&req, sizeof(req));
    } while (rv < 0 && (errno == EAGAIN || errno == EINTR));

    free(reqdata);

    if (rv <= 0)
        DPRINTF("tdarchipelago_finish_aiocb(): Failed to write to completion pipe\n");
}
|
|
416 |
|
|
417 |
/*
 * tapdisk event-loop callback for the completion pipe: reassemble the
 * request pointer written by tdarchipelago_finish_aiocb(), complete all
 * merged tapdisk requests with the single result, and recycle the slot.
 */
static void tdarchipelago_pipe_read_cb(event_id_t eb, char mode, void *data)
{
    struct tdarchipelago_data *prv = data;
    struct tdarchipelago_request *req;
    char *p = (void *)&req;  /* raw pointer bytes are read into req */
    int retval, tr, i;

    /* Accumulate exactly sizeof(req) bytes, tolerating EINTR/EAGAIN. */
    for(tr=0; tr<sizeof(req);) {
        retval = read(prv->pipe_fds[0], p + tr, sizeof(req) - tr);
        if(retval == 0) {
            /* EOF: writer side closed mid-pointer. */
            DPRINTF("tdarchipelago_pipe_read_cb(): Short read on completion pipe\n");
            break;
        }
        if(retval < 0) {
            if( (errno == EAGAIN) || (errno == EINTR))
                continue;
            break;
        }
        tr += retval;
    }

    if(tr != sizeof(req)) {
        /* NOTE(review): on this path the request remains on
         * reqs_inflight and its slot is never recycled. */
        DPRINTF("tdarchipelago_pipe_read_cb(): Read aborted on completion pipe\n");
        return;
    }

    /* Propagate the merged result to every coalesced treq. */
    for(i=0; i < req->treq_count; i++)
    {
        int err = req->result < 0 ? -EIO : 0;
        if(err < 0)
            DPRINTF("tdarchipelago_pipe_read_cb(): Error in req->result: %d\n", err);
        td_complete_request(req->treq[i], err);
    }

    /* Return the slot to the free pool. */
    list_move(&req->queue, &prv->reqs_free);
    prv->reqs_free_count++;
}
|
|
454 |
|
|
455 |
/*
 * Submit an asynchronous X_READ for [offset, offset+size) of volname.
 * On completion an xseg handler thread copies the payload into buf and
 * calls tdarchipelago_finish_aiocb().  Returns 0 on success, -1 on
 * error.
 *
 * Fixes over the original: the reqdata allocation is checked, reqdata
 * is freed on the error paths (it leaked), and xseg_put_request() is
 * no longer called with a NULL request when allocation fails.
 */
static int archipelago_aio_read(char *volname, off_t offset, ssize_t size, char *buf,
        struct tdarchipelago_request *tdreq)
{
    AIORequestData *reqdata = NULL;
    int retval;
    int targetlen = strlen(volname);

    struct xseg_request *req = xseg_get_request(xseg, srcport, vportno, X_ALLOC);
    if (!req) {
        DPRINTF("archipelago_aio_read(): Cannot get xseg request.\n");
        return -1;
    }

    retval = xseg_prep_request(xseg, req, targetlen, size);
    if (retval < 0) {
        DPRINTF("archipelago_aio_read(): Cannot prepare xseg request.\n");
        retval = -1;
        goto err_exit;
    }
    char *target = xseg_get_target(xseg, req);
    if (!target) {
        DPRINTF("archipelago_aio_read(): Cannot get xseg target.\n");
        retval = -1;
        goto err_exit;
    }
    /* xseg targets are length-delimited; no NUL terminator needed. */
    strncpy(target, volname, targetlen);
    req->size = size;
    req->offset = offset;
    req->op = X_READ;
    /* NOTE(review): XF_FLUSH on a read is preserved from the original;
     * confirm it is intended. */
    req->flags |= XF_FLUSH;

    reqdata = malloc(sizeof(AIORequestData));
    if (!reqdata) {
        DPRINTF("archipelago_aio_read(): Cannot allocate reqdata.\n");
        retval = -1;
        goto err_exit;
    }
    reqdata->volname = volname;
    reqdata->offset = offset;
    reqdata->size = size;
    reqdata->buf = buf;
    reqdata->op = TD_OP_READ;
    reqdata->tdreq = tdreq;

    xseg_set_req_data(xseg, req, reqdata);
    xport p = xseg_submit(xseg, req, srcport, X_ALLOC);
    if (p == NoPort) {
        DPRINTF("archipelago_aio_read(): Could not submit xseg request.\n");
        retval = -1;
        goto err_exit;
    }
    xseg_signal(xseg, p);
    return 0;

err_exit:
    DPRINTF("archipelago_aio_read(): Read error: %d\n", retval);
    free(reqdata);  /* free(NULL) is a no-op */
    xseg_put_request(xseg, req, srcport);
    return retval;
}
|
|
507 |
|
|
508 |
/*
 * Submit an asynchronous X_WRITE of buf to [offset, offset+size) of
 * volname; the payload is copied into the xseg segment before submit.
 * Returns 0 on success, -1 on error.
 *
 * Fixes over the original: the reqdata allocation is checked, reqdata
 * is freed on the error paths (it leaked), and xseg_put_request() is
 * no longer called with a NULL request when allocation fails.
 */
static int archipelago_aio_write(char *volname, off_t offset, ssize_t size, char *buf,
        struct tdarchipelago_request *tdreq)
{
    char *data = NULL;
    int retval;
    AIORequestData *reqdata = NULL;
    int targetlen = strlen(volname);

    struct xseg_request *req = xseg_get_request(xseg, srcport, vportno, X_ALLOC);
    if (!req) {
        DPRINTF("archipelago_aio_write(): Cannot get xseg request.\n");
        return -1;
    }
    retval = xseg_prep_request(xseg, req, targetlen, size);
    if (retval < 0) {
        DPRINTF("archipelago_aio_write(): Cannot prepare xseg request.\n");
        retval = -1;
        goto err_exit;
    }
    char *target = xseg_get_target(xseg, req);
    if (!target) {
        DPRINTF("archipelago_aio_write(): Cannot get xseg target.\n");
        retval = -1;
        goto err_exit;
    }
    /* xseg targets are length-delimited; no NUL terminator needed. */
    strncpy(target, volname, targetlen);
    req->size = size;
    req->offset = offset;
    req->op = X_WRITE;
    req->flags |= XF_FLUSH;

    reqdata = malloc(sizeof(AIORequestData));
    if (!reqdata) {
        DPRINTF("archipelago_aio_write(): Cannot allocate reqdata.\n");
        retval = -1;
        goto err_exit;
    }
    reqdata->volname = volname;
    reqdata->offset = offset;
    reqdata->size = size;
    reqdata->buf = buf;
    reqdata->op = TD_OP_WRITE;
    reqdata->tdreq = tdreq;

    xseg_set_req_data(xseg, req, reqdata);
    data = xseg_get_data(xseg, req);
    if (!data) {
        DPRINTF("archipelago_aio_write(): Cannot get xseg data.\n");
        retval = -1;
        goto err_exit;
    }

    memcpy(data, buf, size);
    xport p = xseg_submit(xseg, req, srcport, X_ALLOC);
    if (p == NoPort) {
        DPRINTF("archipelago_aio_write(): Cannot submit xseg req.\n");
        retval = -1;
        goto err_exit;
    }
    xseg_signal(xseg, p);
    return 0;

err_exit:
    DPRINTF("archipelago_aio_write(): Write error: %d\n", retval);
    free(reqdata);  /* free(NULL) is a no-op */
    xseg_put_request(xseg, req, srcport);
    return retval;
}
|
|
569 |
|
|
570 |
static int tdarchipelago_submit_request(struct tdarchipelago_data *prv,
|
|
571 |
struct tdarchipelago_request *req)
|
|
572 |
{
|
|
573 |
int retval, i;
|
|
574 |
prv->stat.req_issued++;
|
|
575 |
list_add_tail(&req->queue, &prv->reqs_inflight);
|
|
576 |
|
|
577 |
switch(req->op) {
|
|
578 |
case TD_OP_READ:
|
|
579 |
retval = archipelago_aio_read(prv->volname, req->offset, req->size, req->buf, req);
|
|
580 |
break;
|
|
581 |
case TD_OP_WRITE:
|
|
582 |
retval = archipelago_aio_write(prv->volname, req->offset, req->size, req->buf, req);
|
|
583 |
break;
|
|
584 |
default:
|
|
585 |
retval = - EINVAL;
|
|
586 |
}
|
|
587 |
|
|
588 |
if( retval < 0) {
|
|
589 |
retval = -EIO;
|
|
590 |
goto err;
|
|
591 |
}
|
|
592 |
|
|
593 |
return 0;
|
|
594 |
|
|
595 |
err:
|
|
596 |
for(i=0; i < req->treq_count; i++)
|
|
597 |
td_complete_request(req->treq[i], retval);
|
|
598 |
return retval;
|
|
599 |
}
|
|
600 |
|
|
601 |
/*
 * Merge-flush timeout callback: a deferred (partially merged) request
 * has waited long enough -- submit it now and disarm the timeout.
 */
static void tdarchipelago_timeout_cb(event_id_t eb, char mode, void *data)
{
    struct tdarchipelago_data *prv = data;
    struct tdarchipelago_request *deferred = prv->req_deferred;

    if (deferred != NULL) {
        prv->req_deferred = NULL;
        tdarchipelago_submit_request(prv, deferred);
        prv->stat.req_issued_timeout++;
    }

    tapdisk_server_unregister_event(eb);
    prv->timeout_event_id = -1;
}
|
|
614 |
|
|
615 |
/*
 * Queue one tapdisk read/write.  Contiguous same-op requests are merged
 * into a deferred request; the deferred request is flushed when the
 * merge limits are hit, when an unmergeable request arrives, or by the
 * flush timeout armed at the bottom of this function.
 */
static void tdarchipelago_queue_request(td_driver_t *driver, td_request_t treq)
{
    struct tdarchipelago_data *prv= driver->data;
    size_t size = treq.secs * driver->info.sector_size;
    uint64_t offset = treq.sec * (uint64_t)driver->info.sector_size;
    struct tdarchipelago_request *req;
    int merged = 0;

    /* Update stats */
    prv->stat.req_total++;

    if(prv->req_deferred) {
        struct tdarchipelago_request *dr = prv->req_deferred;

        /* Mergeable iff same op, byte-contiguous on disk AND in memory. */
        if((dr->op == treq.op) &&
            ((dr->offset + dr->size) == offset) &&
            (((unsigned long)dr->buf + dr->size)==(unsigned long)treq.buf))
        {
            dr->treq[dr->treq_count++] = treq;
            dr->size += size;
            merged = 1;
        } else {
            /* Record why the merge failed. */
            prv->stat.req_miss++;
            if(dr->op != treq.op)
                prv->stat.req_miss_op++;
            if((dr->offset + dr->size) != offset)
                prv->stat.req_miss_ofs++;
            if(((unsigned long)dr->buf + dr->size) != (unsigned long)treq.buf)
                prv->stat.req_miss_buf++;
        }

        /* Flush the deferred request unless the new treq was itself a
         * 44k chunk and the merged size/count limits still hold.
         * NOTE(review): 11*4096 appears to be a workload-specific chunk
         * size -- confirm where the 44k pattern comes from. */
        if(!merged || (size != (11 * 4096)) || //44k request
            (dr->size >= 1024 * 1024) ||
            (dr->treq_count == MAX_ARCHIPELAGO_MERGED_REQS))
        {
            tdarchipelago_submit_request(prv, dr);
            prv->req_deferred = NULL;

            if(!merged)
                prv->stat.req_issued_no_merge++;
            else
                prv->stat.req_issued_forced++;
        }
    }


    if(!merged) {
        if(prv->reqs_free_count == 0) {
            /* No slot available: push back to the caller. */
            td_complete_request(treq, -EBUSY);
            goto no_free;
        }
        req = list_entry(prv->reqs_free.next, struct tdarchipelago_request, queue);

        list_del(&req->queue);
        prv->reqs_free_count--;

        /* Fill request */
        req->treq_count = 1;
        req->treq[0] = treq;

        req->op = treq.op;
        req->offset = offset;
        req->size = size;
        req->buf = treq.buf;

        /* Defer 44k requests to give later treqs a chance to merge;
         * (size < 1024*1024) is always true here given the first test. */
        if ((size == (11 * 4096)) && (size < 1024 * 1024)) {
            prv->req_deferred = req;
        } else {
            tdarchipelago_submit_request(prv, req);
            prv->stat.req_issued_direct++;
        }
    }
no_free:
    /* Keep the flush timeout armed iff a deferred request exists. */
    if(prv->req_deferred && (prv->timeout_event_id == -1)) {
        prv->timeout_event_id = tapdisk_server_register_event(
                SCHEDULER_POLL_TIMEOUT,
                -1,
                0,
                tdarchipelago_timeout_cb,
                prv
                );
    } else if(!prv->req_deferred && (prv->timeout_event_id != -1)) {
        tapdisk_server_unregister_event(prv->timeout_event_id);
        prv->timeout_event_id = -1;
    }
}
|
|
701 |
|
|
702 |
/* Archipelago volumes expose no parent image to tapdisk. */
static int tdarchipelago_get_parent_id(td_driver_t *driver, td_disk_id_t *id)
{
    return TD_NO_PARENT;
}
|
|
706 |
|
|
707 |
/* Parent chaining is not supported: any parent is invalid. */
static int tdarchipelago_validate_parent(td_driver_t *driver, td_driver_t *parent,
        td_flag_t flags)
{
    return -EINVAL;
}
|
|
712 |
|
|
713 |
|
|
714 |
/* Export the driver's merge/issue counters to tap-ctl stats. */
static void tdarchipelago_stats(td_driver_t *driver, td_stats_t *st)
{
    struct tdarchipelago_data *prv = driver->data;
    tapdisk_stats_field(st, "req_free_count", "d", prv->reqs_free_count);
    tapdisk_stats_field(st, "req_total", "d", prv->stat.req_total);
    tapdisk_stats_field(st, "req_issued", "d", prv->stat.req_issued);
    tapdisk_stats_field(st, "req_issued_no_merge", "d", prv->stat.req_issued_no_merge);
    tapdisk_stats_field(st, "req_issued_forced", "d", prv->stat.req_issued_forced);
    tapdisk_stats_field(st, "req_issued_direct", "d", prv->stat.req_issued_direct);
    tapdisk_stats_field(st, "req_issued_timeout", "d", prv->stat.req_issued_timeout);
    tapdisk_stats_field(st, "req_miss", "d", prv->stat.req_miss);
    tapdisk_stats_field(st, "req_miss_op", "d", prv->stat.req_miss_op);
    tapdisk_stats_field(st, "req_miss_ofs", "d", prv->stat.req_miss_ofs);
    tapdisk_stats_field(st, "req_miss_buf", "d", prv->stat.req_miss_buf);
    /* Matches the 1 MiB flush threshold in tdarchipelago_queue_request(). */
    tapdisk_stats_field(st, "max_merge_size", "d", 1024 * 1024);
}
|
|
730 |
|
|
731 |
/* tapdisk driver registration table for the Archipelago backend. */
struct tap_disk tapdisk_archipelago = {
    .disk_type = "tapdisk_archipelago",
    .private_data_size = sizeof(struct tdarchipelago_data),
    .flags = 0,
    .td_open = tdarchipelago_open,
    .td_close= tdarchipelago_close,
    /* Reads and writes share one queueing path; op is taken from treq. */
    .td_queue_read = tdarchipelago_queue_request,
    .td_queue_write = tdarchipelago_queue_request,
    .td_get_parent_id = tdarchipelago_get_parent_id,
    .td_validate_parent = tdarchipelago_validate_parent,
    .td_debug = NULL,
    .td_stats = tdarchipelago_stats,
};
|