2 * Copyright 2013 GRNET S.A. All rights reserved.
4 * Redistribution and use in source and binary forms, with or
5 * without modification, are permitted provided that the following
8 * 1. Redistributions of source code must retain the above
9 * copyright notice, this list of conditions and the following
11 * 2. Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following
13 * disclaimer in the documentation and/or other materials
14 * provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
17 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
20 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
29 * The views and conclusions contained in the software and
30 * documentation are those of the authors and should not be
31 * interpreted as representing official policies, either expressed
32 * or implied, of GRNET S.A.
40 #include <xseg/xseg.h>
43 #include <xseg/protocol.h>
/* general mapper flags, passed in the uint32_t flags argument of the map
 * handling functions below (e.g. open_map) */
#define MF_LOAD (1 << 0)
#define MF_EXCLUSIVE (1 << 1)
#define MF_FORCE (1 << 2)
#define MF_ARCHIP (1 << 3)
/* default block (object) size of a map: 1 << 22 bytes = 4 MiB */
#define MAPPER_DEFAULT_BLOCKSIZE (1<<22)
/* Name prefix for archipelago objects.
 * MAPPER_PREFIX_LEN is derived from the literal itself (sizeof minus the
 * terminating NUL) so the two can never drift out of sync if the prefix is
 * ever changed; it still evaluates to the integer constant 7. */
#define MAPPER_PREFIX "archip_"
#define MAPPER_PREFIX_LEN (sizeof(MAPPER_PREFIX) - 1)
/* These values come straight from the size of map_node->objectidx and the
 * map epoch: "hexlified" means two hex characters per byte of a uint64_t,
 * hence sizeof(uint64_t) << 1. */
/* 2 hex characters per byte of a uint64_t => 16 characters each */
#define HEXLIFIED_EPOCH (sizeof(uint64_t) << 1)
#define HEXLIFIED_INDEX (sizeof(uint64_t) << 1)
/* should always be the maximum objectlen of all versions */
#define MAX_OBJECT_LEN 128
/* Since object names are calculated from the volume names, the limit of the
 * maximum volume len is calculated from the maximum object len, statically.
 * How the object name is calculated is reflected in this formula:
 * objectlen = volumelen + 1 + HEXLIFIED_INDEX + 1 + HEXLIFIED_EPOCH
 */
/* two bytes are reserved for the '-' separators of the object name */
#define MAX_VOLUME_LEN (MAX_OBJECT_LEN - HEXLIFIED_INDEX - HEXLIFIED_EPOCH - 2)
/* Compile time limits */
#if MAX_OBJECT_LEN > XSEG_MAX_TARGETLEN
#error "XSEG_MAX_TARGETLEN should be at least MAX_OBJECT_LEN"
/* NOTE(review): the #endif matching the #if above is missing from this
 * chunk of the file -- restore it when reconciling with the full source. */
#if MAX_VOLUME_LEN > XSEG_MAX_TARGETLEN
#error "XSEG_MAX_TARGETLEN should be at least MAX_VOLUME_LEN"
/* NOTE(review): #endif missing here as well. */
/* defined elsewhere; presumably the (hexlified) name of the all-zero
 * object -- confirm at its definition. Its strlen() is ZERO_BLOCK_LEN. */
extern char *zero_block;
#define ZERO_BLOCK_LEN (64) /* strlen(zero_block) */
/* callback function type: takes the peer request and the xseg request it
 * concerns (see copyup_cb below for a matching function) */
typedef void (*cb_t)(struct peer_req *pr, struct xseg_request *req);
/* map object flags (persistent per-object properties, as opposed to the
 * run time state flags below) */
#define MF_OBJECT_WRITABLE (1 << 0)
#define MF_OBJECT_ARCHIP (1 << 1)
#define MF_OBJECT_ZERO (1 << 2)
/* run time map object state flags */
#define MF_OBJECT_COPYING (1 << 0)
#define MF_OBJECT_WRITING (1 << 1)
#define MF_OBJECT_DELETING (1 << 2)
//#define MF_OBJECT_DESTROYED (1 << 3)
#define MF_OBJECT_SNAPSHOTTING (1 << 4)
/* an object with any of these operations in flight is not ready for new
 * work; callers wait on it (see wait_on_mapnode) */
#define MF_OBJECT_NOT_READY (MF_OBJECT_COPYING|MF_OBJECT_WRITING|\
				MF_OBJECT_DELETING|MF_OBJECT_SNAPSHOTTING)
/* NOTE(review): the "struct map_node {" opener and some members (e.g. the
 * object flags and the st_cond_t field used by wait_on_mapnode) are missing
 * from this chunk -- the members below belong to struct map_node. */
	volatile uint32_t state;	/* presumably the MF_OBJECT_* run time flags */
	uint64_t objectidx; /* FIXME this is probably not needed */
	char object[MAX_OBJECT_LEN + 1]; /* NULL terminated string */
	volatile uint32_t ref;		/* reference count (see get_mapnode/put_mapnode) */
	volatile uint32_t waiters;	/* st threads currently waiting on this node */
/* map flags (persistent map properties, as opposed to the run time state
 * flags below) */
#define MF_MAP_READONLY (1 << 0)
#define MF_MAP_DELETED (1 << 1)
/* run time map state flags */
#define MF_MAP_LOADING (1 << 0)
#define MF_MAP_DESTROYED (1 << 1)
#define MF_MAP_WRITING (1 << 2)
#define MF_MAP_DELETING (1 << 3)
#define MF_MAP_DROPPING_CACHE (1 << 4)
#define MF_MAP_EXCLUSIVE (1 << 5)
#define MF_MAP_OPENING (1 << 6)
#define MF_MAP_CLOSING (1 << 7)
//#define MF_MAP_DELETED (1 << 8)
#define MF_MAP_SNAPSHOTTING (1 << 9)
#define MF_MAP_SERIALIZING (1 << 10)
#define MF_MAP_HASHING (1 << 11)
/* a map with any of these operations in flight is not ready for new work.
 * NOTE(review): the final continuation line of this macro (presumably
 * "MF_MAP_HASHING)") is missing from this chunk -- confirm against the
 * full source. */
#define MF_MAP_NOT_READY (MF_MAP_LOADING|MF_MAP_WRITING|MF_MAP_DELETING|\
			MF_MAP_DROPPING_CACHE|MF_MAP_OPENING| \
			MF_MAP_SNAPSHOTTING|MF_MAP_SERIALIZING| \
/* NOTE(review): the "struct map {" opener and several members are missing
 * from this chunk (calc_map_obj below implies at least "size" and
 * "blocksize" fields; wait_on_map implies an st_cond_t "cond"); the members
 * below belong to struct map. */
	volatile uint32_t state;	/* presumably the MF_MAP_* run time flags */
	char volume[MAX_VOLUME_LEN + 1]; /* NULL terminated string */
	struct map_node *objects;	/* the map's objects (indexed, see get_mapnode) */
	volatile uint32_t ref;		/* reference count */
	volatile uint32_t waiters;	/* st threads waiting on the map (wait_on_map) */
	uint64_t opened_count;
	volatile uint32_t users;	/* objects in use; waited on by wait_all_objects_ready */
	volatile uint32_t waiters_users;	/* st threads waiting on users_cond */
	st_cond_t users_cond;	/* broadcast when users drops to 0 (signal_all_objects_ready) */
/* NOTE(review): the lines below appear to be members of two different
 * structs whose openers are missing from this chunk:
 * bportno/mbportno/hashmaps of struct mapperd (the peer private data, cast
 * from peer->priv by __get_mapperd), and copyups_nodes/err/pending_reqs of
 * struct mapper_io (the per-request private data, cast from pr->priv by
 * __get_mapper_io). Confirm against the full source. */
	xport bportno; /* blocker that accesses data */
	xport mbportno; /* blocker that accesses maps */
	xhash_t *hashmaps; // hash_function(target) --> struct map
	xhash_t *copyups_nodes; /* hash map (xseg_request) --> (corresponding map_node of copied up object)*/
	volatile int err; /* error flag */
	volatile uint64_t pending_reqs;	/* presumably in-flight request count -- confirm */
/* useful abstraction macros for context switching between st (cooperative
 * userspace) threads.
 * NOTE(review): the "do {" opening lines of each wait_* macro body (and the
 * lines that increment the respective waiters counters) are missing from
 * this chunk; only the tail of each body is visible. */
/* sleep on the pr condition until __condition__ becomes false, marking the
 * mapper io inactive while waiting */
#define wait_on_pr(__pr, __condition__) \
	__get_mapper_io(pr)->active = 0;\
	XSEGLOG2(&lc, D, "Waiting on pr %lx, ta: %u", pr, ta); \
	st_cond_wait(__pr->cond); \
	} while (__condition__)
/* sleep on a map node until __condition__ becomes false */
#define wait_on_mapnode(__mn, __condition__) \
	XSEGLOG2(&lc, D, "Waiting on map node %lx %s, waiters: %u, \
	ta: %u", __mn, __mn->object, __mn->waiters, ta); \
	st_cond_wait(__mn->cond); \
	} while (__condition__)
/* sleep on a map until __condition__ becomes false */
#define wait_on_map(__map, __condition__) \
	XSEGLOG2(&lc, D, "Waiting on map %lx %s, waiters: %u, ta: %u",\
	__map, __map->volume, __map->waiters, ta); \
	st_cond_wait(__map->cond); \
	} while (__condition__)
/* sleep until no object of __map is in use (__map->users == 0) */
#define wait_all_objects_ready(__map) \
	__map->waiters_users++; \
	XSEGLOG2(&lc, D, "Waiting for objects ready on map %lx %s, waiters: %u, ta: %u",\
	__map, __map->volume, __map->waiters_users, ta); \
	st_cond_wait(__map->users_cond); \
	} while (__map->users)
/* NOTE(review): the "do {" openers, the "} while (0)" closers and some body
 * lines of the signal_* macros (e.g. the line resetting __mn->waiters in
 * signal_mapnode) are missing from this chunk. */
/* wake the st thread sleeping on a peer request, if it is inactive */
#define signal_pr(__pr) \
	if (!__get_mapper_io(pr)->active){\
	XSEGLOG2(&lc, D, "Signaling pr %lx, ta: %u", pr, ta);\
	__get_mapper_io(pr)->active = 1;\
	st_cond_signal(__pr->cond); \
/* wake every st thread waiting on __map, resetting its waiters count */
#define signal_map(__map) \
	XSEGLOG2(&lc, D, "Checking map %lx %s. Waiters %u, ta: %u", \
	__map, __map->volume, __map->waiters, ta); \
	if (__map->waiters) { \
	ta += __map->waiters; \
	XSEGLOG2(&lc, D, "Signaling map %lx %s, waiters: %u, \
	ta: %u", __map, __map->volume, __map->waiters, ta); \
	__map->waiters = 0; \
	st_cond_broadcast(__map->cond); \
/* wake every st thread waiting for all objects of __map to become ready */
#define signal_all_objects_ready(__map) \
	/* assert __map->users == 0 */ \
	if (__map->waiters_users) { \
	ta += __map->waiters_users; \
	XSEGLOG2(&lc, D, "Signaling objects ready for map %lx %s, waiters: %u, \
	ta: %u", __map, __map->volume, __map->waiters_users, ta); \
	__map->waiters_users = 0; \
	st_cond_broadcast(__map->users_cond); \
/* wake every st thread waiting on map node __mn */
#define signal_mapnode(__mn) \
	if (__mn->waiters) { \
	ta += __mn->waiters; \
	XSEGLOG2(&lc, D, "Signaling map node %lx %s, waiters: \
	%u, ta: %u", __mn, __mn->object, __mn->waiters, ta); \
	st_cond_broadcast(__mn->cond); \
/* Helper functions */
/* NOTE(review): the opening/closing brace lines of these static inline
 * helpers are missing from this chunk; only the signatures and key
 * statements survive. */
/* peer private data --> mapper daemon state */
static inline struct mapperd * __get_mapperd(struct peerd *peer)
	return (struct mapperd *) peer->priv;
/* peer request private data --> per-request mapper io state */
static inline struct mapper_io * __get_mapper_io(struct peer_req *pr)
	return (struct mapper_io *) pr->priv;
/* number of objects needed to cover map->size, i.e. size / blocksize
 * rounded up (the increment and return lines are missing from this chunk) */
static inline uint64_t calc_map_obj(struct map *map)
	nr_objs = map->size / map->blocksize;
	if (map->size % map->blocksize)
/* map handling functions (the double-underscore variants build and return
 * the xseg request; the plain variants drive the full operation) */
/* NOTE(review): the continuation of the __open_map() parameter list (its
 * final flags argument) is missing from this chunk. */
struct xseg_request * __open_map(struct peer_req *pr, struct map *m,
int open_map(struct peer_req *pr, struct map *map, uint32_t flags);
struct xseg_request * __close_map(struct peer_req *pr, struct map *map);
int close_map(struct peer_req *pr, struct map *map);
struct xseg_request * __write_map(struct peer_req* pr, struct map *map);
int write_map(struct peer_req* pr, struct map *map);
int write_map_metadata(struct peer_req* pr, struct map *map);
struct xseg_request * __load_map(struct peer_req *pr, struct map *m);
int read_map(struct map *map, unsigned char *buf);
int load_map(struct peer_req *pr, struct map *map);
/* object copy-up handling */
struct xseg_request * __copyup_object(struct peer_req *pr, struct map_node *mn);
void copyup_cb(struct peer_req *pr, struct xseg_request *req);
struct xseg_request * __object_write(struct peerd *peer, struct peer_req *pr,
		struct map *map, struct map_node *mn);
/* associate/look up the map node an in-flight xseg request refers to
 * (backed by mapper_io->copyups_nodes) */
int __set_node(struct mapper_io *mio, struct xseg_request *req,
		struct map_node *mn);
struct map_node * __get_node(struct mapper_io *mio, struct xseg_request *req);
/* generic xseg request helpers */
int send_request(struct peer_req *pr, struct xseg_request *req);
struct xseg_request * get_request(struct peer_req *pr, xport dst, char * target,
		uint32_t targetlen, uint64_t datalen);
void put_request(struct peer_req *pr, struct xseg_request *req);
/* map metadata and hashing */
struct xseg_request * __load_map_metadata(struct peer_req *pr, struct map *map);
int load_map_metadata(struct peer_req *pr, struct map *map);
int initialize_map_objects(struct map *map);
int hash_map(struct peer_req *pr, struct map *map, struct map *hashed_map);
/* map node lookup with reference counting */
struct map_node * get_mapnode(struct map *map, uint64_t objindex);
void put_mapnode(struct map_node *mn);
/* NOTE(review): the matching "#ifndef MAPPER_H" guard opener is not visible
 * in this chunk of the file. */
#endif /* end MAPPER_H */