+
+/*
+ * Read up to `size` bytes from `fd` at `offset` into `data`, re-issuing
+ * pread() after short reads until the request is satisfied, pread() reports
+ * end-of-file (returns 0), or an error occurs.
+ *
+ * Returns the number of bytes read. The count may be short when end-of-file
+ * is hit or when an error follows a partial read. If the error happens before
+ * any byte was read, the negative pread() result is returned instead.
+ */
+static ssize_t persisting_read(int fd, void *data, size_t size, off_t offset)
+{
+	ssize_t r = 0, sum = 0;
+	char error_str[1024];
+
+	XSEGLOG2(&lc, D, "fd: %d, size: %zu, offset: %lld", fd, size, (long long)offset);
+
+	while ((size_t)sum < size) {
+		XSEGLOG2(&lc, D, "read: %zd, (aligned)size: %zu", sum, size);
+		r = pread(fd, (char *)data + sum, size - sum, offset + sum);
+		if (r < 0) {
+			/* An interrupted call transferred nothing; just retry. */
+			if (errno == EINTR)
+				continue;
+			/*
+			 * NOTE(review): passing the strerror_r() result to %s
+			 * assumes the GNU variant (returns char *) - confirm
+			 * against the feature-test macros used by the build.
+			 */
+			XSEGLOG2(&lc, E, "fd: %d, Error: %s", fd,
+				 strerror_r(errno, error_str, sizeof(error_str)));
+			break;
+		}
+		if (r == 0)
+			break;	/* end of file */
+		sum += r;
+	}
+	XSEGLOG2(&lc, D, "read: %zd, (aligned)size: %zu", sum, size);
+
+	/* Only report the error when nothing at all was transferred. */
+	if (sum == 0 && r < 0) {
+		sum = r;
+	}
+	XSEGLOG2(&lc, D, "Finished. Read %zd, r = %zd", sum, r);
+
+	return sum;
+}
+
+/*
+ * Write `size` bytes from `data` to `fd` at `offset`, re-issuing pwrite()
+ * after short writes until everything is written, pwrite() makes no progress
+ * (returns 0), or an error occurs.
+ *
+ * Returns the number of bytes written. The count may be short when an error
+ * or a zero-length write follows a partial write. If the error happens before
+ * any byte was written, the negative pwrite() result is returned instead.
+ */
+static ssize_t persisting_write(int fd, void *data, size_t size, off_t offset)
+{
+	ssize_t r = 0, sum = 0;
+	char error_str[1024];
+
+	XSEGLOG2(&lc, D, "fd: %d, size: %zu, offset: %lld", fd, size, (long long)offset);
+	while ((size_t)sum < size) {
+		XSEGLOG2(&lc, D, "written: %zd, (aligned)size: %zu", sum, size);
+		r = pwrite(fd, (char *)data + sum, size - sum, offset + sum);
+		if (r < 0) {
+			/* An interrupted call transferred nothing; just retry. */
+			if (errno == EINTR)
+				continue;
+			/*
+			 * NOTE(review): passing the strerror_r() result to %s
+			 * assumes the GNU variant (returns char *) - confirm
+			 * against the feature-test macros used by the build.
+			 */
+			XSEGLOG2(&lc, E, "fd: %d, Error: %s", fd,
+				 strerror_r(errno, error_str, sizeof(error_str)));
+			break;
+		}
+		/* A zero-byte write makes no progress; stop instead of spinning. */
+		if (r == 0)
+			break;
+		sum += r;
+	}
+	XSEGLOG2(&lc, D, "written: %zd, (aligned)size: %zu", sum, size);
+
+	/* Only report the error when nothing at all was transferred. */
+	if (sum == 0 && r < 0) {
+		sum = r;
+	}
+	XSEGLOG2(&lc, D, "Finished. Wrote %zd, r = %zd", sum, r);
+
+	return sum;
+}
+
+/*
+ * Read `size` bytes from `fd` at `offset` into `data` while issuing only
+ * requests whose buffer address, length and file offset are multiples of
+ * `alignment`.
+ *
+ * When any of the three is misaligned, the enclosing aligned range is read
+ * into a temporary aligned buffer and the requested part is copied out.
+ * When everything is already aligned, `data` is used directly.
+ *
+ * Returns the number of bytes of the caller's range that were actually read
+ * (at most `size`), -1 if the temporary buffer could not be allocated, or the
+ * negative result of persisting_read() on an I/O error.
+ */
+static ssize_t aligned_read(int fd, void *data, ssize_t size, off_t offset, int alignment)
+{
+	void *bounce = NULL;	/* temporary aligned buffer, NULL when unused */
+	char *tmp_data;
+	ssize_t r;
+	int err;
+	size_t misaligned_data, misaligned_size, misaligned_offset;
+	off_t aligned_offset = offset;
+	size_t aligned_size = size;
+
+	misaligned_data = (unsigned long)data % alignment;
+	misaligned_size = size % alignment;
+	misaligned_offset = offset % alignment;
+	XSEGLOG2(&lc, D, "misaligned_data: %zu, misaligned_size: %zu, misaligned_offset: %zu", misaligned_data, misaligned_size, misaligned_offset);
+	if (misaligned_data || misaligned_size || misaligned_offset) {
+		/* Start at the aligned offset at or below the requested one. */
+		aligned_offset = offset - misaligned_offset;
+		aligned_size = size + misaligned_offset;
+
+		/* Round the length up to a whole number of alignment units. */
+		misaligned_size = aligned_size % alignment;
+		if (misaligned_size)
+			aligned_size = aligned_size + alignment - misaligned_size;
+
+		/* posix_memalign() returns 0 or a positive error number. */
+		err = posix_memalign(&bounce, alignment, aligned_size);
+		if (err != 0) {
+			return -1;
+		}
+		tmp_data = bounce;
+	} else {
+		tmp_data = data;
+		aligned_offset = offset;
+		aligned_size = size;
+	}
+
+	XSEGLOG2(&lc, D, "aligned_data: %p, aligned_size: %zu, aligned_offset: %lld", (void *)tmp_data, aligned_size, (long long)aligned_offset);
+	r = persisting_read(fd, tmp_data, aligned_size, aligned_offset);
+
+	if (r >= 0) {
+		/*
+		 * `r` counts from aligned_offset. Drop the leading bytes that
+		 * precede the caller's offset and cap at the requested size so
+		 * that a short read is reported accurately.
+		 */
+		r -= (ssize_t)misaligned_offset;
+		if (r < 0)
+			r = 0;
+		if (r > size)
+			r = size;
+		/* Copy only bytes that were really read from the file. */
+		if (tmp_data != (char *)data && r > 0)
+			memcpy(data, tmp_data + misaligned_offset, r);
+	}
+
+	free(bounce);
+	return r;
+}
+
+pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER; /* single global mutex taken by __fcntl_lock() and released by __fcntl_unlock() */
+
+/*
+ * Acquire the global mutex `m`. The `fd`, `start` and `len` arguments are
+ * accepted but not used. Returns the result of pthread_mutex_lock().
+ */
+int __fcntl_lock(int fd, off_t start, off_t len)
+{
+	int rc;
+
+	(void)fd;
+	(void)start;
+	(void)len;
+
+	rc = pthread_mutex_lock(&m);
+	return rc;
+}
+
+/*
+ * Release the global mutex `m`. The `fd`, `start` and `len` arguments are
+ * accepted but not used. Returns the result of pthread_mutex_unlock().
+ */
+int __fcntl_unlock(int fd, off_t start, off_t len)
+{
+	int rc;
+
+	(void)fd;
+	(void)start;
+	(void)len;
+
+	rc = pthread_mutex_unlock(&m);
+	return rc;
+}
+
+/*
+ * Write `size` bytes from `data` to `fd` at `offset` while issuing only
+ * requests whose buffer address, length and file offset are multiples of
+ * `alignment`.
+ *
+ * When any of the three is misaligned, the enclosing aligned range is built
+ * in a temporary aligned buffer. The first and/or last alignment unit is read
+ * from the file (zero-filled where the file is shorter), the caller's bytes
+ * are copied over it, and the whole range is written back. This
+ * read-modify-write sequence runs with __fcntl_lock() held. When everything
+ * is already aligned, `data` is written directly and no lock is taken.
+ *
+ * Returns the number of bytes of the caller's range that were written (at
+ * most `size`), -1 if the temporary buffer could not be allocated or a
+ * pre-read failed, or the negative result of persisting_write() on a write
+ * error.
+ */
+static ssize_t aligned_write(int fd, void *data, size_t size, off_t offset, int alignment)
+{
+	int locked = 0;
+	int err;
+	void *bounce = NULL;	/* temporary aligned buffer, NULL when unused */
+	char *tmp_data = data;
+	ssize_t r;
+	size_t misaligned_data, misaligned_size, misaligned_offset;
+	size_t aligned_size = size, read_size;
+	off_t aligned_offset = offset;
+
+	misaligned_data = (unsigned long)data % alignment;
+	misaligned_size = size % alignment;
+	misaligned_offset = offset % alignment;
+	if (misaligned_data || misaligned_size || misaligned_offset) {
+		/* Start at the aligned offset at or below the requested one. */
+		aligned_offset = offset - misaligned_offset;
+
+		/*
+		 * Grow the size by the leading slack, then round it up to a
+		 * whole number of alignment units if it is still misaligned.
+		 */
+		aligned_size = size + misaligned_offset;
+		misaligned_size = aligned_size % alignment;
+		if (misaligned_size)
+			aligned_size = aligned_size + alignment - misaligned_size;
+
+		/* posix_memalign() returns 0 or a positive error number. */
+		err = posix_memalign(&bounce, alignment, aligned_size);
+		if (err != 0) {
+			return -1;
+		}
+		tmp_data = bounce;
+
+		XSEGLOG2(&lc, D, "fd: %d, misaligned_data: %zu, misaligned_size: %zu, misaligned_offset: %zu", fd, misaligned_data, misaligned_size, misaligned_offset);
+		XSEGLOG2(&lc, D, "fd: %d, aligned_data: %p, aligned_size: %zu, aligned_offset: %lld", fd, (void *)tmp_data, aligned_size, (long long)aligned_offset);
+		XSEGLOG2(&lc, D, "fd: %d, locking from %lld to %lld", fd, (long long)aligned_offset, (long long)(aligned_offset + (off_t)aligned_size));
+		__fcntl_lock(fd, aligned_offset, aligned_size + alignment - misaligned_size);
+		locked = 1;
+
+		if (misaligned_offset) {
+			XSEGLOG2(&lc, D, "fd: %d, size: %zu, offset: %lld", fd, size, (long long)offset);
+			/* Preserve the bytes that precede the caller's offset. */
+			read_size = alignment;
+			r = persisting_read(fd, tmp_data, read_size, aligned_offset);
+			if (r < 0) {
+				r = -1;
+				goto out;
+			}
+			if ((size_t)r != read_size) {
+				memset(tmp_data + r, 0, read_size - r);
+			}
+		}
+
+		if (misaligned_size) {
+			/* Preserve the bytes that follow the caller's range. */
+			read_size = alignment;
+			r = persisting_read(fd, tmp_data + aligned_size - alignment, read_size,
+					aligned_offset + aligned_size - alignment);
+			if (r < 0) {
+				r = -1;
+				goto out;
+			}
+			if ((size_t)r != read_size) {
+				memset(tmp_data + aligned_size - alignment + r, 0, read_size - r);
+			}
+		}
+		memcpy(tmp_data + misaligned_offset, data, size);
+	}
+
+	r = persisting_write(fd, tmp_data, aligned_size, aligned_offset);
+	if (r >= 0) {
+		/*
+		 * `r` counts from aligned_offset. Drop the leading bytes that
+		 * precede the caller's offset and cap at the requested size.
+		 * A negative `r` is left untouched so that errors reach the
+		 * caller instead of being converted to an unsigned value.
+		 */
+		r -= (ssize_t)misaligned_offset;
+		if (r < 0)
+			r = 0;
+		if ((size_t)r > size)
+			r = (ssize_t)size;
+	}
+
+out:
+	/* Single exit path: always drop the lock and the temporary buffer. */
+	if (locked) {
+		XSEGLOG2(&lc, D, "fd: %d, unlocking from %lld to %lld", fd, (long long)aligned_offset, (long long)(aligned_offset + (off_t)aligned_size));
+		__fcntl_unlock(fd, aligned_offset, aligned_size + alignment - misaligned_size);
+	}
+	free(bounce);
+
+	return r;
+}
+
+/*
+ * Write `size` bytes from `data` to `fd` at `offset`. A non-zero `direct`
+ * routes the request through aligned_write() with a 512-byte alignment;
+ * otherwise persisting_write() is called as is.
+ */
+static ssize_t filed_write(int fd, void *data, size_t size, off_t offset, int direct)
+{
+	if (!direct) {
+		return persisting_write(fd, data, size, offset);
+	}
+
+	return aligned_write(fd, data, size, offset, 512);
+}
+
+/*
+ * Read `size` bytes from `fd` at `offset` into `data`. A non-zero `direct`
+ * routes the request through aligned_read() with a 512-byte alignment;
+ * otherwise persisting_read() is called as is.
+ */
+static ssize_t filed_read(int fd, void *data, size_t size, off_t offset, int direct)
+{
+	if (!direct) {
+		return persisting_read(fd, data, size, offset);
+	}
+
+	return aligned_read(fd, data, size, offset, 512);
+}
+
+/*
+ * Read through filed_read(), taking the direct-I/O setting from
+ * `pfiled->directio`.
+ */
+static ssize_t pfiled_read(struct pfiled *pfiled, int fd, void *data, size_t size, off_t offset)
+{
+	int use_direct = pfiled->directio;
+
+	return filed_read(fd, data, size, offset, use_direct);
+}
+
+/*
+ * Write through filed_write(), taking the direct-I/O setting from
+ * `pfiled->directio`.
+ */
+static ssize_t pfiled_write(struct pfiled *pfiled, int fd, void *data, size_t size, off_t offset)
+{
+	int use_direct = pfiled->directio;
+
+	return filed_write(fd, data, size, offset, use_direct);
+}
+
+/*
+ * Open `path` with `flags` and `mode`, perform one transfer of `size` bytes
+ * at `offset`, then close the descriptor. A non-zero `write` selects
+ * filed_write(); zero selects filed_read(). The direct-I/O argument passed
+ * on is `flags & O_DIRECT`.
+ *
+ * Returns -1 if open() fails, otherwise the result of the transfer.
+ */
+static ssize_t generic_io_path(char *path, void *data, size_t size, off_t offset, int write, int flags, mode_t mode)
+{
+	ssize_t result;
+	int fd = open(path, flags, mode);
+
+	if (fd < 0)
+		return -1;
+
+	XSEGLOG2(&lc, D, "Opened file %s as fd %d", path, fd);
+
+	result = write ? filed_write(fd, data, size, offset, flags & O_DIRECT)
+		       : filed_read(fd, data, size, offset, flags & O_DIRECT);
+
+	close(fd);
+
+	return result;
+}
+
+/*
+ * Read `size` bytes at `offset` from the file at `path` via
+ * generic_io_path(). The file is opened O_RDONLY, with O_DIRECT added when
+ * `direct` is non-zero, and with a mode argument of 0.
+ */
+static ssize_t read_path(char *path, void *data, size_t size, off_t offset, int direct)
+{
+	const int flags = direct ? (O_RDONLY | O_DIRECT) : O_RDONLY;
+
+	return generic_io_path(path, data, size, offset, 0, flags, 0);
+}
+
+/*
+ * Build a path for `name` with create_path() and read `size` bytes at
+ * `offset` from it via read_path(), using `pfiled->directio` as the
+ * direct-I/O setting.
+ *
+ * Returns -1 (after logging) when create_path() reports a negative result,
+ * otherwise the result of read_path().
+ */
+static ssize_t pfiled_read_name(struct pfiled *pfiled, char *name, uint32_t namelen, void *data, size_t size, off_t offset)
+{
+	char path[XSEG_MAX_TARGETLEN + MAX_PATH_SIZE + 1];
+	int rc = create_path(path, pfiled, name, namelen, 1);
+
+	if (rc >= 0)
+		return read_path(path, data, size, offset, pfiled->directio);
+
+	XSEGLOG2(&lc, E, "Could not create path");
+	return -1;
+}
+
+/*
+ * Write `size` bytes at `offset` to the file at `path` via
+ * generic_io_path(). The file is opened with O_RDWR combined with
+ * `extra_open_flags`, plus O_DIRECT when `direct` is non-zero; `mode` is
+ * passed on unchanged.
+ */
+static ssize_t write_path(char *path, void *data, size_t size, off_t offset, int direct, int extra_open_flags, mode_t mode)
+{
+	const int base_flags = O_RDWR | extra_open_flags;
+	const int flags = direct ? (base_flags | O_DIRECT) : base_flags;
+
+	return generic_io_path(path, data, size, offset, 1, flags, mode);
+}
+
+/*
+ * Build a path for `name` with create_path() and write `size` bytes at
+ * `offset` to it via write_path(), using `pfiled->directio` as the
+ * direct-I/O setting and passing `extra_open_flags` and `mode` through.
+ *
+ * Returns -1 (after logging) when create_path() reports a negative result,
+ * otherwise the result of write_path().
+ */
+static ssize_t pfiled_write_name(struct pfiled *pfiled, char *name, uint32_t namelen, void *data, size_t size, off_t offset, int extra_open_flags, mode_t mode)
+{
+	char path[XSEG_MAX_TARGETLEN + MAX_PATH_SIZE + 1];
+	int rc = create_path(path, pfiled, name, namelen, 1);
+
+	if (rc >= 0)
+		return write_path(path, data, size, offset, pfiled->directio, extra_open_flags, mode);
+
+	XSEGLOG2(&lc, E, "Could not create path");
+	return -1;
+}
+