Statistics
| Branch: | Revision:

root / synthbench / skampi / measurements / localio.c

History | View | Annotate | Download (26.2 kB)

1
/********************************************************************************
2
 * SKaMPI  MPI-Benchmark 
3
 *
4
 * Copyright (C) 2007-2008  Joachim Mathes
5
 * Lehrstuhl Informatik fuer Ingenieure und Naturwissenschaftler
6
 * Institut fuer Algorithmen und Kognitive Systeme
7
 * Fakultaet fuer Informatik
8
 * Universitaet Karlsruhe (TH)
9
 * 
10
 * This program is free software; you can redistribute it and/or
11
 * modify it under the terms of the GNU General Public License
12
 * as published by the Free Software Foundation; either version 2
13
 * of the License, or (at your option) any later version.
14
 * 
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU General Public License for more details.
19
 * 
20
 * You should have received a copy of the GNU General Public License along
21
 * with this program; if not, write to the Free Software Foundation, Inc.,
22
 * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
23
 *******************************************************************************/
24

    
25
/**
26
 * \file localio.c
27
 * 
28
 * \brief SKaMPI measurement functions for rudimentary local IO
29
 *
30
 * This file provides rudimentary IO measurement functions for SKaMPI.
31
 *
32
 * \author Joachim Mathes
33
 * \date 2007-2008
34
 */
35

    
36
#include <mpi.h>
37
#include "../mpiversiontest.h"
38

    
39
#ifdef USE_MPI_IO
40

    
41
/**
42
 * The <tt>_GNU_SOURCE</tt> macro must be set to provide the <tt>O_DIRECT</tt>
43
 * parameter for open() routine.
44
 */
45
#define _GNU_SOURCE
46

    
47
#include <stdio.h>
48
#include <stdlib.h>
49
#include <string.h>
50
#include <sys/types.h>
51
#include <sys/stat.h>
52
#include <fcntl.h>
53
#include <unistd.h>
54
#include <assert.h>
55
#include <errno.h>
56
#include <sys/mman.h>
57

    
58
#include "../misc.h"
59
#include "../synchronize.h"
60
#include "../output.h"
61
#include "../mem.h"
62

    
63
/** Base name handed to get_io_filename() by every init_ function below. */
#define IO_FILENAME   "skampi_io"
64
/** Value of the <tt>api</tt> parameter that selects the POSIX calls
 *  (open/read/write); any other value takes the MPI-IO path. */
#define POSIX_API     "posix"
65
/** Name of the MPI-IO API (not referenced by the functions in this section). */
#define MPI_API       "mpi"
66
#define BUFFER_SIZE   4194304   /**< 4 MB */
67
#define MAXIMUM_POWER 62        /**< limited by MPI_Offset size (number of bits) */
68

    
69
#pragma weak begin_skampi_extensions
70

    
71
/**
72
 * \brief Returns a unique filename in a dedicated path
73
 *
74
 * This function is defined in <tt>&lt;SKaMPI path&gt;/measurements/io.c</tt>.
75
 */
76
/* The returned string is stored in io_filename by the init_ functions and
   released with mpi_free() by the matching finalize_ functions. */
extern char *get_io_filename (char *, int);
77

    
78
/********************************************************************************
79
 * Global variables
80
 *******************************************************************************/
81

    
82
/** Name of the benchmark file; set by the init_ functions, freed by the
 *  finalize_ functions. */
static char*    io_filename;
83
/** MPI file handle used whenever the file is accessed through MPI-IO. */
static MPI_File io_fh;
84
/** POSIX file descriptor used whenever the file is accessed through the
 *  POSIX calls. */
static int      io_fd;
85

    
86
/********************************************************************************
87
 * Measure functions
88
 *******************************************************************************/
89

    
90
/*@{*/
91
/** \name Measurement-Functions
92
 * The following measurement function blocks (<tt>init_</tt>, <tt>measure_</tt>,
93
 * <tt>finalize_</tt>) are meant to work on <em>local</em> disks. Whether a
 * distributed RFA-file is actually stored on a local or on a global (parallel)
 * disk nevertheless depends on the user-defined environment variable <tt>TMP</tt>.
96
 */
97

    
98
/**
99
 * \brief Initialization function of measure function
100
 *        measure_MPI_IO_read_file_once().
101
 *
102
 * Only one process is active. It reads once from a file.
103
 *
104
 * Remark:<br>
105
 * With the <tt>O_DIRECT</tt> flag set, cache effects are minimized, because I/O
106
 * is done directly to/from user space buffers. The operation system's page
107
 * cache is bypassed. Under Linux 2.6 alignment to 512-byte boundaries is
108
 * required for buffer and file offset. Thus the following parameters should be
109
 * set in a SKaMPI input file:
110
 * - <tt>set_send_buffert_alignment (512)</tt>
111
 * - <tt>set_recv_buffert_alignment (512)</tt>
112
 * - <tt>switch_buffer_cycling_off ()</tt><br>
113
 *
114
 * <tt>O_DIRECT</tt> is only relevant if the POSIX-API is used for I/O.
115
 * 
116
 * For more information please refer to the <tt>open ()</tt> man pages.
117
 * 
118
 * \param[in] size          size of memory buffer, i.e. number of <tt>MPI_BYTE</tt>s
119
 * \param[in] api           POSIX-API or MPI-API for I/O accesses
120
 * \param[in] directio_flag open file with <tt>O_DIRECT</tt> flag to minimize
121
 *                          cache effects
122
 *
123
 * \return    void
124
 */
125
void init_MPI_IO_read_file_once (int size, char *api, int directio_flag) {
126
  char *send_buffer;
127

    
128
  assert (size > 0);
129

    
130
  io_filename = get_io_filename (IO_FILENAME, 0);
131

    
132
  if (get_measurement_rank () == 0){
133

    
134
    send_buffer = mpi_malloc_chars (get_extent (size, MPI_BYTE));
135

    
136
    MPI_File_open (MPI_COMM_SELF, io_filename,
137
                   MPI_MODE_WRONLY | MPI_MODE_CREATE | MPI_MODE_UNIQUE_OPEN,
138
                   MPI_INFO_NULL, &io_fh);
139
    MPI_File_set_view (io_fh, (MPI_Offset)0, 
140
                       MPI_BYTE, MPI_BYTE,
141
                       "native", MPI_INFO_NULL);
142
    MPI_File_write (io_fh, send_buffer, size, MPI_BYTE, MPI_STATUS_IGNORE);
143
    MPI_File_close (&io_fh);
144
    mpi_free (send_buffer);
145

    
146
    set_recv_buffer_usage (size);
147
    set_reported_message_size (size);
148
  }
149

    
150
  MPI_Barrier (get_measurement_comm ());
151

    
152
  /* set synchronization type:
153
   SYNC_BARRIER if all SKaMPI processes run on one physical processor 
154
   SYNC_REAL if every SKaMPI process runs on its own physical processor */
155
  set_synchronization (SYNC_REAL);
156

    
157
  init_synchronization ();
158
}
159

    
160
/**
161
 * \brief Measures the time to read once from a file.
162
 *
163
 * Only one process is active. It reads once from a file.
164
 *
165
 * Remark:<br>
166
 * With the <tt>O_DIRECT</tt> flag set, cache effects are minimized, because I/O
167
 * is done directly to/from user space buffers. The operation system's page
168
 * cache is bypassed. Under Linux 2.6 alignment to 512-byte boundaries is
169
 * required for buffer and file offset. Thus the following parameters should be
170
 * set in a SKaMPI input file:
171
 * - <tt>set_send_buffert_alignment (512)</tt>
172
 * - <tt>set_recv_buffert_alignment (512)</tt>
173
 * - <tt>switch_buffer_cycling_off ()</tt><br>
174
 *
175
 * <tt>O_DIRECT</tt> is only relevant if the POSIX-API is used for I/O.
176
 * 
177
 * For more information please refer to the <tt>open ()</tt> man pages.
178
 *
179
 * \param[in] size  size of memory buffer, i.e. number of <tt>MPI_BYTE</tt>s
180
 * \param[in] api   POSIX-API or MPI-API for I/O accesses
181
 * \param[in] directio_flag open file with <tt>O_DIRECT</tt> flag to minimize
182
 *                          cache effects
183
 *
184
 * \return    measured time 
185
 */
186
double measure_MPI_IO_read_file_once (int size, char *api, int directio_flag){
187
  double     start_time = 1.0, end_time = 0.0;
188
  int        open_flags;
189
  char       *error_string;
190
  
191
  if (get_measurement_rank () == 0){
192
    if (strcmp (api, POSIX_API) == 0){ 
193

    
194
      if (directio_flag != 0)
195
        open_flags = O_RDONLY | O_DIRECT;
196
      else
197
        open_flags = O_RDONLY;
198

    
199
      printf ("flags %d,%d\n", open_flags, O_DIRECT);
200

    
201
      errno = 0;
202
      if ((io_fd = open (io_filename, open_flags)) < 0){
203
        error_string = strerror (errno);
204
        error_with_abort (errno,
205
                          "\nmeasure_MPI_IO_read_file_once (int %d, char * %s, int %d) failed."
206
                          "\nCannot open local file (read only mode)."
207
                          "\nError: %s\n",
208
                          size, api, directio_flag, error_string);
209
      }
210
    
211
      start_time = start_synchronization ();
212
      read (io_fd, get_recv_buffer (), size);
213
      end_time = MPI_Wtime ();
214

    
215
      close (io_fd);
216

    
217
    }
218
    else{
219
      MPI_File_open (MPI_COMM_SELF, io_filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &io_fh);
220
      MPI_File_set_view (io_fh, (MPI_Offset)0, 
221
                         MPI_BYTE, MPI_BYTE,
222
                         "native", MPI_INFO_NULL);
223

    
224
      start_time = start_synchronization ();
225
      MPI_File_read (io_fh, get_recv_buffer (), size, MPI_BYTE, MPI_STATUS_IGNORE);
226
      end_time = stop_synchronization ();
227

    
228
      MPI_File_close (&io_fh);
229

    
230
    }
231
  }
232
  else if (get_measurement_rank () != 0) {
233
    start_synchronization ();
234
  }
235
  stop_synchronization ();
236

    
237
  if (get_measurement_rank () == 0)
238
    return end_time - start_time;
239
  else
240
    return -1.0;
241

    
242
}
243

    
244
/**
245
 * \brief Finalization function of measure function
246
 *        measure_MPI_IO_read_file_once().
247
 *
248
 * Only one process is active. It reads once from a file.
249
 *
250
 * Remark:<br>
251
 * With the <tt>O_DIRECT</tt> flag set, cache effects are minimized, because I/O
252
 * is done directly to/from user space buffers. The operation system's page
253
 * cache is bypassed. Under Linux 2.6 alignment to 512-byte boundaries is
254
 * required for buffer and file offset. Thus the following parameters should be
255
 * set in a SKaMPI input file:
256
 * - <tt>set_send_buffert_alignment (512)</tt>
257
 * - <tt>set_recv_buffert_alignment (512)</tt>
258
 * - <tt>switch_buffer_cycling_off ()</tt><br>
259
 *
260
 * <tt>O_DIRECT</tt> is only relevant if the POSIX-API is used for I/O.
261
 * 
262
 * For more information please refer to the <tt>open ()</tt> man pages.
263
 *        
264
 * \param[in] size  size of memory buffer, i.e. number of <tt>MPI_BYTE</tt>s
265
 * \param[in] api   POSIX-API or MPI-API for I/O accesses
266
 * \param[in] directio_flag open file with <tt>O_DIRECT</tt> flag to minimize
267
 *                          cache effects
268
 *
269
 * \return    void
270
 */
271
void finalize_MPI_IO_read_file_once (int size, char *api, int directio_flag){
272
  MPI_File_delete (io_filename, MPI_INFO_NULL);
273
  mpi_free (io_filename);
274
}
275

    
276
/**
277
 * \brief Initialization function of measure function
278
 *        measure_MPI_IO_write_file_once().
279
 *
280
 * Only one process is active. It writes once to a file.
281
 *
282
 * Remark:<br>
283
 * With the <tt>O_DIRECT</tt> flag set, cache effects are minimized, because I/O
284
 * is done directly to/from user space buffers. The operation system's page
285
 * cache is bypassed. Under Linux 2.6 alignment to 512-byte boundaries is
286
 * required for buffer and file offset. Thus the following parameters should be
287
 * set in a SKaMPI input file:
288
 * - <tt>set_send_buffert_alignment (512)</tt>
289
 * - <tt>set_recv_buffert_alignment (512)</tt>
290
 * - <tt>switch_buffer_cycling_off ()</tt><br>
291
 * 
292
 * For more information please refer to the <tt>open ()</tt> man pages.
293
 * 
294
 * \param[in] size        size of memory buffer, i.e. number of <tt>MPI_BYTE</tt>s
295
 * \param[in] api         POSIX-API or MPI-API for I/O accesses
296
 * \param[in] create_flag write into existing file (FALSE) or create it (TRUE)
297
 * \param[in] directio_flag open file with <tt>O_DIRECT</tt> flag to minimize
298
 *                          cache effects
299
 *
300
 * \return    void
301
 */
302
void init_MPI_IO_write_file_once (int size, char *api, int create_flag, int directio_flag) {
303
  char *send_buffer;
304

    
305
  assert (size > 0);
306

    
307
  io_filename = get_io_filename (IO_FILENAME, 0);
308

    
309
  if (get_measurement_rank () == 0){
310

    
311
    if (create_flag == 0){
312
      send_buffer = mpi_malloc_chars (get_extent (size, MPI_BYTE));
313
    
314
      MPI_File_open (MPI_COMM_SELF, io_filename,
315
                     MPI_MODE_WRONLY | MPI_MODE_CREATE | MPI_MODE_UNIQUE_OPEN,
316
                     MPI_INFO_NULL, &io_fh);
317
      MPI_File_set_view (io_fh, (MPI_Offset)0, 
318
                         MPI_BYTE, MPI_BYTE,
319
                         "native", MPI_INFO_NULL);
320
      MPI_File_write (io_fh, send_buffer, size, MPI_BYTE, MPI_STATUS_IGNORE);
321
      MPI_File_close (&io_fh);
322
      mpi_free (send_buffer);
323
    }
324

    
325
    set_send_buffer_usage (size);
326
    set_reported_message_size (size);
327
  }
328

    
329
  MPI_Barrier (get_measurement_comm ());
330

    
331
  /* set synchronization type:
332
   SYNC_BARRIER if all SKaMPI processes run on one physical processor 
333
   SYNC_REAL if every SKaMPI process runs on its own physical processor */
334
  set_synchronization (SYNC_REAL);
335

    
336
  init_synchronization ();
337
}
338

    
339
/**
340
 * \brief Measures the time to write once to a file.
341
 *
342
 * Only one process is active. It writes once to a file.
343
 *
344
 * Remark:<br>
345
 * With the <tt>O_DIRECT</tt> flag set, cache effects are minimized, because I/O
346
 * is done directly to/from user space buffers. The operation system's page
347
 * cache is bypassed. Under Linux 2.6 alignment to 512-byte boundaries is
348
 * required for buffer and file offset. Thus the following parameters should be
349
 * set in a SKaMPI input file:
350
 * - <tt>set_send_buffert_alignment (512)</tt>
351
 * - <tt>set_recv_buffert_alignment (512)</tt>
352
 * - <tt>switch_buffer_cycling_off ()</tt><br>
353
 *
354
 * <tt>O_DIRECT</tt> is only relevant if the POSIX-API is used for I/O.
355
 * 
356
 * For more information please refer to the <tt>open ()</tt> man pages.
357
 *
358
 * \param[in] size        size of memory buffer, i.e. number of <tt>MPI_BYTE</tt>s
359
 * \param[in] api         POSIX-API or MPI-API for I/O accesses
360
 * \param[in] create_flag write into existing file (FALSE) or create it (TRUE)
361
 * \param[in] directio_flag open file with <tt>O_DIRECT</tt> flag to minimize
362
 *                          cache effects
363
 *
364
 * \return    measured time 
365
 */
366
double measure_MPI_IO_write_file_once (int size, char *api, int create_flag, int directio_flag){
367
  double     start_time = 1.0, end_time = 0.0;
368
  int        open_flags;
369
  char       *error_string;
370
  
371
  if (get_measurement_rank () == 0){
372
    if (strcmp (api, POSIX_API) == 0){
373

    
374
      if (directio_flag != 0)
375
        open_flags = O_WRONLY | O_DIRECT;
376
      else
377
        open_flags = O_WRONLY;
378

    
379
      errno = 0;
380

    
381
      if (create_flag == 0){        /* open existing file */
382

    
383
        if ((io_fd = open (io_filename, open_flags)) < 0){
384
          error_string = strerror (errno);
385
          error_with_abort (errno,
386
                            "\nmeasure_MPI_IO_write_file_once (int %d, char * %s, int %d, int %d) failed."
387
                            "\nCannot open local file (write only mode)."
388
                            "\nError: %s\n",
389
                            size, api, create_flag, directio_flag, error_string);
390
        }
391
      }
392
      else {                        /* open nonexisting file and create it */
393
        
394
        if ((io_fd = open (io_filename, open_flags|O_CREAT, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0){
395
          error_string = strerror (errno);
396
          error_with_abort (errno,
397
                           "\nmeasure_MPI_IO_write_file_once (int %d, char * %s, int %d, int %d) failed."
398
                           "\nCannot open local file (write only mode)."
399
                           "\nError: %s\n",
400
                            size, api, create_flag, directio_flag, error_string);
401
        }
402
      }
403

    
404
      start_time = start_synchronization ();
405
      write (io_fd, get_send_buffer (), size);
406
      fsync (io_fd);
407
      end_time = MPI_Wtime ();
408

    
409
      close (io_fd);
410

    
411
    }
412
    else{                         /* if strcmp (api, POSIX_API) != 0 */
413

    
414
      if (create_flag == 0){
415

    
416
        MPI_File_open (MPI_COMM_SELF, io_filename, MPI_MODE_WRONLY, MPI_INFO_NULL, &io_fh);
417
      }
418
      else{                         /* if create_flag != 0*/
419

    
420
        MPI_File_open (MPI_COMM_SELF, io_filename, MPI_MODE_WRONLY|MPI_MODE_CREATE, MPI_INFO_NULL, &io_fh);
421
      }
422
      
423
      MPI_File_set_view (io_fh, (MPI_Offset)0, 
424
                        MPI_BYTE, MPI_BYTE,
425
                        "native", MPI_INFO_NULL);
426

    
427
      start_time = start_synchronization ();
428
      MPI_File_write (io_fh, get_send_buffer (), size, MPI_BYTE, MPI_STATUS_IGNORE);
429
      MPI_File_sync (io_fh);
430
      end_time = MPI_Wtime ();
431
      
432
      MPI_File_close (&io_fh);
433
    }
434
  }
435
  else if (get_measurement_rank () != 0) {
436
    start_synchronization ();
437
  }
438
  stop_synchronization ();
439

    
440
  if (get_measurement_rank () == 0)
441
    return end_time - start_time;
442
  else
443
    return -1.0;
444
}
445

    
446
/**
447
 * \brief Finalization function of measure function
448
 *        measure_MPI_IO_write_file_once().
449
 *
450
 * Only one process is active. It writes once to a file.
451
 *
452
 * Remark:<br>
453
 * With the <tt>O_DIRECT</tt> flag set, cache effects are minimized, because I/O
454
 * is done directly to/from user space buffers. The operation system's page
455
 * cache is bypassed. Under Linux 2.6 alignment to 512-byte boundaries is
456
 * required for buffer and file offset. Thus the following parameters should be
457
 * set in a SKaMPI input file:
458
 * - <tt>set_send_buffert_alignment (512)</tt>
459
 * - <tt>set_recv_buffert_alignment (512)</tt>
460
 * - <tt>switch_buffer_cycling_off ()</tt><br>
461
 *
462
 * <tt>O_DIRECT</tt> is only relevant if the POSIX-API is used for I/O.
463
 * 
464
 * For more information please refer to the <tt>open ()</tt> man pages.
465
 *
466
 * \param[in] size  size of memory buffer, i.e. number of <tt>MPI_BYTE</tt>s
467
 * \param[in] api   POSIX-API or MPI-API for I/O accesses
468
 * \param[in] create_flag write into existing file (FALSE) or create it (TRUE)
469
 * \param[in] directio_flag open file with <tt>O_DIRECT</tt> flag to minimize
470
 *                          cache effects
471
 *
472
 * \return    void
473
 */
474
void finalize_MPI_IO_write_file_once (int size, char *api, int create_flag, int directio_flag){
475
  MPI_File_delete (io_filename, MPI_INFO_NULL);
476
  mpi_free (io_filename);
477
}
478

    
479
/**
480
 * \brief Initialization function of measure function
481
 *        measure_MPI_IO_read_large_file_once().
482
 *
483
 * Only one process is active. It writes once to a file.
484
 *        
485
 * Since SKaMPI measurement functions are not allowed to use MPI_Offset
486
 * parameters, it is impossible to tell an init_-routine to create a file
487
 * which is larger than \f$2^{\mbox{\texttt{sizeof(int)}}-1}-1\f$ directly. As
488
 * a preliminary solution we use a parameter (<tt>power</tt>) which commits the
489
 * power to 2 as an indicator for the file size.
490
 *
491
 * Remark concerning the <em>HP XC6000</em>:<br>
492
 * Measurements showed that there is no significant difference between MPI-API
493
 * and POSIX-API I/O accesses, if files are larger than 1MB. Thus there is no
494
 * choice between these two modes like in measure_MPI_IO_read_file_once(),
495
 * which makes type compatibilty problems much easier. Only MPI-API is
496
 * supported.
497
 *        
498
 * Only one process is active. It reads once from a file.
499
 *
500
 * \param[in] power size of memory buffer; 2 to the power of `power' <tt>MPI_BYTE</tt>s
501
 *
502
 * \return    void
503
 */
504
void init_MPI_IO_read_large_file_once (int power) {
505
  MPI_Offset size;
506
  char       *error_string;
507

    
508
  io_filename = get_io_filename (IO_FILENAME, 0);
509

    
510
  if (get_measurement_rank () == 0){
511

    
512
    if (power > MAXIMUM_POWER || power < 0){
513
      error_string = strerror (EINVAL);
514
      error_with_abort (errno,
515
                        "\ninit_MPI_IO_read_large_file_once (int %d) failed."
516
                        "\nInvalid power argument."
517
                        "\nError: %s\n",
518
                        power, error_string);
519
    }
520

    
521
    size = ((MPI_Offset) 1) << power;
522

    
523
    MPI_File_open (MPI_COMM_SELF, io_filename,
524
                   MPI_MODE_WRONLY | MPI_MODE_CREATE | MPI_MODE_UNIQUE_OPEN,
525
                   MPI_INFO_NULL, &io_fh);
526
    MPI_File_preallocate (io_fh, size);
527
    MPI_File_close (&io_fh);
528

    
529
    set_recv_buffer_usage (size);
530
    set_reported_message_size (size);
531
  }
532

    
533
  MPI_Barrier (get_measurement_comm ());
534

    
535
  /* set synchronization type:
536
   SYNC_BARRIER if all SKaMPI processes run on one physical processor 
537
   SYNC_REAL if every SKaMPI process runs on its own physical processor */
538
  set_synchronization (SYNC_REAL);
539

    
540
  init_synchronization ();
541
}
542

    
543
/**
544
 * \brief Measures the time to read once from a large file.
545
 *
546
 * Only one process is active. It writes once to a file.
547
 * 
548
 * Since SKaMPI measurement functions are not allowed to use MPI_Offset
549
 * parameters, it is impossible to tell an init_-routine to create a file
550
 * which is larger than \f$2^{\mbox{\texttt{sizeof(int)}}-1}-1\f$ directly. As
551
 * a preliminary solution we use a parameter (<tt>power</tt>) which commits the
552
 * power to 2 as an indicator for the file size.
553
 *
554
 * Remark concerning the <em>HP XC6000</em>:<br>
555
 * Measurements showed that there is no significant difference between MPI-API
556
 * and POSIX-API I/O accesses, if files are larger than 1MB. Thus there is no
557
 * choice between these two modes like in measure_MPI_IO_read_file_once(),
558
 * which makes type compatibilty problems much easier. Only MPI-API is
559
 * supported.
560
 * 
561
 * \param[in] power size of memory buffer; 2 to the power of `power' <tt>MPI_BYTE</tt>s
562
 *
563
 * \return    measured time 
564
 */
565
double measure_MPI_IO_read_large_file_once (int power){
566
  MPI_Offset size;
567
  double     start_time = 1.0, end_time = 0.0;
568
  
569
  if (get_measurement_rank () == 0){
570
    
571
    size = ((MPI_Offset) 1) << power;
572

    
573
    MPI_File_open (MPI_COMM_SELF, io_filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &io_fh);
574
    MPI_File_set_view (io_fh, (MPI_Offset)0, 
575
                       MPI_BYTE, MPI_BYTE,
576
                       "native", MPI_INFO_NULL);
577
    
578
    start_time = start_synchronization ();
579
    MPI_File_read (io_fh, get_recv_buffer (), size, MPI_BYTE, MPI_STATUS_IGNORE);
580
    end_time = stop_synchronization ();
581
    
582
    MPI_File_close (&io_fh);
583
  }
584
  else if (get_measurement_rank () != 0) {
585
    start_synchronization ();
586
  }
587
  stop_synchronization ();
588

    
589
  if (get_measurement_rank () == 0)
590
    return end_time - start_time;
591
  else
592
    return -1.0;
593

    
594
}
595

    
596
/**
597
 * \brief Finalization function of measure function
598
 *        measure_MPI_IO_read_large_file_once().
599
 *
600
 * Only one process is active. It writes once to a file.
601
 *
602
 * Since SKaMPI measurement functions are not allowed to use MPI_Offset
603
 * parameters, it is impossible to tell an init_-routine to create a file
604
 * which is larger than \f$2^{\mbox{\texttt{sizeof(int)}}-1}-1\f$ directly. As
605
 * a preliminary solution we use a parameter (<tt>power</tt>) which commits the
606
 * power to 2 as an indicator for the file size.
607
 *
608
 * Remark concerning the <em>HP XC6000</em>:<br>
609
 * Measurements showed that there is no significant difference between MPI-API
610
 * and POSIX-API I/O accesses, if files are larger than 1MB. Thus there is no
611
 * choice between these two modes like in measure_MPI_IO_read_file_once(),
612
 * which makes type compatibilty problems much easier. Only MPI-API is
613
 * supported.
614
 *        
615
 * \param[in] power size of memory buffer; 2 to the power of `power' <tt>MPI_BYTE</tt>s
616
 *
617
 * \return    void
618
 */
619
void finalize_MPI_IO_read_large_file_once (int power){
620
  MPI_File_delete (io_filename, MPI_INFO_NULL);
621
  mpi_free (io_filename);
622
}
623

    
624
/**
625
 * \brief Initialization function of measure function
626
 *        measure_MPI_IO_write_large_file_once().
627
 *
628
 * Only one process is active. It writes once to a file.
629
 *
630
 * Since SKaMPI measurement functions are not allowed to use MPI_Offset
631
 * parameters, it is impossible to tell an init_-routine to create a file
632
 * which is larger than \f$2^{\mbox{\texttt{sizeof(int)}}-1}-1\f$ directly. As
633
 * a preliminary solution we use a parameter (<tt>power</tt>) which commits the
634
 * power to 2 as an indicator for the file size.
635
 *
636
 * Remark concerning the <em>HP XC6000</em>:<br>
637
 * Measurements showed that there is no significant difference between MPI-API
638
 * and POSIX-API I/O accesses, if files are larger than 1MB. Thus there is no
639
 * choice between these two modes like in measure_MPI_IO_read_file_once(),
640
 * which makes type compatibilty problems much easier. Only MPI-API is
641
 * supported.
642
 * 
643
 * \param[in] power       size of memory buffer; 2 to the power of `power' <tt>MPI_BYTE</tt>s
644
 * \param[in] create_flag write into existing file (FALSE) or create it (TRUE)
645
 *
646
 * \return    void
647
 */
648
void init_MPI_IO_write_large_file_once (int power, int create_flag) {
649
  MPI_Offset size;
650
  char       *error_string;
651

    
652
  io_filename = get_io_filename (IO_FILENAME, 0);
653

    
654
  if (get_measurement_rank () == 0){
655

    
656
    if (power > MAXIMUM_POWER || power < 0){
657
      error_string = strerror (EINVAL);
658
      error_with_abort (errno,
659
                        "\ninit_MPI_IO_write_large_file_once (int %d, int %d) failed."
660
                        "\nInvalid power argument."
661
                        "\nError: %s\n",
662
                        power, create_flag, error_string);
663
    }
664

    
665
    size = ((MPI_Offset) 1) << power;
666

    
667
    if (create_flag == 0){
668
    
669
      MPI_File_open (MPI_COMM_SELF, io_filename,
670
                     MPI_MODE_WRONLY | MPI_MODE_CREATE | MPI_MODE_UNIQUE_OPEN,
671
                     MPI_INFO_NULL, &io_fh);
672
      MPI_File_preallocate (io_fh, size);
673
      MPI_File_close (&io_fh);
674
    }
675
    
676
    set_send_buffer_usage (size);
677
    set_reported_message_size (size);
678
  }
679

    
680
  MPI_Barrier (get_measurement_comm ());
681

    
682
  /* set synchronization type:
683
   SYNC_BARRIER if all SKaMPI processes run on one physical processor 
684
   SYNC_REAL if every SKaMPI process runs on its own physical processor */
685
  set_synchronization (SYNC_REAL);
686

    
687
  init_synchronization ();
688
}
689

    
690
/**
691
 * \brief Measures the time to write once to a large file.
692
 *
693
 * Only one process is active. It writes once to a file.
694
 *
695
 * Since SKaMPI measurement functions are not allowed to use MPI_Offset
696
 * parameters, it is impossible to tell an init_-routine to create a file
697
 * which is larger than \f$2^{\mbox{\texttt{sizeof(int)}}-1}-1\f$ directly. As
698
 * a preliminary solution we use a parameter (<tt>power</tt>) which commits the
699
 * power to 2 as an indicator for the file size.
700
 *
701
 * Remark concerning the <em>HP XC6000</em>:<br>
702
 * Measurements showed that there is no significant difference between MPI-API
703
 * and POSIX-API I/O accesses, if files are larger than 1MB. Thus there is no
704
 * choice between these two modes like in measure_MPI_IO_read_file_once(),
705
 * which makes type compatibilty problems much easier. Only MPI-API is
706
 * supported.
707
 * 
708
 * \param[in] power       size of memory buffer; 2 to the power of `power' <tt>MPI_BYTE</tt>s
709
 * \param[in] create_flag write into existing file (FALSE) or create it (TRUE)
710
 *
711
 * \return    measured time 
712
 */
713
double measure_MPI_IO_write_large_file_once (int power, int create_flag){
714
  MPI_Offset size;
715
  double     start_time = 1.0, end_time = 0.0;
716
  
717
  if (get_measurement_rank () == 0){
718
    
719
    size = ((MPI_Offset) 1) << power;
720

    
721
    if (create_flag == 0){
722

    
723
      MPI_File_open (MPI_COMM_SELF, io_filename, MPI_MODE_WRONLY, MPI_INFO_NULL, &io_fh);
724
    }
725
    else{                         /* if create_flag != 0*/
726

    
727
      MPI_File_open (MPI_COMM_SELF, io_filename, MPI_MODE_WRONLY|MPI_MODE_CREATE, MPI_INFO_NULL, &io_fh);
728
    }
729
      
730
    MPI_File_set_view (io_fh, (MPI_Offset)0, 
731
                       MPI_BYTE, MPI_BYTE,
732
                       "native", MPI_INFO_NULL);
733

    
734
    start_time = start_synchronization ();
735
    MPI_File_write (io_fh, get_send_buffer (), size, MPI_BYTE, MPI_STATUS_IGNORE);
736
    end_time = MPI_Wtime ();
737
      
738
    MPI_File_close (&io_fh);
739

    
740
  }
741
  else if (get_measurement_rank () != 0) {
742
    start_synchronization ();
743
  }
744
  stop_synchronization ();
745

    
746
  if (get_measurement_rank () == 0)
747
    return end_time - start_time;
748
  else
749
    return -1.0;
750
}
751

    
752
/**
753
 * \brief Finalization function of measure function
754
 *        measure_MPI_IO_write_large_file_once().
755
 *
756
 * Only one process is active. It writes once to a file.
757
 *
758
 * Since SKaMPI measurement functions are not allowed to use MPI_Offset
759
 * parameters, it is impossible to tell an init_-routine to create a file
760
 * which is larger than \f$2^{\mbox{\texttt{sizeof(int)}}-1}-1\f$ directly. As
761
 * a preliminary solution we use a parameter (<tt>power</tt>) which commits the
762
 * power to 2 as an indicator for the file size.
763
 *
764
 * Remark concerning the <em>HP XC6000</em>:<br>
765
 * Measurements showed that there is no significant difference between MPI-API
766
 * and POSIX-API I/O accesses, if files are larger than 1MB. Thus there is no
767
 * choice between these two modes like in measure_MPI_IO_read_file_once(),
768
 * which makes type compatibilty problems much easier. Only MPI-API is
769
 * supported.
770
 *        
771
 * \param[in] power       size of memory buffer; 2 to the power of `power' <tt>MPI_BYTE</tt>s
772
 * \param[in] create_flag write into existing file (FALSE) or create it (TRUE)
773
 *
774
 * \return    void
775
 */
776
void finalize_MPI_IO_write_large_file_once (int power, int create_flag){
777
  MPI_File_delete (io_filename, MPI_INFO_NULL);
778
  mpi_free (io_filename);
779
}
780

    
781
/*@}*/
782

    
783
#pragma weak end_skampi_extensions
784

    
785
#endif        /* USE_MPI_IO */