Statistics
| Branch: | Revision:

root / synthbench / euroben-ports / base / C-MPI / mod2f / .svn / text-base / mod2f.c.svn-base @ 0:839f52ef7657

History | View | Annotate | Download (4.9 kB)

1
#include <stdio.h>
2
#include <stdlib.h>
3
#include <math.h>
4
#include <mpi.h>
5
#include "fundefs.h"
6
// First dimension of the file-scope tables offset[][] and sizes[][] declared
// below. main() indexes sizes[] with the MPI rank of the calling process, so
// this presumably acts as the upper limit on the number of processes
// (NOTE(review): confirm against sizoff()/tstinp()).
#define  MAXNOD 1000000
7

    
8
// Rank of this process (me) and total number of processes (nodes) in
// MPI_COMM_WORLD. Both are set once at the start of main() through
// MPI_Comm_rank() and MPI_Comm_size().
int me, nodes;
9
// Two tables of MAXNOD rows by 2 columns of int. main() reads sizes[me][0]
// and sizes[me][1] and hands them to gtrans(); offset[][] is not referenced
// in main().
// NOTE(review): presumably both tables are filled by sizoff( nr, nc ) --
// confirm in its definition.
int offset[MAXNOD][2], sizes[MAXNOD][2];
10

    
11
int main( int argc, char* argv[] )
12
{ 
13
   MPI_Comm comm = MPI_COMM_WORLD;
14
   int      m, n, nrep;
15
   int      mflint, mfltrn, ok, gok;
16
   int      i, irep, m1, m2, mc, mr, nc, nr, nx;
17
   double   **arr1r, **arr1i, **arr2r, **arr2i, **carr, **cari;
18
   double   *ur, *ui, *wr, *wi;
19
   double   corr, err, frac, mflops, time1, gtime1, time2, gtime2;
20
   FILE     *inl;
21
// ------------------------------------------------------------------------
22
   MPI_Init( &argc, &argv );
23
   MPI_Comm_rank( comm, &me );
24
   MPI_Comm_size( comm, &nodes );
25
   if ( me == 0 ) {
26
      state( "mod2f" );
27
      prthead( nodes );
28
   }
29
   inl = fopen( "mod2f.in", "r" );
30
   while( ( fscanf( inl, "%d%d\n", &n, &nrep ) != EOF ) ){
31
      m   = ilog2( n );
32
      m2  = m/2;
33
      m1 = m%2 == 0 ? m2 : m2 + 1;
34
      nc = pow( 2, m2 );
35
      nr = pow( 2, m1 );
36
      sizoff( nr, nc );
37
      nx  = nc > nr ? nc : nr;
38
      mr  = nr/nodes;
39
      mc  = nc/nodes;
40
// ------------------------------------------------------------------------
41
// --- Check that No. of processes matches the problem size.
42

    
43
      tstinp( mc, mr, nc, nr, me, nodes );
44

    
45
      err = ( 10.0*n*m )* 1.0e-10;         // --- Allowed error tolerance.
46
// -------------------------------------------------------------------------
47
// --- Allocate partitioned arrays and generate data.
48
      
49
      carr  = makmat( mr, nc ); cari  = makmat( mr, nc );
50
      arr1r = makmat( mr, nc ); arr1i = makmat( mr, nc );
51
      arr2r = makmat( mc, nr ); arr2i = makmat( mc, nr );
52
      ur    = calloc( nx, sizeof ( double ) );
53
      ui    = calloc( nx, sizeof ( double ) );
54
      wr    = calloc( nx, sizeof ( double ) );
55
      wi    = calloc( nx, sizeof ( double ) );
56
      gendat( mr, nc, carr, cari );
57
//--------------------------------------------------------------------------
58
//--- Repeat FFT 'nrep' times for this problem size and do timing.
59
 
60
      time1 = MPI_Wtime();
61
      time2 = 0.0;
62
      for( irep = 1; irep <= nrep; irep++ ) {
63
         cp_arr2d( mr, nc, carr, arr1r );
64
         cp_arr2d( mr, nc, cari, arr1i );
65
//---------------------------------------------------------------------------
66
//--- Do 1st transposition.
67

    
68
         gtrans( nr, nc, sizes[me][0], sizes[me][1], 
69
                 arr1r, arr1i, arr2r, arr2i, 0, &time2 );
70
//---------------------------------------------------------------------------
71
//--- Do 1st pass of MC NR-length FFTs per processor.
72

    
73
         cfft4( 0, m1, ur, ui, arr2r[0], arr2i[0], wr, wi );
74
         for( i = 0; i < mc; i++ ) {
75
            cfft4( 1, m1, ur, ui, arr2r[i], arr2i[i], wr, wi );
76
         }
77
//---------------------------------------------------------------------------
78
//--- Multiply with twiddle factors.
79

    
80
         twiddle( mc, nr, arr2r, arr2i );
81
//---------------------------------------------------------------------------
82
//--- Do 2nd transposition.
83

    
84
         gtrans( nc, nr, sizes[me][1], sizes[me][0],
85
                 arr2r, arr2i, arr1r, arr1i, 1, &time2 );
86
//---------------------------------------------------------------------------
87
//--- Do 2nd pass of MR NC-length FFTs per processor.
88

    
89
         cfft4( 0, m2, ur, ui, arr1r[0], arr1i[0], wr, wi );
90
         for( i = 0; i < mr; i++ ) {
91
            cfft4( 1, m2, ur, ui, arr1r[i], arr1i[i], wr, wi );
92
         } 
93
//---------------------------------------------------------------------------
94
//--- Do 3rd transposition.
95

    
96
         gtrans( nr, nc, sizes[me][0], sizes[me][1], 
97
                 arr1r, arr1i, arr2r, arr2i, 0, &time2 );
98
      }
99
      time1 = MPI_Wtime() - time1;
100
// -------------------------------------------------------------------------
101
// --- Check for errors and correct timing for filling of arrays.
102

    
103
      ok   = check( mc, nr, arr2r, arr2i, err );
104
      corr = MPI_Wtime();
105
      for( irep = 0; i < nrep; i++ ) {
106
         cp_arr2d( mr, nc, carr, arr1r );
107
         cp_arr2d( mr, nc, cari, arr1i );
108
      }
109
      corr  = MPI_Wtime() - corr;
110
      time1 = time1 - corr;
111
      MPI_Reduce( &time1, &gtime1, 1, MPI_DOUBLE, MPI_MAX, 0, comm );
112
      time1 = gtime1/(double)nrep;
113
      MPI_Reduce( &time2, &gtime2, 1, MPI_DOUBLE, MPI_MAX, 0, comm );
114
      time2 = gtime2/(double)nrep;
115
// -------------------------------------------------------------------------
116
// --- Calculate Mflop rates.
117

    
118
      if ( me == 0 ) {
119
         nflops( m, &mflint, &mfltrn );
120
         mflops = 1.0e-6*( mflint + mfltrn )/gtime1;
121
         frac   = 100.0*(time2/time1);
122
         prtspeed( n, time1, mflops, time2, frac, ok );
123
      }
124
      free( wi ); free( wr );
125
      free( ui ); free( ur );
126
      delmat( mc, arr2i ); delmat( mc, arr2r );
127
      delmat( mr, arr1i ); delmat( mr, arr1r );
128
      delmat( mr, cari ) ; delmat( mr, carr );
129
   }
130
   if ( me == 0 ) {
131
      printf( "--------------------------------------------------" );
132
      printf( "----------------------\nRan OK\n" );
133
   }
134
   MPI_Finalize();
135
}