root / synthbench / euroben-ports / base / C-MPI / mod2f / .svn / text-base / mod2f.c.svn-base @ 0:839f52ef7657
History | View | Annotate | Download (4.9 kB)
1 |
#include <stdio.h> |
---|---|
2 |
#include <stdlib.h> |
3 |
#include <math.h> |
4 |
#include <mpi.h> |
5 |
#include "fundefs.h" |
6 |
#define MAXNOD 1000000 |
7 |
|
8 |
// Rank of this process and total number of processes; main() fills both
// via MPI_Comm_rank / MPI_Comm_size on MPI_COMM_WORLD.
int me, nodes; |
9 |
// Per-process tables with two entries per rank. main() passes
// sizes[me][0] and sizes[me][1] to gtrans(); presumably both tables are
// filled by sizoff( nr, nc ) -- TODO confirm against fundefs.h.
int offset[MAXNOD][2], sizes[MAXNOD][2]; |
10 |
|
11 |
int main( int argc, char* argv[] ) |
12 |
{ |
13 |
MPI_Comm comm = MPI_COMM_WORLD; |
14 |
int m, n, nrep; |
15 |
int mflint, mfltrn, ok, gok; |
16 |
int i, irep, m1, m2, mc, mr, nc, nr, nx; |
17 |
double **arr1r, **arr1i, **arr2r, **arr2i, **carr, **cari; |
18 |
double *ur, *ui, *wr, *wi; |
19 |
double corr, err, frac, mflops, time1, gtime1, time2, gtime2; |
20 |
FILE *inl; |
21 |
// ------------------------------------------------------------------------ |
22 |
MPI_Init( &argc, &argv ); |
23 |
MPI_Comm_rank( comm, &me ); |
24 |
MPI_Comm_size( comm, &nodes ); |
25 |
if ( me == 0 ) { |
26 |
state( "mod2f" ); |
27 |
prthead( nodes ); |
28 |
} |
29 |
inl = fopen( "mod2f.in", "r" ); |
30 |
while( ( fscanf( inl, "%d%d\n", &n, &nrep ) != EOF ) ){ |
31 |
m = ilog2( n ); |
32 |
m2 = m/2; |
33 |
m1 = m%2 == 0 ? m2 : m2 + 1; |
34 |
nc = pow( 2, m2 ); |
35 |
nr = pow( 2, m1 ); |
36 |
sizoff( nr, nc ); |
37 |
nx = nc > nr ? nc : nr; |
38 |
mr = nr/nodes; |
39 |
mc = nc/nodes; |
40 |
// ------------------------------------------------------------------------ |
41 |
// --- Check that No. of processes matches the problem size. |
42 |
|
43 |
tstinp( mc, mr, nc, nr, me, nodes ); |
44 |
|
45 |
err = ( 10.0*n*m )* 1.0e-10; // --- Allowed error tolerance. |
46 |
// ------------------------------------------------------------------------- |
47 |
// --- Allocate partitioned arrays and generate data. |
48 |
|
49 |
carr = makmat( mr, nc ); cari = makmat( mr, nc ); |
50 |
arr1r = makmat( mr, nc ); arr1i = makmat( mr, nc ); |
51 |
arr2r = makmat( mc, nr ); arr2i = makmat( mc, nr ); |
52 |
ur = calloc( nx, sizeof ( double ) ); |
53 |
ui = calloc( nx, sizeof ( double ) ); |
54 |
wr = calloc( nx, sizeof ( double ) ); |
55 |
wi = calloc( nx, sizeof ( double ) ); |
56 |
gendat( mr, nc, carr, cari ); |
57 |
//-------------------------------------------------------------------------- |
58 |
//--- Repeat FFT 'nrep' times for this problem size and do timing. |
59 |
|
60 |
time1 = MPI_Wtime(); |
61 |
time2 = 0.0; |
62 |
for( irep = 1; irep <= nrep; irep++ ) { |
63 |
cp_arr2d( mr, nc, carr, arr1r ); |
64 |
cp_arr2d( mr, nc, cari, arr1i ); |
65 |
//--------------------------------------------------------------------------- |
66 |
//--- Do 1st transposition. |
67 |
|
68 |
gtrans( nr, nc, sizes[me][0], sizes[me][1], |
69 |
arr1r, arr1i, arr2r, arr2i, 0, &time2 ); |
70 |
//--------------------------------------------------------------------------- |
71 |
//--- Do 1st pass of MC NR-length FFTs per processor. |
72 |
|
73 |
cfft4( 0, m1, ur, ui, arr2r[0], arr2i[0], wr, wi ); |
74 |
for( i = 0; i < mc; i++ ) { |
75 |
cfft4( 1, m1, ur, ui, arr2r[i], arr2i[i], wr, wi ); |
76 |
} |
77 |
//--------------------------------------------------------------------------- |
78 |
//--- Multiply with twiddle factors. |
79 |
|
80 |
twiddle( mc, nr, arr2r, arr2i ); |
81 |
//--------------------------------------------------------------------------- |
82 |
//--- Do 2nd transposition. |
83 |
|
84 |
gtrans( nc, nr, sizes[me][1], sizes[me][0], |
85 |
arr2r, arr2i, arr1r, arr1i, 1, &time2 ); |
86 |
//--------------------------------------------------------------------------- |
87 |
//--- Do 2nd pass of MR NC-length FFTs per processor. |
88 |
|
89 |
cfft4( 0, m2, ur, ui, arr1r[0], arr1i[0], wr, wi ); |
90 |
for( i = 0; i < mr; i++ ) { |
91 |
cfft4( 1, m2, ur, ui, arr1r[i], arr1i[i], wr, wi ); |
92 |
} |
93 |
//--------------------------------------------------------------------------- |
94 |
//--- Do 3rd transposition. |
95 |
|
96 |
gtrans( nr, nc, sizes[me][0], sizes[me][1], |
97 |
arr1r, arr1i, arr2r, arr2i, 0, &time2 ); |
98 |
} |
99 |
time1 = MPI_Wtime() - time1; |
100 |
// ------------------------------------------------------------------------- |
101 |
// --- Check for errors and correct timing for filling of arrays. |
102 |
|
103 |
ok = check( mc, nr, arr2r, arr2i, err ); |
104 |
corr = MPI_Wtime(); |
105 |
for( irep = 0; i < nrep; i++ ) { |
106 |
cp_arr2d( mr, nc, carr, arr1r ); |
107 |
cp_arr2d( mr, nc, cari, arr1i ); |
108 |
} |
109 |
corr = MPI_Wtime() - corr; |
110 |
time1 = time1 - corr; |
111 |
MPI_Reduce( &time1, >ime1, 1, MPI_DOUBLE, MPI_MAX, 0, comm ); |
112 |
time1 = gtime1/(double)nrep; |
113 |
MPI_Reduce( &time2, >ime2, 1, MPI_DOUBLE, MPI_MAX, 0, comm ); |
114 |
time2 = gtime2/(double)nrep; |
115 |
// ------------------------------------------------------------------------- |
116 |
// --- Calculate Mflop rates. |
117 |
|
118 |
if ( me == 0 ) { |
119 |
nflops( m, &mflint, &mfltrn ); |
120 |
mflops = 1.0e-6*( mflint + mfltrn )/gtime1; |
121 |
frac = 100.0*(time2/time1); |
122 |
prtspeed( n, time1, mflops, time2, frac, ok ); |
123 |
} |
124 |
free( wi ); free( wr ); |
125 |
free( ui ); free( ur ); |
126 |
delmat( mc, arr2i ); delmat( mc, arr2r ); |
127 |
delmat( mr, arr1i ); delmat( mr, arr1r ); |
128 |
delmat( mr, cari ) ; delmat( mr, carr ); |
129 |
} |
130 |
if ( me == 0 ) { |
131 |
printf( "--------------------------------------------------" ); |
132 |
printf( "----------------------\nRan OK\n" ); |
133 |
} |
134 |
MPI_Finalize(); |
135 |
} |