Statistics
| Branch: | Revision:

root / synthbench / euroben-dm / mod2f / mod2f.f @ 0:839f52ef7657

History | View | Annotate | Download (7.7 kB)

1
      Program mod2f
2
! **********************************************************************
3
! *** This program is part of the EuroBen Benchmark                  ***
4
! ***                                                                ***
5
! *** Copyright: EuroBen Benchmark Group p/o                         ***
6
! ***            Academic Computing Centre Utrecht                   ***
7
! ***            P.O. Box 80.011                                     ***
8
! ***            3508 TA Utrecht                                     ***
9
! ***            The Netherlands                                     ***
10
! ***                                                                ***
11
! *** Author of this program: Aad van der Steen                      ***
12
! *** Date Version 1.0        Summer 1996                            ***
13
! *** Date Version 2.0        Spring 1998                            ***
14
! *** Date Version 2.1        Autumn 1998                            ***
15
! *** Date Version 3.0        Autumn 2005                            ***
16
! **********************************************************************
17
!  ===============
18
!  MPI Version 3.0
19
!  ===============
20
! ----------------------------------------------------------------------
21
! --- Program 'mod2f' measures the speed of a parallel 1-D 
22
!     complex-to-complex FFT on 'nodes' processors.
23
!
24
!     The FFT is factorised into 'nc' 1-D FFTs of length 'nr'.
25
!     After the first sequence of small FFTs the Real and Imaginary
26
!     parts of the sequences that are transformed are multiplied
27
!     with the appropriate twiddle factors.
28
!     Next the sequences are transposed and 'nr' 1-D FFTs of length
29
!     'nc' are done to complete the transformation process.
30
!     See for a description of this kind of implementation:
31
!
32
!     R.C. Agarwal, F.G. Gustavson, M. Zubair, "A High Performance
33
!        Parallel Algorithm for 1-D FFT", Proc. Supercomputing '94,
34
!        IEEE Press, 1994, 34--40.
35
!
36
! --- From version 2.0 on the number of columns 'nc' and the number of
37
!     rows 'nr' need not be divided exactly by the number of processors
38
!     anymore.
39
! ----------------------------------------------------------------------
40
!     Parameters:
41
!
42
!     NC*NR   -- Total length of FFT.
43
!     NC      -- No. FFTs of length NR.
44
!     NODES   -- No. of processors used.
45
!     M1      -- No. of rows of the arrays holding the NR-length FFTs.
46
!     M2      -- No. of rows of the arrays holding the NC-length FFTs.
47
! ----------------------------------------------------------------------
48
      Use                   mpi_module
49
      Use                   numerics
50
      Implicit              None
51

    
52
      Real(l_), Allocatable :: arr1r(:,:), arr1i(:,:),
53
     &                         arr2r(:,:), arr2i(:,:),
54
     &                         carr(:,:),  cari(:,:),
55
     &                         warr(:,:),  wari(:,:),
56
     &                         ur(:),      ui(:),
57
     &                         actsiz(:,:), base(:,:)
58
      Real(l_)              :: time1, time2, timx1, timx2
59
      Integer               :: ml, nl, ok
60
      Real(l_)              :: corr, ctime, err, frac, wclock
61
      Real(l_)              :: rmflint, rmfltrn, mflops
62
      Integer               :: i, irep, mc, mr, m1, m2, mflint,
63
     &                         mfltrn, mfltot, nc, nr, ncase, nrep
64
! ----------------------------------------------------------------------
65
! --- Set up communication.
66

    
67
      Call csetup
68
      If ( me == 0 ) Then
69
         Call state( 'mod2f   ' )
70
         Print 1000, nodes
71
      End If
72
! ----------------------------------------------------------------------
73
! --- Read problem sizes and initialise arrays.
74

    
75
      Open ( 1, File = 'mod2f.in' )
76
   10 Read ( 1, *, End = 20 ) m1, m2, nrep
77
      nc = 2**m2
78
      nr = 2**m1
79
      mr = nr/nodes
80
      mc = nc/nodes
81
! -----------------------------------------------------------------------
82
! --- Check that the number of processors is a power of 2.
83

    
84
      If ( mr*nodes /= nr .OR. mc*nodes /= nc ) Then
85
         Print *, 'Stop: number of processors should be a power of 2.'
86
         Stop
87
      End If
88
      ml = m1+m2
89
      nl = nc*nr
90
      err = ( 10.0_l_*nl*ml*nodes ) * 1.0e-10_l_
91
      Call sizoff( nr, nc )
92
      Allocate( carr(nr,mc),  cari(nr,mc),
93
     &          arr1r(nr,mc), arr1i(nr,mc),
94
     &          arr2r(nc,mr), arr2i(nc,mr),
95
     &          warr(nc,mr),  wari(nc,mr),
96
     &          ur(nr),       ui(nr) )
97
      Call datgen( carr, cari, nr, mc )
98
! ----------------------------------------------------------------------
99
! --- Repeat FFT 'nrep' times for this problem size.
100

    
101
      Call MPI_Barrier( comm, ierr )
102
      time2 = 0.0_l_
103
      time1 = MPI_Wtime()
104
      Do irep = 1, nrep
105
         arr1r = carr
106
         arr1i = cari
107
         Call cfft4( 0, m1, ur, ui, arr1r, arr1i, warr, wari )
108
         Do i = 1, mc
109
            Call cfft4( 1, m1, ur, ui, arr1r(1,i), arr1i(1,i), warr,
110
     &                  wari )
111
         End Do
112
! ----------------------------------------------------------------------
113
! --- Multiply with twiddle factors.
114

    
115
         Call twiddle( arr1r, arr1i, nr, mc )
116
! ----------------------------------------------------------------------
117
! --- Do transposition.
118

    
119
! ----------------------------------------------------------------------
120
! --- Do global block transpose.
121

    
122
         Call gtrans( arr1r, arr1i, arr2r, arr2i, nr, nc, mr, mc, time2)
123
! ----------------------------------------------------------------------
124
! --- Do second pass of M1 NC-length FFTs per processor.
125
 
126
         Call cfft4( 0, m2, ur, ui, arr2r, arr2i, warr, wari )
127
         Do i = 1, mr
128
            Call cfft4( 1, m2, ur, ui, arr2r(1,i), arr2i(1,i), warr, 
129
     &                  wari)
130
         End Do
131
      End Do
132
      time1 = MPI_Wtime() - time1
133
! ----------------------------------------------------------------------
134
! --- Check for errors and correct timing for filling of arrays.
135

    
136
      Call errchk( arr2r, arr2i, nc, mr, err, ok )
137
      corr = MPI_Wtime()
138
      Do irep = 1, nrep
139
         arr1r = carr
140
         arr1i = cari
141
      End Do
142
      corr = MPI_Wtime() - corr
143
      time1 = time1 - corr
144
      Call MPI_Reduce( time1, timx1, 1, rtyp, MPI_Max, 0, comm, ierr )
145
      time1 = timx1/ Real( nrep, l_ )
146
      Call MPI_Reduce( time2, timx2, 1, rtyp, MPI_Max, 0, comm, ierr )
147
      time2 = timx2/ Real( nrep, l_ )
148
      If ( me == 0 ) Then
149
! ----------------------------------------------------------------------
150
! --- Calculate Mflop rates.     
151

    
152
         Call nflops( ml, mflint, mfltrn )
153
         mflops = 1.0E-6_l_*( mfltrn + mflint )
154
     &                     / timx1
155
         frac = 100.0_l_*( time2/time1 )
156
         Print 1010, nl, time1, mflops, time2, frac, ok
157
      End If
158
      Deallocate( carr, cari, arr1r, arr1i, arr2r, arr2i,
159
     &            warr, wari, ur, ui )
160
! ----------------------------------------------------------------------
161
! --- Get new problem.
162

    
163
      Go To 10
164
     
165
! ----------------------------------------------------------------------
166
! --- End of measurements: report results.
167
!     Execution time is maximum time over all nodes.          
168

    
169
   20 If ( me == 0 ) Print 1020
170
      Call MPI_bye
171
! ----------------------------------------------------------------------
172
 1000 Format( /'Program mod2f computes FFT: No. of procs. = ', i3/
173
     &        73('-')/
174
     &       '                 FFT results, Radix-4 algorithm',/
175
     &        73('-')/
176
     &       ' Length |  Total Time  |    Speed     |  Comm. Time  |',
177
     &       '   Frac. | Check  |'/
178
     &       '   N =  |     (sec)    |  (Mflop/s)   |     (sec)    |',
179
     &       '    (%)  |   OK   |'/
180
     &        73('-') )
181
 1010 Format ( i8, '| ', g13.5, '| ', g13.5, '| ',  g13.5, '| ',  f7.3,
182
     &         ' | ', i6, 1x, '|' )
183
 1020 Format ( 73('-') )
184
! ----------------------------------------------------------------------
185
      End Program mod2f