Statistics
| Branch: | Revision:

root / synthbench / euroben-ports / base / Fortran-MPI / mod2a / .svn / text-base / mod2a.f.svn-base @ 0:839f52ef7657

History | View | Annotate | Download (11.1 kB)

1
      Program mod2a
! **********************************************************************
! *** This program is a distributed-memory version of                ***
! *** EuroBen Benchmark program MOD2A (generalized matrix-vector     ***
! *** multiplication).                                               ***
! ***                                                                ***
! *** Copyright: European Benchmark Group p/o                        ***
! ***            Computational Physics Dept. Utrecht                 ***
! ***            P.O. Box 80125                                      ***
! ***            3508 TD Utrecht                                     ***
! ***            The Netherlands                                     ***
! ***                                                                ***
! *** Author of this program: Aad J. van der Steen                   ***
! *** Date                    04/29/1998                             ***
! *** Based on the shared-memory version of Ruud van der Pas,        ***
! ***                                           11/24/1988           ***
! *** and Peter de Rijk                       , 02/01/1993           ***
! *** Improved communication                  , 05/14/1996           ***
! *** Total rework                            , 04/29/1998           ***
! **********************************************************************
! --- Version 3.3
!
! --- Purpose of program Mod2a
!     ------------------------
!     This program measures the performance of the matrix-vector product
!     y = A x  and  y = A' x, for a full  n x n  matrix A  with a vector
!     of length  n.
!     Several variants are examined.
!
! --- Remarks
!     -------
!     It is possible to measure the performance of a special library
!     routine for matrix vector multiplication.
!     If such a routine is available, the recipe is:
!       - Replace the string DGEMV in character string LNAME by the
!         appropriate name.
!       - Set parameter ILIB to a non-zero integer value.
!       - Activate in subroutine LIBVER the call to DGEMV or replace
!         this statement with the appropriate call to the library
!         routine.
!     Warning: To prevent making mistakes, there is NO source version of
!              DGEMV included in this program.
!              The default situation is ILIB=0 i.e. no library version
!              present!
!
! --- A EuroBen encapsulation routine (csetup) is used for setting up
!     the network. In addition, the EuroBen status routine 'state' is
!     called where me == 0. Routine 'state' should be customized for
!     the machine at hand.
!
! --- Presently, the maximum number of nodes allowed is 2048. To
!     increase this bound, increase the value of parameter 'maxnod'
!     in all relevant routines.
!
! --- Program flow
!     ------------
!     1. Set up the network and start the wall-clock timer.
!     2. Read (nrep, n) pairs from 'mod2a.in' until end of file; for
!        each accepted pair time every variant for 'N' and 'T' and
!        check the result against the reference vector ychk.
!     3. Where me == 0, print one table section per variant.
!     4. Print the elapsed wall time and finalize MPI.
!
! --- Layout of the result table tiperf
!     ---------------------------------
!     Row (v-1)*mxcase + icase holds the results of variant v for input
!     case icase. Columns 1 and 2 hold extime and perfor of the 'N'
!     call, columns 3 and 4 those of the 'T' call.
!
! --- NOTE(review): this unit has no Implicit None. Adding it is
!     advisable once it is confirmed that the used modules provide a
!     typed MPI_Wtime; the undeclared names that used to hide behind
!     implicit typing (yck, nt) have been corrected below.
! ----------------------------------------------------------------------
      Use                   dist_module
      Use                   numerics
      Integer, Parameter :: nmax = 2500
      Integer, Parameter :: ncases = 4, lda = nmax, mxcase = 50,
     &                      ilib = 0

      Character          :: modnam*8, text(ncases+1)*20, lname*43
      Integer            :: icase, info, ndim(mxcase)
      Integer            :: i, j, k, n, m, nrep, ipoint,
     &                      lchk(nmax)
      Real(l_)           :: tiperf((ncases+1)*mxcase,4), extime, perfor
      Real(l_)           :: x(nmax), y(nmax), a(lda,nmax), wrk(nmax),
     &                      ychk(nmax)
      Real(l_)           :: start_time, end_time

      External              mvrpln, mvrur4, mvcpln, mvcur4
      Data modnam /        'mod2a   ' /

!                           123456789 123456789 1
      Data text   /        'Rows,    plainly    ' ,
     &                     'Rows,    unrolled 4 ' ,
     &                     'Columns, plainly    ' ,
     &                     'Columns, unrolled 4 ' ,
     &                     'Library routine(s)  ' /
!
!                           123456789 123456789 123456789 123456789 1234
      Data lname  /        'Library routine(s) used: DGEMV           ' /
! ----------------------------------------------------------------------
! --- Set up communication network and print test status.

      Call csetup
      If ( me == 0 ) Call state (modnam)
      start_time = MPI_Wtime()
! ----------------------------------------------------------------------
! --- Open file containing the repetition factor and the problem size
!     for each problem. Read and compute until EOF (maximum no. of
!     cases allowed is mxcase = 50). The maximum problem size
!     nmax = 2500.
!
!     icase counts only the cases that were accepted and measured, so
!     the print section never touches rows that were not filled in.
!     info doubles as I/O status here; it is overwritten by
!     MPI_Finalize at the end of the program.

      icase = 0
      Open( 1, File = 'mod2a.in', Status = 'old', Action = 'read',
     &      IOStat = info )
      If ( info /= 0 .and. me == 0 )
     &   Print *, 'Cannot open input file mod2a.in'

! --- The loop is skipped entirely when the file could not be opened.
      cases: Do While ( info == 0 )
         Read( 1, *, IOStat = info ) nrep, n
         If ( info /= 0 ) Then
! ---       Negative status: end of file (normal termination of the
!           loop). Positive status: malformed input record.
            If ( info > 0 .and. me == 0 )
     &         Print *, 'Error reading mod2a.in'
            Exit cases
         End If

! ---    Validate before counting the case.
         If ( icase >= mxcase ) Then
            If ( me == 0 ) Print *, 'More than mxcase = ', mxcase,
     &                       ': increase mxcase'
            Exit cases
         End If
         If ( n > nmax ) Then
            If ( me == 0 ) Print *, 'n > nmax = ', nmax,
     &                              ': increase nmax'
            Exit cases
         End If
         icase = icase + 1
         ndim(icase) = n
! ----------------------------------------------------------------------
! --- Distribute a as evenly as possible over the available processors.

         Call evdist( n )
         Call bsaddr
         m = sizes(me)

! --- Generate the matrix A and vector x.

         Call matgen( a, x, lda, n )
! ----------------------------------------------------------------------
! --- For checking the correctness we generate the solution vector by
!     independent means:

         Call mkbnds( a, lda, ychk )
! ----------------------------------------------------------------------
!                                                ** Row oriented variant
         ipoint = icase

         Call timing( 'N', m, n, a, lda, x, y, mvrpln,
     &                nrep, extime, perfor, wrk )
         Call check( 'mvrpln', 'N', y, ychk, lchk )
         tiperf(ipoint,1) = extime
         tiperf(ipoint,2) = perfor

         Call timing( 'T', m, n, a, lda, x, y, mvrpln,
     &                nrep, extime, perfor, wrk )
         Call check( 'mvrpln', 'T', y, ychk, lchk )
         tiperf(ipoint,3) = extime
         tiperf(ipoint,4) = perfor
! ----------------------------------------------------------------------
!                            ** Row oriented variant with loop unrolling
         ipoint = icase + mxcase

         Call timing( 'N', m, n, a, lda, x, y, mvrur4,
     &                nrep, extime, perfor, wrk )
         Call check( 'mvrur4', 'N', y, ychk, lchk )
         tiperf(ipoint,1) = extime
         tiperf(ipoint,2) = perfor

         Call timing( 'T', m, n, a, lda, x, y, mvrur4,
     &                nrep, extime, perfor, wrk )
         Call check( 'mvrur4', 'T', y, ychk, lchk )
         tiperf(ipoint,3) = extime
         tiperf(ipoint,4) = perfor
! ----------------------------------------------------------------------
!                                             ** Column oriented variant
         ipoint = icase + 2*mxcase

         Call timing( 'N', m, n, a, lda, x, y, mvcpln,
     &                nrep, extime, perfor, wrk )
         Call check( 'mvcpln', 'N', y, ychk, lchk )
         tiperf(ipoint,1) = extime
         tiperf(ipoint,2) = perfor

         Call timing( 'T', m, n, a, lda, x, y, mvcpln,
     &                nrep, extime, perfor, wrk )
         Call check ( 'mvcpln', 'T', y, ychk, lchk )
         tiperf(ipoint,3) = extime
         tiperf(ipoint,4) = perfor
! ----------------------------------------------------------------------
!                         ** Column oriented variant with loop unrolling
         ipoint = icase + 3*mxcase

         Call timing( 'N', m, n, a, lda, x, y, mvcur4,
     &                nrep, extime, perfor, wrk )
         Call check( 'mvcur4', 'N', y, ychk, lchk )
         tiperf(ipoint,1) = extime
         tiperf(ipoint,2) = perfor

         Call timing( 'T', m, n, a, lda, x, y, mvcur4,
     &                nrep, extime, perfor, wrk )
         Call check( 'mvcur4', 'T', y, ychk, lchk )
         tiperf(ipoint,3) = extime
         tiperf(ipoint,4) = perfor
! ----------------------------------------------------------------------
!                                             ** Special library variant
         If ( ilib /= 0 ) Then
            ipoint = icase + 4*mxcase

! --- Special library version of matrix vector multiplication.
!     The reference vector is ychk, as for all other variants.

            Call libver( 'N', m, n, a, lda, x, y,
     &                   nrep, extime, perfor, wrk )
            Call check( 'libver', 'N', y, ychk, lchk )
            tiperf(ipoint,1) = extime
            tiperf(ipoint,2) = perfor

            Call libver( 'T', m, n, a, lda, x, y,
     &                   nrep, extime, perfor, wrk )
            Call check( 'libver', 'T', y, ychk, lchk )
            tiperf(ipoint,3) = extime
            tiperf(ipoint,4) = perfor
         End If
! ----------------------------------------------------------------------
! >>> Get new case <<<

      End Do cases

! --- Closing a unit that was never connected is harmless, so no guard
!     is needed for the failed-open path.
      Close( 1 )
! ----------------------------------------------------------------------
!                                                  ** Print the results.
      If ( me == 0 ) Then
         If ( icase == 0 ) Then
! ---       Nothing was measured: ndim, nrep and tiperf are undefined.
            Print *, 'No valid cases read from mod2a.in'
         Else
            Print 9010, nodes, nrep

! ---       One section per variant: the first case carries the variant
!           name, the following cases a blank label.
            Do k = 1, ncases
               ipoint = (k-1)*mxcase + 1
               Print 9020, text(k), ndim(1), ndim(1),
     &                     (tiperf(ipoint,j), j=1,4)
               Do i = 2, icase
                  ipoint = ipoint + 1
                  Print 9030, ' ', ndim(i), ndim(i),
     &                        (tiperf(ipoint,j), j=1,4)
               End Do
            End Do
!
            If ( ilib /= 0 ) Then
               ipoint = ncases*mxcase + 1
               Print 9020, text(ncases+1), ndim(1), ndim(1),
     &                     (tiperf(ipoint,j), j=1,4)
               Do i = 2, icase
                  ipoint = ipoint + 1
                  Print 9030, ' ', ndim(i), ndim(i),
     &                        (tiperf(ipoint,j), j=1,4)
               End Do
               Print 9040, lname
            End If
         End If
      End If

! --- end_time holds the elapsed time since start_time, not a time
!     stamp.
      end_time =  MPI_Wtime() - start_time
      If (me == 0) Then
         Write(6,22) 'Walltime: ', end_time, " s"
 22      Format(A,F9.3,A)
      End If
! ----------------------------------------------------------------------
!                                               ** Exit network orderly.
      Call MPI_Finalize( info )
      If ( me == 0 ) Then
         Write(6,*) 'Program terminated normally'
      End If
! ----------------------------------------------------------------------
!                                                            ** Formats.
! --- NOTE(review): both integer fields in format 9010 are I3 and will
!     print '***' for values above 999, although the header comment
!     allows up to 2048 nodes. Left unchanged to keep the output layout
!     stable for existing post-processing.
 9010 Format( //79('-')/
     &'Generalized matrix-vector multiplication'/
     &4X,'y := alpha*A*x + beta*y   or',
     &3X,'y := alpha*A''*x + beta*y',/
     &'Several variants are measured, No. of procs. = ', i3 /
     &79('-')//
     &'Initial repetition factor used in timings: ',I3//
     &'Variant, i.e. array',18X,'No Transpose',12X,'Transpose'/
     &'A is accessed by',4X,4X,'M',4X,'N',
     &    2(4X,'seconds',4X,'Mflop/s ')/
     &79('-') )
 9020 Format(/A20,2(1X,I4),1P,2(3X,E9.3,2X,E9.3) )
 9030 Format( A20,2(1X,I4),1P,2(3X,E9.3,2X,E9.3) )
 9040 Format( A43 )
! ----------------------------------------------------------------------
      End Program mod2a