root / synthbench / euroben-ports / base / Fortran-MPI / mod2a / .svn / text-base / mod2a.f.svn-base @ 0:839f52ef7657
History | View | Annotate | Download (11.1 kB)
1 |
! EuroBen MOD2A: distributed-memory matrix-vector product benchmark.
      Program mod2a
! **********************************************************************
! *** This program is a distributed-memory version of               ***
! *** EuroBen Benchmark program MOD2A (generalized matrix-vector    ***
! *** multiplication).                                              ***
! ***                                                               ***
! *** Copyright: European Benchmark Group p/o                       ***
! ***            Computational Physics Dept. Utrecht                ***
! ***            P.O. Box 80125                                     ***
! ***            3508 TD Utrecht                                    ***
! ***            The Netherlands                                    ***
! ***                                                               ***
! *** Author of this program: Aad J. van der Steen                  ***
! *** Date                    04/29/1998                            ***
! *** Based on the shared-memory version of Ruud van der Pas,       ***
! ***                         11/24/1988                            ***
! *** and Peter de Rijk     , 02/01/1993                            ***
! *** Improved communication, 05/14/1996                            ***
! *** Total rework          , 04/29/1998                            ***
! **********************************************************************
! --- Version 3.3
!
! --- Purpose of program Mod2a
!     ------------------------
!     This program measures the performance of the matrix-vector product
!     y = A x  and  y = A' x, for a full n x n matrix A with a vector
!     of length n.
!     Several variants are examined.
!
! --- Remarks
!     -------
!     It is possible to measure the performance of a special library
!     routine for matrix vector multiplication.
!     If such a routine is available, the recipe is:
!       - Replace the string DGEMV in character string LNAME by the
!         appropriate name.
!       - Set parameter ILIB to a non-zero integer value.
!       - Activate in subroutine LIBVER the call to DGEMV or replace
!         this statement with the appropriate call to the library
!         routine.
!     Warning: To prevent making mistakes, there is NO source version of
!              DGEMV included in this program.
!              The default situation is ILIB=0, i.e., no library version
!              present!
!
! --- A EuroBen encapsulation routine is used for setting up the network
!     (csetup). In addition, the EuroBen status routine 'state' is used;
!     it should be customized for the machine at hand. (The original
!     remark also mentioned a "routine for accessing" something, but
!     that sentence was truncated.)
!
! --- Presently, the maximum number of nodes allowed is 2048. To
!     increase this bound, increase the value of parameter 'maxnod'
!     in all relevant routines.
!
! --- Input
!     -----
!     File 'mod2a.in': one record per case holding the repetition
!     factor (nrep) and the problem size (n), read list-directed until
!     EOF. Reading stops early when more than mxcase records are
!     present or when a record asks for n > nmax; the offending record
!     is not measured and not reported.
!
! --- Result storage
!     --------------
!     tiperf(row,1:2) = execution time and performance, no transpose
!     tiperf(row,3:4) = execution time and performance, transpose
!     with row = (k-1)*mxcase + icase for variant k = 1,...,ncases and
!     k = ncases+1 for the optional library variant.
!
! --- NOTE(review): this unit has no 'Implicit None'. Names such as
!     me, nodes, sizes, l_ and MPI_Wtime are not declared here and are
!     presumably provided by dist_module / numerics. Confirm that
!     before adding 'Implicit None', which would have caught the two
!     misspelled names corrected in this revision.
! ----------------------------------------------------------------------
      Use dist_module
      Use numerics
      Integer, Parameter :: nmax = 2500
      Integer, Parameter :: ncases = 4, lda = nmax, mxcase = 50,
     &                      ilib = 0

      Character :: modnam*8, text(ncases+1)*20, lname*43
      Integer   :: icase, info, ndim(mxcase)
      Integer   :: i, j, k, n, m, nrep, ipoint,
     &             lchk(nmax)
      Real(l_)  :: tiperf((ncases+1)*mxcase,4), extime, perfor
      Real(l_)  :: x(nmax), y(nmax), a(lda,nmax), wrk(nmax),
     &             ychk(nmax)
      Real(l_)  :: start_time, end_time

      External mvrpln, mvrur4, mvcpln, mvcur4
      Data modnam / 'mod2a   ' /

!                    123456789 123456789 1
      Data text   / 'Rows, plainly       ' ,
     &              'Rows, unrolled 4    ' ,
     &              'Columns, plainly    ' ,
     &              'Columns, unrolled 4 ' ,
     &              'Library routine(s)  ' /
!
!                  123456789 123456789 123456789 123456789 1234
      Data lname / 'Library routine(s) used: DGEMV ' /
! ----------------------------------------------------------------------
! --- Set up communication network and print test status.

      Call csetup
      If ( me == 0 ) Call state (modnam)
      start_time = MPI_Wtime()
! ----------------------------------------------------------------------
! --- Open file containing the repetition factor and the problem size
!     for each problem. Read and compute until EOF (maximum no. of
!     cases allowed is mxcase = 50). The maximum problem size
!     nmax = 2500.

      icase = 0
!     Give nrep a value up front: it is printed in the report header
!     even when 'mod2a.in' yields no case at all.
      nrep  = 0
      Open( 1, File = 'mod2a.in' )
   10 Read( 1, *, End = 20 ) nrep, n

!     Validate the record BEFORE counting it, so that icase never
!     exceeds mxcase (the bound of ndim and of each tiperf section)
!     and a rejected case never shows up in the report with
!     unmeasured timings.
      If ( icase >= mxcase ) Then
         If ( me == 0 ) Print *, 'More than mxcase = ', mxcase,
     &                           ': increase mxcase'
         Go To 20
      End If
      If ( n > nmax ) Then
         If ( me == 0 ) Print *, 'n > nmax = ', nmax,
     &                           ': increase nmax'
         Go To 20
      End If
      icase       = icase + 1
      ndim(icase) = n
! ----------------------------------------------------------------------
! --- Distribute a as evenly as possible over the available processors.

      Call evdist( n )
      Call bsaddr
      m = sizes(me)

! --- Generate the matrix A and vector x.

      Call matgen( a, x, lda, n )
! ----------------------------------------------------------------------
! --- For checking the correctness we generate the solution vector by
!     independent means:

      Call mkbnds( a, lda, ychk )
! ----------------------------------------------------------------------
!  ** Row oriented variant
      ipoint = icase

      Call timing( 'N', m, n, a, lda, x, y, mvrpln,
     &             nrep, extime, perfor, wrk )

! --- Check correctness:

      Call check( 'mvrpln', 'N', y, ychk, lchk )

      tiperf(ipoint,1) = extime
      tiperf(ipoint,2) = perfor

      Call timing( 'T', m, n, a, lda, x, y, mvrpln,
     &             nrep, extime, perfor, wrk )

! --- Check correctness:

      Call check( 'mvrpln', 'T', y, ychk, lchk )

      tiperf(ipoint,3) = extime
      tiperf(ipoint,4) = perfor
! ----------------------------------------------------------------------
!  ** Row oriented variant with loop unrolling
      ipoint = icase + mxcase

      Call timing( 'N', m, n, a, lda, x, y, mvrur4,
     &             nrep, extime, perfor, wrk )

! --- Check correctness:

      Call check( 'mvrur4', 'N', y, ychk, lchk )

      tiperf(ipoint,1) = extime
      tiperf(ipoint,2) = perfor

      Call timing( 'T', m, n, a, lda, x, y, mvrur4,
     &             nrep, extime, perfor, wrk )

! --- Check correctness:

      Call check( 'mvrur4', 'T', y, ychk, lchk )

      tiperf(ipoint,3) = extime
      tiperf(ipoint,4) = perfor
! ----------------------------------------------------------------------
!  ** Column oriented variant
      ipoint = icase + 2*mxcase

      Call timing( 'N', m, n, a, lda, x, y, mvcpln,
     &             nrep, extime, perfor, wrk )

! --- Check correctness:

      Call check( 'mvcpln', 'N', y, ychk, lchk )

      tiperf(ipoint,1) = extime
      tiperf(ipoint,2) = perfor

      Call timing( 'T', m, n, a, lda, x, y, mvcpln,
     &             nrep, extime, perfor, wrk )

! --- Check correctness:

      Call check ( 'mvcpln', 'T', y, ychk, lchk )

      tiperf(ipoint,3) = extime
      tiperf(ipoint,4) = perfor
! ----------------------------------------------------------------------
!  ** Column oriented variant with loop unrolling
      ipoint = icase + 3*mxcase

      Call timing( 'N', m, n, a, lda, x, y, mvcur4,
     &             nrep, extime, perfor, wrk )

! --- Check correctness:

      Call check( 'mvcur4', 'N', y, ychk, lchk )

      tiperf(ipoint,1) = extime
      tiperf(ipoint,2) = perfor

      Call timing( 'T', m, n, a, lda, x, y, mvcur4,
     &             nrep, extime, perfor, wrk )

! --- Check correctness:

      Call check( 'mvcur4', 'T', y, ychk, lchk )

      tiperf(ipoint,3) = extime
      tiperf(ipoint,4) = perfor
! ----------------------------------------------------------------------
!  ** Special library variant (only when ilib is set non-zero)
      If ( ilib /= 0 ) Then
         ipoint = icase + 4*mxcase

! --- Special library version of matrix vector multiplication

         Call libver( 'N', m, n, a, lda, x, y,
     &                nrep, extime, perfor, wrk )

! --- Check correctness against the reference vector ychk (this was
!     misspelled 'yck', an undeclared implicitly typed scalar).

         Call check( 'libver', 'N', y, ychk, lchk )

         tiperf(ipoint,1) = extime
         tiperf(ipoint,2) = perfor

         Call libver( 'T', m, n, a, lda, x, y,
     &                nrep, extime, perfor, wrk )

! --- Check correctness:

         Call check( 'libver', 'T', y, ychk, lchk )

         tiperf(ipoint,3) = extime
         tiperf(ipoint,4) = perfor
      End If
! ----------------------------------------------------------------------
!  >>> Get new case at label 10 <<<

      Go To 10
! ----------------------------------------------------------------------
!  ** All input consumed (or rejected): release the input unit.
   20 Close( 1 )
! ----------------------------------------------------------------------
!  ** Print the results (process 0 only).
      If ( me == 0 ) Then
         Print 9010, nodes, nrep

!        Without at least one measured case ndim(1) and tiperf are
!        undefined, so the tables are skipped.
         If ( icase > 0 ) Then
            Do k = 1, ncases
               ipoint = (k-1)*mxcase + 1
               Print 9020, text(k), ndim(1), ndim(1),
     &                     (tiperf(ipoint,j), j=1,4)
               Do i = 2, icase
                  ipoint = ipoint + 1
                  Print 9030, ' ', ndim(i), ndim(i),
     &                        (tiperf(ipoint,j), j=1,4)
               End Do
            End Do

            If ( ilib /= 0 ) Then
               ipoint = ncases*mxcase + 1
               Print 9020, text(ncases+1), ndim(1), ndim(1),
     &                     (tiperf(ipoint,j), j=1,4)
!              Upper bound is icase, as for the other variants (it
!              was the undeclared, never assigned name 'nt').
               Do i = 2, icase
                  ipoint = ipoint + 1
                  Print 9030, ' ', ndim(i), ndim(i),
     &                        (tiperf(ipoint,j), j=1,4)
               End Do
               Print 9040, lname
            End If
         End If
      End If

!     Despite its name, end_time holds the elapsed wall-clock time.
      end_time = MPI_Wtime() - start_time
      If ( me == 0 ) Then
         Write(6,22) 'Walltime: ', end_time, " s"
   22    Format(A,F9.3,A)
      End If
! ----------------------------------------------------------------------
!  ** Exit network orderly.
      Call MPI_Finalize( info )
      If ( me == 0 ) Then
         Write(6,*) 'Program terminated normally'
      End If
! ----------------------------------------------------------------------
!  ** Formats.
 9010 Format( //79('-')/
     &'Generalized matrix-vector multiplication'/
     &4X,'y := alpha*A*x + beta*y or',
     &3X,'y := alpha*A''*x + beta*y',/
     &'Several variants are measured, No. of procs. = ', i3 /
     &79('-')//
     &'Initial repetition factor used in timings: ',I3//
     &'Variant, i.e. array',18X,'No Transpose',12X,'Transpose'/
     &'A is accessed by',4X,4X,'M',4X,'N',
     & 2(4X,'seconds',4X,'Mflop/s ')/
     &79('-') )
 9020 Format(/A20,2(1X,I4),1P,2(3X,E9.3,2X,E9.3) )
 9030 Format( A20,2(1X,I4),1P,2(3X,E9.3,2X,E9.3) )
 9040 Format( A43 )
! ----------------------------------------------------------------------
      End Program mod2a