root / synthbench / stream2 / stream2-mpi.f
History | View | Annotate | Download (6.2 kB)
1 | 0:839f52ef7657 | louridas | ************************************************* |
---|---|---|---|
2 | 0:839f52ef7657 | louridas | * Program: STREAM2 * |
3 | 0:839f52ef7657 | louridas | * Revision: 0.1, 99.10.26 * |
4 | 0:839f52ef7657 | louridas | * Author: John McCalpin * |
5 | 0:839f52ef7657 | louridas | * john@mccalpin.com * |
6 | 0:839f52ef7657 | louridas | ************************************************* |
7 | 0:839f52ef7657 | louridas | *----------------------------------------------------------------------- |
8 | 0:839f52ef7657 | louridas | * Copyright 1991-2003: John D. McCalpin |
9 | 0:839f52ef7657 | louridas | *----------------------------------------------------------------------- |
10 | 0:839f52ef7657 | louridas | * License: |
11 | 0:839f52ef7657 | louridas | * 1. You are free to use this program and/or to redistribute |
12 | 0:839f52ef7657 | louridas | * this program. |
13 | 0:839f52ef7657 | louridas | * 2. You are free to modify this program for your own use, |
14 | 0:839f52ef7657 | louridas | * including commercial use, subject to the publication |
15 | 0:839f52ef7657 | louridas | * restrictions in item 3. |
16 | 0:839f52ef7657 | louridas | * 3. You are free to publish results obtained from running this |
17 | 0:839f52ef7657 | louridas | * program, or from works that you derive from this program, |
18 | 0:839f52ef7657 | louridas | * with the following limitations: |
19 | 0:839f52ef7657 | louridas | * 3a. In order to be referred to as "STREAM2 benchmark results", |
20 | 0:839f52ef7657 | louridas | * published results must be in conformance to the STREAM |
21 | 0:839f52ef7657 | louridas | * Run Rules, (briefly reviewed below) published at |
22 | 0:839f52ef7657 | louridas | * http://www.cs.virginia.edu/stream/ref.html |
23 | 0:839f52ef7657 | louridas | * and incorporated herein by reference. |
24 | 0:839f52ef7657 | louridas | * As the copyright holder, John McCalpin retains the |
25 | 0:839f52ef7657 | louridas | * right to determine conformity with the Run Rules. |
26 | 0:839f52ef7657 | louridas | * 3b. Results based on modified source code or on runs not in |
27 | 0:839f52ef7657 | louridas | * accordance with the STREAM Run Rules must be clearly |
28 | 0:839f52ef7657 | louridas | * labelled whenever they are published. Examples of |
29 | 0:839f52ef7657 | louridas | * proper labelling include: |
30 | 0:839f52ef7657 | louridas | * "tuned STREAM2 benchmark results" |
31 | 0:839f52ef7657 | louridas | * "based on a variant of the STREAM2 benchmark code" |
32 | 0:839f52ef7657 | louridas | * Other comparable, clear and reasonable labelling is |
33 | 0:839f52ef7657 | louridas | * acceptable. |
34 | 0:839f52ef7657 | louridas | * 3c. Submission of results to the STREAM benchmark web site |
35 | 0:839f52ef7657 | louridas | * is encouraged, but not required. |
36 | 0:839f52ef7657 | louridas | * 4. Use of this program or creation of derived works based on this |
37 | 0:839f52ef7657 | louridas | * program constitutes acceptance of these licensing restrictions. |
38 | 0:839f52ef7657 | louridas | * 5. Absolutely no warranty is expressed or implied. |
39 | 0:839f52ef7657 | louridas | *----------------------------------------------------------------------- |
40 | 0:839f52ef7657 | louridas | ************************************************* |
41 | 0:839f52ef7657 | louridas | * This program measures sustained bandwidth * |
42 | 0:839f52ef7657 | louridas | * using four computational kernels: * |
43 | 0:839f52ef7657 | louridas | * * |
44 | 0:839f52ef7657 | louridas | * FILL: a(i) = 0 * |
45 | 0:839f52ef7657 | louridas | * COPY: a(i) = b(i) * |
46 | 0:839f52ef7657 | louridas | * DAXPY: a(i) = a(i) + q*b(i) * |
47 | 0:839f52ef7657 | louridas | * DOT: sum += a(i) * b(i) * |
48 | 0:839f52ef7657 | louridas | * * |
49 | 0:839f52ef7657 | louridas | * Results are presented in MB/s, assuming * |
50 | 0:839f52ef7657 | louridas | * 8 Bytes per iteration for FILL and SUM, * |
51 | 0:839f52ef7657 | louridas | * 16 Bytes per iteration for COPY, and * |
52 | 0:839f52ef7657 | louridas | * 24 Bytes per iteration for DAXPY * |
53 | 0:839f52ef7657 | louridas | ************************************************* |
*-----------------------------------------------------------------------
* STREAM2, MPI variant.
*
* Every rank times four kernels (fill, copy, daxpy, dot product) for
* NUMSIZES vector lengths spaced logarithmically between NMIN and
* NMAX.  For each length the best-of-NTIMES rate of every rank is
* summed onto rank 0 with MPI_REDUCE, and rank 0 prints the per-rank
* average in MB/s.
*
* mysecond is an external timer function; it is not defined in this
* program unit.
*-----------------------------------------------------------------------
      program stream2_mpi
      IMPLICIT NONE
      include 'mpif.h'
      integer numtask, rank
      integer rc, mpierr

      integer NMIN, NMAX, NTIMES, NUMSIZES
      parameter (NMIN=30,NMAX=2 000 000)
      parameter (NTIMES=10,NUMSIZES=32)
      integer NPAD
      parameter (NPAD=5)

      real*8 a(NMAX+NPAD),b(NMAX+NPAD)
      real*8 time(4,NTIMES),mysecond,scalar
      real*8 sum,start,finish
      real*8 rate(4),besttime(4),bytes(4),rsum(4)
      real*8 exp,tdelta
      logical ALLTIMES
      integer i,j,k,l,M
*     inner is the repetition count of each kernel.  It is an integer
*     because it is used as a DO-loop bound; non-integer DO bounds are
*     a deleted feature in Fortran 95 and later.
      integer inner
      external mysecond

*     Bytes counted per loop iteration for kernels 1..4.
*     NOTE(review): kernel 4 below reads both a(i) and b(i), yet it is
*     counted as 8 bytes per iteration -- confirm this is intended.
      data bytes/8,16,24,8/
*     Set ALLTIMES to .true. to print every individual timing.
      data ALLTIMES/.false./


*     .. MPI Initialization ..

      call MPI_INIT ( rc )
      if ( rc .ne. MPI_SUCCESS ) then
         WRITE(*,*) ' MPI Initialization problem, error code: ',rc
         stop
      endif
      call MPI_COMM_RANK ( MPI_COMM_WORLD, rank, rc )
      call MPI_COMM_SIZE ( MPI_COMM_WORLD, numtask, rc )

      if ( rank .eq. 0 ) then
*        check timer granularity: sample the timer repeatedly and
*        keep the smallest non-zero difference of consecutive samples
         do i=1,min(10000,NMAX)
            a(i) = 0.0d0
         end do
         do i=1,min(10000,NMAX)
            a(i) = mysecond()
         end do
         tdelta = 1.d36
         do i=1,min(10000,NMAX)-1
            if (a(i+1).ne.a(i)) then
               tdelta = min(tdelta,abs(a(i+1)-a(i)))
            end if
         end do
         print *,'Smallest time delta is ',tdelta


         print *,' Size Iter FILL COPY DAXPY SUM'
      endif

*     Loop over problem size
      do j=1,NUMSIZES
*        M is interpolated on a log10 scale from NMIN to NMAX
         exp = log10(dble(NMIN)) + dble(j-1)/dble(NUMSIZES-1)*
     $        (log10(dble(NMAX))-log10(dble(NMIN)))
         M = NINT(10.**exp)

*        Each kernel is repeated inner times per timing, so one
*        timing touches about NMAX elements whatever the value of M.
*        inner depends only on M and is therefore set once per size.
         inner = NMAX/M

*        Initialize Arrays

         do i=1,M
            a(i) = 0.0d0
            b(i) = 0.0d0
         end do

         do k=1,NTIMES

            call MPI_BARRIER( MPI_COMM_WORLD, rc)

*           Kernel 1 (FILL): a(i) = scalar
            start = mysecond()
            do l=1,inner
               scalar = dble(k+l)
               do i=1,M
                  a(i) = scalar
               end do
            end do
            finish = mysecond()
            time(1,k) = (finish-start)/dble(inner)

            call MPI_BARRIER( MPI_COMM_WORLD, rc)

*           Kernel 2 (COPY): b(i) = a(i).  The store to a(l) changes
*           the data between repetitions (presumably to keep the
*           compiler from collapsing the repeated copies).
            start = mysecond()
            do l=1,inner
               a(l) = 1.0d0
               do i=1,M
                  b(i) = a(i)
               end do
            end do
            finish = mysecond()
            time(2,k) = (finish-start)/dble(inner)

            call MPI_BARRIER( MPI_COMM_WORLD, rc)

*           Kernel 3 (DAXPY): b(i) = b(i) + scalar*a(i).  scalar
*           still holds the last value assigned in kernel 1.
            start = mysecond()
            do l=1,inner
               a(l) = 1.0d0
               do i=1,M
                  b(i) = b(i) + scalar*a(i)
               end do
            end do
            finish = mysecond()
            time(3,k) = (finish-start)/dble(inner)

            call MPI_BARRIER( MPI_COMM_WORLD, rc)

*           Kernel 4 (dot product, printed as SUM):
*           sum = sum + a(i)*b(i)
            start = mysecond()
            do l=1,inner
               b(l) = 1.0d0
               sum = 0.0d0
               do i=1,M
                  sum = sum + a(i)*b(i)
               end do
            end do
            finish = mysecond()
            time(4,k) = (finish-start)/dble(inner)

            call MPI_BARRIER( MPI_COMM_WORLD, rc)

         end do

*        Best (minimum) time per kernel, converted to MB/s
         do i=1,4
            besttime(i) = 1.d+36
            do k=1,NTIMES
               besttime(i) = min(besttime(i),time(i,k))
               if (ALLTIMES) print *,i,k,time(i,k)
            end do
            rate(i) = dble(M)* bytes(i)/besttime(i) / 1.d6
         end do

*        Sum the four rates over all ranks onto rank 0
         call MPI_REDUCE(rate, rsum, 4, MPI_REAL8, MPI_SUM, 0,
     $        MPI_COMM_WORLD, mpierr)
         if ( rank .eq. 0 ) then
*           Print the per-rank average rates.  The last column is
*           the timer granularity relative to rank 0's best FILL time.
            write (*,1) M,NTIMES,rsum(1)/numtask,rsum(2)/numtask,
     $           rsum(3)/numtask,rsum(4)/numtask, tdelta/besttime(1)

*           Write sum to unit 3 so that the result of kernel 4 is
*           used (presumably to stop the compiler from discarding
*           that loop).  No file name is given, so the file name is
*           processor dependent.
            open (unit=3,form='unformatted')
            write (3) sum
            close (unit=3)
         endif
      end do

    1 format (1x,i8,2x,i4,1x,5(f8.1,2x))

      call MPI_FINALIZE ( rc )

      end