Statistics
| Branch: | Revision:

root / synthbench / stream2 / stream2-mpi.f

History | View | Annotate | Download (6.2 kB)

1 0:839f52ef7657 louridas
*************************************************
2 0:839f52ef7657 louridas
* Program:  STREAM2                             *
3 0:839f52ef7657 louridas
* Revision: 0.1, 99.10.26                       *
4 0:839f52ef7657 louridas
* Author:   John McCalpin                       *
5 0:839f52ef7657 louridas
*           john@mccalpin.com                   *
6 0:839f52ef7657 louridas
*************************************************
7 0:839f52ef7657 louridas
*-----------------------------------------------------------------------
8 0:839f52ef7657 louridas
* Copyright 1991-2003: John D. McCalpin
9 0:839f52ef7657 louridas
*-----------------------------------------------------------------------
10 0:839f52ef7657 louridas
* License:
11 0:839f52ef7657 louridas
*  1. You are free to use this program and/or to redistribute
12 0:839f52ef7657 louridas
*     this program.
13 0:839f52ef7657 louridas
*  2. You are free to modify this program for your own use,
14 0:839f52ef7657 louridas
*     including commercial use, subject to the publication
15 0:839f52ef7657 louridas
*     restrictions in item 3.
16 0:839f52ef7657 louridas
*  3. You are free to publish results obtained from running this
17 0:839f52ef7657 louridas
*     program, or from works that you derive from this program,
18 0:839f52ef7657 louridas
*     with the following limitations:
19 0:839f52ef7657 louridas
*     3a. In order to be referred to as "STREAM2 benchmark results",
20 0:839f52ef7657 louridas
*         published results must be in conformance to the STREAM
21 0:839f52ef7657 louridas
*         Run Rules, (briefly reviewed below) published at
22 0:839f52ef7657 louridas
*         http://www.cs.virginia.edu/stream/ref.html
23 0:839f52ef7657 louridas
*         and incorporated herein by reference.
24 0:839f52ef7657 louridas
*         As the copyright holder, John McCalpin retains the
25 0:839f52ef7657 louridas
*         right to determine conformity with the Run Rules.
26 0:839f52ef7657 louridas
*     3b. Results based on modified source code or on runs not in
27 0:839f52ef7657 louridas
*         accordance with the STREAM Run Rules must be clearly
28 0:839f52ef7657 louridas
*         labelled whenever they are published.  Examples of
29 0:839f52ef7657 louridas
*         proper labelling include:
30 0:839f52ef7657 louridas
*         "tuned STREAM2 benchmark results"
31 0:839f52ef7657 louridas
*         "based on a variant of the STREAM2 benchmark code"
32 0:839f52ef7657 louridas
*         Other comparable, clear and reasonable labelling is
33 0:839f52ef7657 louridas
*         acceptable.
34 0:839f52ef7657 louridas
*     3c. Submission of results to the STREAM benchmark web site
35 0:839f52ef7657 louridas
*         is encouraged, but not required.
36 0:839f52ef7657 louridas
*  4. Use of this program or creation of derived works based on this
37 0:839f52ef7657 louridas
*     program constitutes acceptance of these licensing restrictions.
38 0:839f52ef7657 louridas
*  5. Absolutely no warranty is expressed or implied.
39 0:839f52ef7657 louridas
*-----------------------------------------------------------------------
40 0:839f52ef7657 louridas
*************************************************
41 0:839f52ef7657 louridas
* This program measures sustained bandwidth     *
42 0:839f52ef7657 louridas
* using four computational kernels:             *
43 0:839f52ef7657 louridas
*                                               *
44 0:839f52ef7657 louridas
*       FILL:   a(i) = 0                        *
45 0:839f52ef7657 louridas
*       COPY:   a(i) = b(i)                     *
46 0:839f52ef7657 louridas
*       DAXPY:  a(i) = a(i) + q*b(i)            *
47 0:839f52ef7657 louridas
*       DOT:    sum += a(i) * b(i)              *
48 0:839f52ef7657 louridas
*                                               *
49 0:839f52ef7657 louridas
* Results are presented in MB/s, assuming       *
50 0:839f52ef7657 louridas
*   8 Bytes per iteration for FILL and DOT/SUM, *
51 0:839f52ef7657 louridas
*  16 Bytes per iteration for COPY, and         *
52 0:839f52ef7657 louridas
*  24 Bytes per iteration for DAXPY             *
53 0:839f52ef7657 louridas
*************************************************
54 0:839f52ef7657 louridas
	program stream2_mpi
*-----------------------------------------------------------------------
* MPI driver for the four STREAM2 kernels (FILL, COPY, DAXPY, SUM).
*
* Every rank runs the same kernels on its own private arrays a and b.
* For each of NUMSIZES vector lengths M, spaced logarithmically between
* NMIN and NMAX, each kernel is timed NTIMES times; the best time per
* kernel is converted to MB/s, the rates are summed onto rank 0 with
* MPI_REDUCE, and rank 0 prints the per-rank average.
*
* Output columns (rank 0 only):
*   Size, Iter, FILL, COPY, DAXPY, SUM rates, and the ratio of the
*   measured timer granularity to the best FILL time.
*
* mysecond is an external real*8 timer function that is not defined
* in this program unit.
*-----------------------------------------------------------------------
	implicit none
	include 'mpif.h'

*     .. MPI bookkeeping: communicator size, own rank, status code ..
	integer numtask, rank, rc

	integer NMIN, NMAX, NTIMES, NUMSIZES
	parameter (NMIN=30,NMAX=2 000 000)
	parameter (NTIMES=10,NUMSIZES=32)
	integer NPAD
	parameter (NPAD=5)

	real*8 a(NMAX+NPAD),b(NMAX+NPAD)
	real*8 time(4,NTIMES),mysecond,scalar
	real*8 dsum,start,finish
	real*8 rate(4),besttime(4),bytes(4),rsum(4)
	real*8 expnt,tdelta
	logical ALLTIMES
*     inner is the repetition count of each timed kernel.  It must be
*     an integer: a real DO-loop bound is a deleted language feature.
	integer i,j,k,l,M,inner
	external mysecond

*     Bytes assumed to move per iteration of FILL, COPY, DAXPY, SUM.
*     NOTE(review): kernel 4 reads both a(i) and b(i) but is counted
*     as 8 bytes, as stated in the file header - confirm intent.
	data bytes/8,16,24,8/
*     Set ALLTIMES to .true. to print every individual timing.
	data ALLTIMES/.false./

*     .. MPI Initialization ..

	call MPI_INIT ( rc )
	if ( rc .ne. MPI_SUCCESS ) then
	   write(*,*) ' MPI Initialization problem, error code: ',rc
	   stop
	endif
	call MPI_COMM_RANK ( MPI_COMM_WORLD, rank, rc )
	call MPI_COMM_SIZE ( MPI_COMM_WORLD, numtask, rc )

	if ( rank .eq. 0 ) then
* check timer granularity: sample the timer back to back (using a as
* scratch space) and keep the smallest non-zero step between samples
	   do i=1,min(10000,NMAX)
	      a(i) = 0.0d0
	   end do
	   do i=1,min(10000,NMAX)
	      a(i) = mysecond()
	   end do
	   tdelta = 1.d36
	   do i=1,min(10000,NMAX)-1
	      if (a(i+1).ne.a(i)) then
	         tdelta = min(tdelta,abs(a(i+1)-a(i)))
	      end if
	   end do
	   print *,'Smallest time delta is ',tdelta

	   print *,
     $    '    Size  Iter     FILL      COPY     DAXPY       SUM'
	endif

* Loop over problem size
	do j=1,NUMSIZES
*        M grows geometrically from NMIN (j=1) to NMAX (j=NUMSIZES)
	   expnt = log10(dble(NMIN)) + dble(j-1)/dble(NUMSIZES-1)*
     $          (log10(dble(NMAX))-log10(dble(NMIN)))
	   M = NINT(10.0d0**expnt)

*        Repeat each kernel so about NMAX elements are touched per
*        timing, whatever M is.  Loop-invariant in k, so set once.
	   inner = NMAX/M

* Initialize Arrays

	   do i=1,M
	      a(i) = 0.0d0
	      b(i) = 0.0d0
	   end do

	   do k=1,NTIMES

*           The barrier before each kernel lines the ranks up so
*           that they run the same kernel at the same time.

*           .. FILL: a(i) = scalar ..
	      call MPI_BARRIER( MPI_COMM_WORLD, rc)

	      start = mysecond()
	      do l=1,inner
	         scalar = dble(k+l)
	         do i=1,M
	            a(i) = scalar
	         end do
	      end do
	      finish = mysecond()
	      time(1,k) = (finish-start)/dble(inner)

*           .. COPY: b(i) = a(i) ..
*           One element of a is rewritten on every repetition;
*           presumably so the repeated sweeps cannot be collapsed
*           by the compiler - TODO confirm.
	      call MPI_BARRIER( MPI_COMM_WORLD, rc)

	      start = mysecond()
	      do l=1,inner
	         a(l) = 1.0d0
	         do i=1,M
	            b(i) = a(i)
	         end do
	      end do
	      finish = mysecond()
	      time(2,k) = (finish-start)/dble(inner)

*           .. DAXPY: b(i) = b(i) + scalar*a(i) ..
*           scalar still holds the last value set in the FILL loop.
	      call MPI_BARRIER( MPI_COMM_WORLD, rc)

	      start = mysecond()
	      do l=1,inner
	         a(l) = 1.0d0
	         do i=1,M
	            b(i) = b(i) + scalar*a(i)
	         end do
	      end do
	      finish = mysecond()
	      time(3,k) = (finish-start)/dble(inner)

*           .. SUM (dot product): dsum = dsum + a(i)*b(i) ..
	      call MPI_BARRIER( MPI_COMM_WORLD, rc)

	      start = mysecond()
	      do l=1,inner
	         b(l) = 1.0d0
	         dsum = 0.0d0
	         do i=1,M
	            dsum = dsum + a(i)*b(i)
	         end do
	      end do
	      finish = mysecond()
	      time(4,k) = (finish-start)/dble(inner)

	      call MPI_BARRIER( MPI_COMM_WORLD, rc)

	   end do

*        Best (minimum) time per kernel over the NTIMES trials,
*        converted to MB/s for this rank.
	   do i=1,4
	      besttime(i) = 1.d+36
	      do k=1,NTIMES
	         besttime(i) = min(besttime(i),time(i,k))
	         if (ALLTIMES) print *,i,k,time(i,k)
	      end do
	      rate(i) = dble(M)* bytes(i)/besttime(i) / 1.d6
	   end do

*        Sum the per-rank rates onto rank 0, which prints the mean.
	   call MPI_REDUCE(rate, rsum, 4, MPI_REAL8, MPI_SUM, 0,
     $          MPI_COMM_WORLD, rc)
	   if ( rank .eq. 0 ) then
	      write (*,1) M,NTIMES,rsum(1)/numtask,rsum(2)/numtask,
     $         rsum(3)/numtask,rsum(4)/numtask,tdelta/besttime(1)

*           Write the dot-product result to unit 3 so that the SUM
*           kernel has an observable effect; presumably this keeps
*           the compiler from discarding the loop - TODO confirm.
	      open (unit=3,form='unformatted')
	      write (3) dsum
	      close (unit=3)
	   endif
	end do

    1	format (1x,i8,2x,i4,1x,5(f8.1,2x))

	call MPI_FINALIZE ( rc )

	end program stream2_mpi