synthbench/mixedMode/src/pt_to_pt_pingping.f90
!-----------------------------------------------------------!
! Contains the point-to-point pingping mixed mode           !
! OpenMP/MPI benchmarks.                                    !
! This includes: -masteronly pingping                       !
!                -funnelled pingping                        !
!                -multiple pingping                         !
!-----------------------------------------------------------!
MODULE pt_to_pt_pingPing

  use parallelEnvironment
  use omp_lib
  use benchmarkSetup
  use output

  implicit none

  !Module variables
  integer :: pingRankA, pingRankB !ranks of 2 pingping processes
  integer :: sizeofBuffer
  integer, dimension(:), allocatable :: pingSendBuf, pingRecvBuf
  integer, dimension(:), allocatable :: finalRecvBuf, testBuf
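  !Note: each buffer is laid out as numThreads contiguous
  !blocks of dataSize elements, one block per thread, so
  !sizeofBuffer = dataSize * numThreads throughout.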

CONTAINS

  !---------------------------------------------------------!
  ! Subroutine: pingPing                                    !
  !                                                         !
  ! Driver subroutine for the pingping benchmark.           !
  !---------------------------------------------------------!
  SUBROUTINE pingPing(benchmarkType)
    integer, intent(in) :: benchmarkType
    integer :: dataSizeIter
    logical :: sameNode

    pingRankA = PPRanks(1)
    pingRankB = PPRanks(2)

    !Check if pingRankA and pingRankB are on the same node
    sameNode = compareProcNames(pingRankA, pingRankB)

    IF (myMPIRank == 0) THEN
       !print message saying if benchmark is inter or intra node
       CALL printNodeReport(sameNode, pingRankA, pingRankB)
       !..then print benchmark report column headings.
       CALL printBenchHeader()
    END IF

    !initialise totalReps to defaultReps
    totalReps = defaultReps

    !Start loop over data sizes
    dataSizeIter = minDataSize !initialise dataSizeIter
    DO WHILE (dataSizeIter <= maxDataSize)

       !set size of buffer
       sizeofBuffer = dataSizeIter * numThreads

       !Allocate space for main data arrays
       CALL allocateData(sizeofBuffer)

       !Warm-up sweep for the chosen benchmarkType
       IF (benchmarkType == MASTERONLY) THEN
          !masteronly pingping warm-up
          CALL masteronlyPingping(warmUpIters, dataSizeIter)
       ELSE IF (benchmarkType == FUNNELLED) THEN
          !funnelled pingping warm-up
          CALL funnelledPingping(warmUpIters, dataSizeIter)
       ELSE IF (benchmarkType == MULTIPLE) THEN
          !multiple pingping warm-up
          CALL multiplePingping(warmUpIters, dataSizeIter)
       END IF

       !Perform verification test for pingping.
       !This is only done by pingRankA and pingRankB.
       CALL testPingping(sizeofBuffer, dataSizeIter)
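       !(The verification reads finalRecvBuf, which was last
       !written during the warm-up sweep above.)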

       !Initialise the benchmark
       repsToDo = totalReps
       benchComplete = .false.
       !Execute benchmark until target time is reached
       DO WHILE (benchComplete .NEQV. .true.)
          !Start timer.
          CALL MPI_Barrier(comm, ierr)
          startTime = MPI_Wtime()

          !Execute benchmarkType for repsToDo repetitions
          IF (benchmarkType == MASTERONLY) THEN
             CALL masteronlyPingping(repsToDo, dataSizeIter)
          ELSE IF (benchmarkType == FUNNELLED) THEN
             CALL funnelledPingping(repsToDo, dataSizeIter)
          ELSE IF (benchmarkType == MULTIPLE) THEN
             CALL multiplePingping(repsToDo, dataSizeIter)
          END IF

          !Stop timer.
          CALL MPI_Barrier(comm, ierr)
          finishTime = MPI_Wtime()
          totalTime = finishTime - startTime

          !Test if target time was reached with the number of
          !repetitions.
          benchComplete = repTimeCheck(totalTime, repsToDo)
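          !(repTimeCheck is defined in benchmarkSetup; it is
          !assumed to report completion once totalTime reaches
          !the target time, and otherwise to scale repsToDo up
          !for the next pass.)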

       END DO !End of benchComplete loop

       !Master process sets benchmark results
       IF (myMPIRank == 0) THEN
          CALL setReportParams(dataSizeIter, repsToDo, totalTime)
          CALL printReport()
       END IF

       !Free allocated data
       CALL freeData()

       !Double dataSize and loop again
       dataSizeIter = dataSizeIter * 2

    END DO !End loop over data sizes

  END SUBROUTINE pingPing

  !---------------------------------------------------------!
  ! Subroutine: masteronlyPingping                          !
  !                                                         !
  ! Two processes send a message to each other using the    !
  ! MPI_Isend, MPI_Recv and MPI_Wait routines.              !
  ! Inter-process communication takes place outside of the  !
  ! parallel region.                                        !
  !---------------------------------------------------------!
  SUBROUTINE masteronlyPingping(totalReps, dataSize)
    integer, intent(in) :: totalReps, dataSize
    integer :: repIter, i
    integer :: destRank

    !set destRank to ID of other process
    IF (myMPIRank == pingRankA) THEN
       destRank = pingRankB
    ELSE IF (myMPIRank == pingRankB) THEN
       destRank = pingRankA
    END IF

    DO repIter = 1, totalReps !loop totalReps times

       IF (myMPIRank == pingRankA .or. myMPIRank == pingRankB) THEN

          !Each thread writes its globalID to pingSendBuf
          !using a PARALLEL DO directive.
          !$OMP PARALLEL DO DEFAULT(NONE), &
          !$OMP PRIVATE(i), &
          !$OMP SHARED(pingSendBuf,dataSize,sizeofBuffer,globalIDarray), &
          !$OMP SCHEDULE(STATIC,dataSize)
          DO i = 1, sizeofBuffer
             pingSendBuf(i) = globalIDarray(myThreadID)
          END DO
          !$OMP END PARALLEL DO
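          !(With sizeofBuffer = numThreads*dataSize there are
          !exactly numThreads static chunks of size dataSize,
          !so each thread fills precisely its own block.)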

          !Process calls non-blocking send to start transfer of
          !pingSendBuf to other process.
          CALL MPI_Isend(pingSendBuf, sizeofBuffer, MPI_INTEGER, &
                         destRank, tag, comm, requestID, ierr)

          !Process then waits for message from other process.
          CALL MPI_Recv(pingRecvBuf, sizeofBuffer, MPI_INTEGER, &
                        destRank, tag, comm, status, ierr)

          !Finish the send operation with an MPI_Wait
          CALL MPI_Wait(requestID, status, ierr)
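          !(Both ranks send at once, so posting the non-blocking
          !Isend before the blocking Recv avoids the deadlock
          !that two blocking sends could cause for unbuffered
          !messages.)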

          !Each thread under the MPI process now reads its part
          !of the received buffer.
          !$OMP PARALLEL DO DEFAULT(NONE), &
          !$OMP PRIVATE(i), &
          !$OMP SHARED(finalRecvBuf,dataSize,sizeofBuffer,pingRecvBuf), &
          !$OMP SCHEDULE(STATIC,dataSize)
          DO i = 1, sizeofBuffer
             finalRecvBuf(i) = pingRecvBuf(i)
          END DO
          !$OMP END PARALLEL DO

       END IF

    END DO !End repetitions loop
  END SUBROUTINE masteronlyPingping

  !---------------------------------------------------------!
  ! Subroutine: funnelledPingping                           !
  !                                                         !
  ! Two processes send a message to each other using the    !
  ! MPI_Isend, MPI_Recv and MPI_Wait routines.              !
  ! Inter-process communication takes place inside the      !
  ! OpenMP parallel region.                                 !
  !---------------------------------------------------------!
  SUBROUTINE funnelledPingping(totalReps, dataSize)
    integer, intent(in) :: totalReps, dataSize
    integer :: repIter, i
    integer :: destRank

    !set destRank to ID of other process
    IF (myMPIRank == pingRankA) THEN
       destRank = pingRankB
    ELSE IF (myMPIRank == pingRankB) THEN
       destRank = pingRankA
    END IF

    !Open the parallel region
    !$OMP PARALLEL DEFAULT(NONE), &
    !$OMP PRIVATE(i,repIter), &
    !$OMP SHARED(dataSize,sizeofBuffer,pingSendBuf,globalIDarray), &
    !$OMP SHARED(pingRecvBuf,finalRecvBuf,status,requestID,ierr), &
    !$OMP SHARED(destRank,comm,myMPIRank,pingRankA,pingRankB,totalReps)

    DO repIter = 1, totalReps !loop totalReps times

       IF (myMPIRank == pingRankA .or. myMPIRank == pingRankB) THEN

          !Each thread writes its globalID to its part of
          !pingSendBuf.
          !$OMP DO SCHEDULE(STATIC,dataSize)
          DO i = 1, sizeofBuffer
             pingSendBuf(i) = globalIDarray(myThreadID)
          END DO
          !$OMP END DO
          !Implicit barrier here takes care of necessary synchronisation.
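          !(The barrier matters: the master thread is about to
          !send the whole of pingSendBuf, so every thread's
          !block must be complete before the transfer starts.)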

          !$OMP MASTER
          !Master thread starts send of buffer
          CALL MPI_Isend(pingSendBuf, sizeofBuffer, MPI_INTEGER, &
                         destRank, tag, comm, requestID, ierr)
          !then waits for message from other process.
          CALL MPI_Recv(pingRecvBuf, sizeofBuffer, MPI_INTEGER, &
                        destRank, tag, comm, status, ierr)
          !Master thread then completes send using MPI_Wait.
          CALL MPI_Wait(requestID, status, ierr)
          !$OMP END MASTER
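          !(Only the master thread makes MPI calls, matching the
          !MPI_THREAD_FUNNELED support level. END MASTER has no
          !implied barrier, hence the explicit one below.)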

          !Barrier needed to ensure master thread has completed transfer.
          !$OMP BARRIER

          !Each thread reads its part of the received buffer
          !$OMP DO SCHEDULE(STATIC,dataSize)
          DO i = 1, sizeofBuffer
             finalRecvBuf(i) = pingRecvBuf(i)
          END DO
          !$OMP END DO

       END IF

    END DO !End repetitions loop
    !$OMP END PARALLEL

  END SUBROUTINE funnelledPingping

  !---------------------------------------------------------!
  ! Subroutine: multiplePingping                            !
  !                                                         !
  ! With this algorithm multiple threads take part in both  !
  ! the communication and the computation.                  !
  ! Each thread sends its portion of pingSendBuf to the     !
  ! other process using the MPI_Isend/MPI_Recv/MPI_Wait     !
  ! routines.                                               !
  !---------------------------------------------------------!
  SUBROUTINE multiplePingping(totalReps, dataSize)
    integer, intent(in) :: totalReps, dataSize
    integer :: repIter, i
    integer :: destRank
    integer :: lBound, uBound

    !set destRank to be ID of other process
    IF (myMPIRank == pingRankA) THEN
       destRank = pingRankB
    ELSE IF (myMPIRank == pingRankB) THEN
       destRank = pingRankA
    END IF

    !Open parallel region
    !$OMP PARALLEL DEFAULT(NONE), &
    !$OMP PRIVATE(i,lBound,uBound,requestID,status,ierr,repIter), &
    !$OMP SHARED(pingSendBuf,pingRecvBuf,finalRecvBuf,sizeofBuffer), &
    !$OMP SHARED(destRank,myMPIRank,pingRankA,pingRankB,totalReps), &
    !$OMP SHARED(dataSize,globalIDarray,comm)

    DO repIter = 1, totalReps !loop totalReps times
       IF (myMPIRank == pingRankA .or. myMPIRank == pingRankB) THEN

          !Calculate the lower and upper bounds of each thread's
          !portion of the data arrays
          lBound = ((myThreadID-1) * dataSize) + 1
          uBound = (myThreadID * dataSize)
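          !(This assumes myThreadID is 1-based. Each thread also
          !tags its messages with myThreadID below, so the
          !per-thread message streams between the two ranks
          !stay separate.)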

          !Each thread writes to its part of pingSendBuf
          !$OMP DO SCHEDULE(STATIC,dataSize)
          DO i = 1, sizeofBuffer
             pingSendBuf(i) = globalIDarray(myThreadID)
          END DO
          !$OMP END DO NOWAIT
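          !(NOWAIT is safe here: with this schedule each
          !thread's chunk is exactly lBound:uBound, the range it
          !sends itself, so no thread sends elements still being
          !written by another thread.)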

          !Each thread starts the send of dataSize items of
          !pingSendBuf to the process with rank = destRank
          CALL MPI_Isend(pingSendBuf(lBound:uBound), dataSize, &
                         MPI_INTEGER, destRank, myThreadID, comm, &
                         requestID, ierr)
          !Thread then waits for a message from destRank with
          !tag equal to its thread ID.
          CALL MPI_Recv(pingRecvBuf(lBound:uBound), dataSize, &
                        MPI_INTEGER, destRank, myThreadID, comm, &
                        status, ierr)
          !Thread completes its send using MPI_Wait
          CALL MPI_Wait(requestID, status, ierr)
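          !(Note: passing an array section to a non-blocking MPI
          !call can be unsafe if the compiler passes a temporary
          !copy; contiguous sections like these are usually, but
          !not universally, passed by reference.)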

          !Each thread reads its part of the received buffer.
          !$OMP DO SCHEDULE(STATIC,dataSize)
          DO i = 1, sizeofBuffer
             finalRecvBuf(i) = pingRecvBuf(i)
          END DO
          !$OMP END DO NOWAIT

       END IF

    END DO !End repetitions loop
    !$OMP END PARALLEL

  END SUBROUTINE multiplePingping

  !---------------------------------------------------------!
  ! Subroutine: allocateData                                !
  !                                                         !
  ! Allocate memory for the main data arrays in pingping.   !
  !---------------------------------------------------------!
  SUBROUTINE allocateData(bufferSize)
    integer, intent(in) :: bufferSize

    allocate(pingSendBuf(bufferSize), pingRecvBuf(bufferSize))
    allocate(finalRecvBuf(bufferSize))

  END SUBROUTINE allocateData

  !---------------------------------------------------------!
  ! Subroutine: freeData                                    !
  !                                                         !
  ! Free allocated memory for main data arrays.             !
  !---------------------------------------------------------!
  SUBROUTINE freeData()

    deallocate(pingSendBuf, pingRecvBuf)
    deallocate(finalRecvBuf)

  END SUBROUTINE freeData

  !---------------------------------------------------------!
  ! Subroutine: testPingping                                !
  !                                                         !
  ! Verifies that the pingping benchmark worked correctly.  !
  !---------------------------------------------------------!
  SUBROUTINE testPingping(sizeofBuffer, dataSize)
    integer, intent(in) :: sizeofBuffer, dataSize
    integer :: otherPingRank, i
    logical :: testFlag, reduceFlag

    !set testFlag to true
    testFlag = .true.

    !Testing only needs to be done by pingRankA & pingRankB
    IF (myMPIRank == pingRankA .or. myMPIRank == pingRankB) THEN
       !allocate space for testBuf
       allocate(testBuf(sizeofBuffer))

       !set the ID of the other pingRank
       IF (myMPIRank == pingRankA) THEN
          otherPingRank = pingRankB
       ELSE IF (myMPIRank == pingRankB) THEN
          otherPingRank = pingRankA
       END IF

       !Construct testBuf with the correct values
       !$OMP PARALLEL DO DEFAULT(NONE), &
       !$OMP PRIVATE(i), &
       !$OMP SHARED(otherPingRank,numThreads,dataSize), &
       !$OMP SHARED(sizeofBuffer,testBuf), &
       !$OMP SCHEDULE(STATIC,dataSize)
       DO i = 1, sizeofBuffer
          !calculate the globalID of the thread expected in
          !finalRecvBuf; this is done using otherPingRank
          testBuf(i) = (otherPingRank * numThreads) + myThreadID
       END DO
       !$OMP END PARALLEL DO
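       !(This expected value assumes globalIDarray holds IDs of
       !the form rank*numThreads + threadID, which matches the
       !formula used here for the other process's threads.)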

       !Compare each element of testBuf and finalRecvBuf
       DO i = 1, sizeofBuffer
          IF (testBuf(i) /= finalRecvBuf(i)) THEN
             testFlag = .false.
          END IF
       END DO

       !free space for testBuf
       deallocate(testBuf)

    END IF !End test block

    !Reduce testFlag onto the master process with a logical AND
    CALL MPI_Reduce(testFlag, reduceFlag, 1, MPI_LOGICAL, &
                    MPI_LAND, 0, comm, ierr)

    !master sets testOutcome flag
    IF (myMPIRank == 0) THEN
       CALL setTestOutcome(reduceFlag)
    END IF

  END SUBROUTINE testPingping

END MODULE pt_to_pt_pingPing