root / synthbench / euroben-dm / mod2i / d_psrs.f @ 0:839f52ef7657
History | View | Annotate | Download (5.3 kB)
1 |
Subroutine d_psrs( np, mx, keys, npnew ) |
---|---|
2 |
! ---------------------------------------------------------------------- |
3 |
Use numerics |
4 |
Use dist_module |
5 |
Implicit None |
6 |
|
7 |
Include 'mpif.h' |
8 |
! ---------------------------------------------------------------------- |
9 |
! --- Parallel Sort by Regular Sampling algorithm as described by |
10 |
! Xiaobo Li, et. al. |
11 |
! |
12 |
! --- This implementation is based on the PSRS version for the |
13 |
! HP/Convex SPP-1000 of C. Mobarry/GSFC & J. Crawford/VSEP |
14 |
! at NASA. |
15 |
! --- Adapted to be used as a generic subroutine for 8-byte Real data. |
16 |
! In this Fortran 90 implementation no preprocessing is needed. |
17 |
! --- npnew has been added to the parameter list to enable global |
18 |
! combining of the sorted list. |
19 |
! --- A bug has been removed out of the merging routine that caused |
20 |
! the keys() array not to be rearranged. |
21 |
! ---------------------------------------------------------------------- |
22 |
! --- keys() is the array of keys to be sorted. |
23 |
! w1() is a work array. |
24 |
! ---------------------------------------------------------------------- |
25 |
Integer :: np, mx, npnew |
26 |
Real(l_) :: keys(mx) |
27 |
|
28 |
Real(l_) :: w1(mx) |
29 |
Integer :: irnkl(mx) |
30 |
Integer :: islen(nodes), irlen(nodes) |
31 |
|
32 |
! --- fencepost index and key values for shuffle: |
33 |
Integer :: fposts(nodes+1), gposts(nodes+1) |
34 |
Integer :: itabr(nodes), itabl(nodes+1) |
35 |
Real(l_) :: work(nodes*(nodes+1)) |
36 |
Real(l_) :: fpval(nodes+1) |
37 |
Real(l_) :: lmax, ak |
38 |
|
39 |
Integer :: stat(MPI_Status_Size) |
40 |
Integer :: comm, datyp1, datyp2 |
41 |
Real(l_) :: buf(nodes) |
42 |
Integer :: ier, itag |
43 |
Integer :: i, j, k, step |
44 |
! ---------------------------------------------------------------------- |
45 |
itag = 0 |
46 |
datyp1 = MPI_Integer |
47 |
datyp2 = MPI_Real8 |
48 |
comm = MPI_Comm_World |
49 |
! ---------------------------------------------------------------------- |
50 |
! --- Local sorts. Note that indx() is a local index on the process. |
51 |
|
52 |
w1 = keys |
53 |
Call dqsort( w1, np , 1, np ) |
54 |
If ( nodes == 1 ) Then ! --- Serial sort: we're done. |
55 |
keys = w1 |
56 |
npnew = np |
57 |
Return |
58 |
End If |
59 |
|
60 |
! --- w1() is now the sorted keys. |
61 |
|
62 |
lmax = w1(np) |
63 |
|
64 |
! --- Choose nodes evenly spaced values out of every bin. |
65 |
! Store them all in work(). |
66 |
|
67 |
k = 1 |
68 |
step = np/nodes |
69 |
Do i = 1,nodes |
70 |
work(i) = w1(k) |
71 |
k = k + step |
72 |
End Do |
73 |
|
74 |
! --- work(1:nodes) are the sampled key values for the fenceposts. |
75 |
|
76 |
itag = itag + 1 |
77 |
|
78 |
If ( me /= 0 ) Then |
79 |
Call MPI_Send( work, nodes, datyp2, 0, itag, comm, ier ) |
80 |
Else |
81 |
Do i = 1, nodes - 1 |
82 |
Call MPI_Recv( buf, nodes, datyp2, MPI_Any_Source, |
83 |
& itag, comm, stat, ier) |
84 |
Do j = 1, nodes |
85 |
work(stat(MPI_Source)*nodes+j) = buf(j) |
86 |
End Do |
87 |
End Do |
88 |
End If |
89 |
|
90 |
! --- Insertion sort the fencepost values of keys and indexes. |
91 |
|
92 |
If ( me == 0 ) Then |
93 |
Do i = 2, nodes*nodes |
94 |
ak = work(i) |
95 |
Do j = i, 2, -1 |
96 |
If ( work(j-1) <= ak ) Go To 10 |
97 |
work(j) = work(j-1) |
98 |
End Do |
99 |
j = 1 |
100 |
10 Continue |
101 |
work(j) = ak |
102 |
End Do |
103 |
|
104 |
! --- After the insertion sort work() contains the sorted sampled |
105 |
! keys for the fenceposts. We put them in fpval and braodcast them. |
106 |
|
107 |
k = 1 |
108 |
Do i = 1, nodes*nodes, nodes |
109 |
fpval(k) = work(i) |
110 |
k = k + 1 |
111 |
End Do |
112 |
End If |
113 |
Call MPI_BCast( fpval, nodes+1, datyp2, 0, comm, ier ) |
114 |
|
115 |
fpval(nodes+1) = lmax + 1 |
116 |
|
117 |
! --- Determine segment boundaries. Within each bin, fposts(i) is the |
118 |
! start of the i-th shuffle segment. |
119 |
|
120 |
fposts(1) = 1 |
121 |
k = 2 |
122 |
Do i = 1, np |
123 |
|
124 |
! --- The first element may be greater than several fencepost values, |
125 |
! so we must use a do-while loop. |
126 |
|
127 |
Do |
128 |
If ( w1(i) < fpval(k) ) Exit |
129 |
fposts(k) = i |
130 |
k = k + 1 |
131 |
End Do |
132 |
End Do |
133 |
|
134 |
! --- The last element may not be greater than the last fencepost value, |
135 |
! so we must assign an appropriate value to every fencepost past the |
136 |
! last. |
137 |
|
138 |
Do i = k, nodes+1 |
139 |
fposts(i) = np + 1 |
140 |
End Do |
141 |
|
142 |
! --- Every process needs fposts() values from every other process, so |
143 |
! we will give each process a copy of all the fposts's in work(). |
144 |
|
145 |
Do i = 1, nodes |
146 |
islen(i) = fposts(i+1) - fposts(i) |
147 |
End Do |
148 |
|
149 |
Call MPI_Alltoall(islen, 1, datyp1, irlen, 1, datyp1, comm, ier ) |
150 |
|
151 |
! --- Make sure that "fposts" and "gposts" are zero based for |
152 |
! MPI_Alltoallv. fposts and gposts are the addresses of the segment |
153 |
! boundaries. |
154 |
|
155 |
fposts(1) = 0 |
156 |
gposts(1) = 0 |
157 |
Do i = 1, nodes |
158 |
fposts(i+1) = fposts(i) + islen(i) |
159 |
gposts(i+1) = gposts(i) + irlen(i) |
160 |
End Do |
161 |
|
162 |
npnew = gposts(nodes+1) |
163 |
|
164 |
Call MPI_Alltoallv( |
165 |
& w1 , islen, fposts, datyp2, |
166 |
& keys, irlen, gposts, datyp2, |
167 |
& comm, ier ) |
168 |
|
169 |
!--- Set up the information for the merge: |
170 |
|
171 |
Do i = 1, nodes + 1 |
172 |
itabl(i) = gposts(i) |
173 |
End Do |
174 |
|
175 |
! --- Merge the segments within each bin. |
176 |
|
177 |
Call d_merge( npnew, nodes, mx, keys, irnkl, itabl, itabr ) |
178 |
! ---------------------------------------------------------------------- |
179 |
End Subroutine d_psrs |