root / synthbench / euroben-dm / mod2i / .svn / text-base / i_psrs.f.svn-base @ 0:839f52ef7657
History | View | Annotate | Download (5.1 kB)
1 |
Subroutine i_psrs( np, mx, keys, npnew ) |
---|---|
2 |
! ---------------------------------------------------------------------- |
3 |
Use dist_module |
4 |
Implicit None |
5 |
|
6 |
Include 'mpif.h' |
7 |
! ---------------------------------------------------------------------- |
8 |
! --- Parallel Sort by Regular Sampling algorithm as described by |
9 |
! Xiaobo Li, et. al. |
10 |
! |
11 |
! --- This implementation is based on the PSRS version for the |
12 |
! HP/Convex SPP-1000 of C. Mobarry/GSFC & J. Crawford/VSEP |
13 |
! at NASA. |
14 |
! --- Adapted to be used as a generic subroutine for Integer data. |
15 |
! In this Fortran 90 implementation no preprocessing is needed. |
16 |
! --- npnew has been added to the parameter list to enable global |
17 |
! combining of the sorted list. |
18 |
! --- A bug has been removed out of the merging routine that caused |
19 |
! the keys() array not to be rearranged. |
20 |
! ---------------------------------------------------------------------- |
21 |
! --- keys() is the array of keys to be sorted. |
22 |
! w1() is a work array. |
23 |
! ---------------------------------------------------------------------- |
24 |
Integer :: np, mx, npnew |
25 |
Integer :: keys(mx) |
26 |
|
27 |
Integer :: w1(mx) |
28 |
Integer :: irnkl(mx) |
29 |
Integer :: islen(nodes), irlen(nodes) |
30 |
|
31 |
! --- Fencepost index and key values for shuffle: |
32 |
Integer :: fposts(nodes+1), gposts(nodes+1) |
33 |
Integer :: itabr(nodes), itabl(nodes+1) |
34 |
Integer :: work(nodes*(nodes+1)) |
35 |
Integer :: fpval(nodes+1) |
36 |
Integer :: lmax, ak |
37 |
|
38 |
Integer :: stat(MPI_Status_Size) |
39 |
Integer :: comm, datype |
40 |
Integer :: buf(nodes) |
41 |
Integer :: ier, itag |
42 |
Integer :: i, j, k, step |
43 |
! ---------------------------------------------------------------------- |
44 |
itag = 0 |
45 |
datype = MPI_Integer |
46 |
Comm = MPI_Comm_World |
47 |
! ---------------------------------------------------------------------- |
48 |
! --- Local sorts. |
49 |
|
50 |
w1 = keys |
51 |
Call iqsort( w1, np , 1, np ) |
52 |
If ( nodes == 1 ) Then ! --- Serial sort: we're done. |
53 |
keys = w1 |
54 |
npnew = np |
55 |
Return |
56 |
End If |
57 |
|
58 |
! --- w1() is now the sorted keys. |
59 |
|
60 |
lmax = w1(np) |
61 |
|
62 |
! --- Choose nproc evenly spaced values out of every bin. |
63 |
! Store them all in work(). |
64 |
|
65 |
k = 1 |
66 |
step = np/nodes |
67 |
Do i = 1,nodes |
68 |
work(i) = w1(k) |
69 |
k = k + step |
70 |
End Do |
71 |
|
72 |
! --- work(1:nodes) are the sampled key values for the fenceposts. |
73 |
|
74 |
itag = itag + 1 |
75 |
|
76 |
If ( me /= 0 ) Then |
77 |
Call MPI_Send( work, nodes, datype, 0, itag, comm, ier ) |
78 |
Else |
79 |
Do i = 1, nodes - 1 |
80 |
Call MPI_Recv( buf, nodes, datype, MPI_Any_Source, |
81 |
& itag, comm, stat, ier) |
82 |
Do j = 1, nodes |
83 |
work(stat(MPI_Source)*nodes+j) = buf(j) |
84 |
End Do |
85 |
End Do |
86 |
End If |
87 |
|
88 |
! --- Insertion sort the fencepost values of keys and indexes. |
89 |
|
90 |
If ( me == 0 ) Then |
91 |
Do i = 2, nodes*nodes |
92 |
ak = work(i) |
93 |
Do j = i, 2, -1 |
94 |
If ( work(j-1) <= ak ) Go To 10 |
95 |
work(j) = work(j-1) |
96 |
End Do |
97 |
j = 1 |
98 |
10 Continue |
99 |
work(j) = ak |
100 |
End Do |
101 |
|
102 |
! --- After the insertion sort work() contains the sorted sampled |
103 |
! keys for the fenceposts. We put them in fpval and braodcast them. |
104 |
|
105 |
k = 1 |
106 |
Do i = 1, nodes*nodes, nodes |
107 |
fpval(k) = work(i) |
108 |
k = k + 1 |
109 |
End Do |
110 |
End If |
111 |
Call MPI_BCast( fpval, nodes+1, datype, 0, comm, ier ) |
112 |
fpval(nodes+1) = lmax + 1 |
113 |
|
114 |
! --- Determine segment boundaries. Within each bin, fposts(i) is the |
115 |
! start of the i-th shuffle segment. |
116 |
|
117 |
fposts(1) = 1 |
118 |
k = 2 |
119 |
Do i = 1, np |
120 |
|
121 |
! --- The first element may be greater than several fencepost values, |
122 |
! so we must use a do-while loop. |
123 |
|
124 |
Do |
125 |
If ( w1(i) < fpval(k) ) Exit |
126 |
fposts(k) = i |
127 |
k = k + 1 |
128 |
End Do |
129 |
End Do |
130 |
|
131 |
! --- The last element may not be greater than the last fencepost value, |
132 |
! so we must assign an appropriate value to every fencepost past the |
133 |
! last. |
134 |
|
135 |
Do i = k, nodes + 1 |
136 |
fposts(i) = np + 1 |
137 |
End Do |
138 |
|
139 |
! --- Every process needs fposts() values from every other process, so |
140 |
! we will give each process a copy of all the fposts's in work(). |
141 |
|
142 |
Do i = 1, nodes |
143 |
islen(i) = fposts(i+1) - fposts(i) |
144 |
End Do |
145 |
|
146 |
Call MPI_Alltoall(islen, 1, datype, irlen, 1, datype, comm, ier ) |
147 |
|
148 |
! --- Make sure that "fposts" and "gposts" are zero based for |
149 |
! MPI_Alltoallv. fposts and gposts are the addresses of the segment |
150 |
! boundaries. |
151 |
|
152 |
fposts(1) = 0 |
153 |
gposts(1) = 0 |
154 |
Do i = 1, nodes |
155 |
fposts(i+1) = fposts(i) + islen(i) |
156 |
gposts(i+1) = gposts(i) + irlen(i) |
157 |
End Do |
158 |
|
159 |
npnew = gposts(nodes+1) |
160 |
|
161 |
Call MPI_Alltoallv( |
162 |
& w1 , islen, fposts, datype, |
163 |
& keys, irlen, gposts, datype, |
164 |
& comm, ier ) |
165 |
|
166 |
!--- Set up the information for the merge: |
167 |
|
168 |
Do i = 1, nodes + 1 |
169 |
itabl(i) = gposts(i) |
170 |
End Do |
171 |
|
172 |
! --- Merge the segments within each bin. |
173 |
|
174 |
Call i_merge( npnew, nodes, mx, keys, irnkl, itabl, itabr ) |
175 |
! ---------------------------------------------------------------------- |
176 |
End Subroutine i_psrs |