synthbench / euroben-dm / mod2i / d_psrs.f

      Subroutine d_psrs( np, mx, keys, npnew )
! ----------------------------------------------------------------------
      Use      numerics
      Use      dist_module
      Implicit None

      Include 'mpif.h'
! ----------------------------------------------------------------------
! --- Parallel Sort by Regular Sampling algorithm as described by
!     Xiaobo Li et al.
!
! --- This implementation is based on the PSRS version for the
!     HP/Convex SPP-1000 of C. Mobarry/GSFC & J. Crawford/VSEP
!     at NASA.
! --- Adapted to be used as a generic subroutine for 8-byte Real data.
!     In this Fortran 90 implementation no preprocessing is needed.
! --- npnew has been added to the parameter list to enable global
!     combining of the sorted list.
! --- A bug that caused the keys() array not to be rearranged has been
!     removed from the merging routine.
! ----------------------------------------------------------------------
! --- keys() is the array of keys to be sorted.
!     w1() is a work array.
! ----------------------------------------------------------------------
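! --- Sketch of a typical call (hypothetical caller; keys() holds np
!     local values and mx is the local array capacity):
!        Call d_psrs( np, mx, keys, npnew )
!     On return keys(1:npnew) is this process's slice of the globally
!     sorted data.
! ----------------------------------------------------------------------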
      Integer  :: np, mx, npnew
      Real(l_) :: keys(mx)

      Real(l_) :: w1(mx)
      Integer  :: irnkl(mx)
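! --- islen(i) / irlen(i): number of keys this process sends to /
!     receives from process i in the all-to-all shuffle.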
      Integer  :: islen(nodes), irlen(nodes)

! --- fencepost index and key values for shuffle:
      Integer  :: fposts(nodes+1), gposts(nodes+1)
      Integer  :: itabr(nodes), itabl(nodes+1)
      Real(l_) :: work(nodes*(nodes+1))
      Real(l_) :: fpval(nodes+1)
      Real(l_) :: lmax, ak

      Integer  :: stat(MPI_Status_Size)
      Integer  :: comm, datyp1, datyp2
      Real(l_) :: buf(nodes)
      Integer  :: ier, itag
      Integer  :: i, j, k, step
! ----------------------------------------------------------------------
      itag   = 0
      datyp1 = MPI_Integer
      datyp2 = MPI_Real8
      comm   = MPI_Comm_World
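! --- Note: datyp2 = MPI_Real8 assumes that Real(l_) is an 8-byte real
!     (as stated in the header); datyp1 = MPI_Integer matches the
!     default Integer kind.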
! ----------------------------------------------------------------------
! --- Local sort. Each process sorts its own slice of the keys.

      w1 = keys
      Call dqsort( w1, np, 1, np )
      If ( nodes == 1 ) Then       ! --- Serial sort: we're done.
         keys  = w1
         npnew = np
         Return
      End If

! --- w1() is now the sorted keys.

      lmax = w1(np)

! --- Choose nodes evenly spaced values out of every bin.
!     Store them all in work().

      k    = 1
      step = np/nodes
      Do i = 1,nodes
         work(i) = w1(k)
         k       = k + step
      End Do

! --- work(1:nodes) are the sampled key values for the fenceposts.

      itag = itag + 1

      If ( me /= 0 ) Then
         Call MPI_Send( work, nodes, datyp2, 0, itag, comm, ier )
      Else
         Do i = 1, nodes - 1
            Call MPI_Recv( buf, nodes, datyp2, MPI_Any_Source,
     &                     itag, comm, stat, ier)
            Do j = 1, nodes
               work(stat(MPI_Source)*nodes+j) = buf(j)
            End Do
         End Do
      End If
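! --- work(1:nodes*nodes) now holds all samples on process 0: rank 0's
!     own samples occupy work(1:nodes), and the samples of rank r land
!     in work(r*nodes+1:(r+1)*nodes).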

! --- Insertion sort the sampled fencepost key values (process 0 only).

      If ( me == 0 ) Then
         Do i = 2, nodes*nodes
            ak = work(i)
            Do j = i, 2, -1
               If ( work(j-1) <= ak ) Go To 10
               work(j) = work(j-1)
            End Do
            j = 1
   10       Continue
            work(j) = ak
         End Do
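! --- (The insertion sort is quadratic, but it only has to order the
!     nodes*nodes sample values, so its cost is negligible for
!     moderate process counts.)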

! --- After the insertion sort work() contains the sorted sampled
!     keys for the fenceposts. We put them in fpval and broadcast them.

         k = 1
         Do i = 1, nodes*nodes, nodes
            fpval(k) = work(i)
            k = k + 1
         End Do
      End If
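
! --- fpval(1) is the smallest sample and is never tested below (the
!     boundary scan starts at k = 2); fpval(2:nodes) act as the nodes-1
!     pivots. Regular sampling bounds the final per-process count at
!     roughly twice the average (Li et al.).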
      Call MPI_BCast( fpval, nodes+1, datyp2, 0, comm, ier )

      fpval(nodes+1) = lmax + 1
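! --- fpval(nodes+1) is a sentinel just above the local maximum so the
!     boundary scan below terminates; this assumes the keys are small
!     enough that lmax + 1 > lmax holds in Real(l_) arithmetic.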

! --- Determine segment boundaries. Within each bin, fposts(i) is the
!     start of the i-th shuffle segment.

      fposts(1) = 1
      k         = 2
      Do i = 1, np

! --- The first element may be greater than several fencepost values,
!     so we must use a do-while loop.

         Do
            If ( w1(i) < fpval(k) ) Exit
            fposts(k) = i
            k = k + 1
         End Do
      End Do

! --- The last element may not be greater than the last fencepost value,
!     so we must assign an appropriate value to every fencepost past the
!     last.

      Do i = k, nodes+1
         fposts(i) = np + 1
      End Do

! --- Every process needs the segment lengths from every other process,
!     so we exchange islen() with an all-to-all.

      Do i = 1, nodes
         islen(i) = fposts(i+1) - fposts(i)
      End Do

      Call MPI_Alltoall(islen, 1, datyp1, irlen, 1, datyp1, comm, ier )
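
! --- irlen(i) now holds the length of the segment that process i will
!     send to this process.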

! --- Make sure that "fposts" and "gposts" are zero-based for
!     MPI_Alltoallv. fposts and gposts are the addresses of the segment
!     boundaries.

      fposts(1) = 0
      gposts(1) = 0
      Do i = 1, nodes
         fposts(i+1) = fposts(i) + islen(i)
         gposts(i+1) = gposts(i) + irlen(i)
      End Do

      npnew = gposts(nodes+1)
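! --- npnew is the total number of keys received in the shuffle; the
!     caller must make mx large enough to hold it (regular sampling
!     bounds npnew at roughly twice the average per-process count).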

      Call MPI_Alltoallv(
     &     w1  , islen, fposts, datyp2,
     &     keys, irlen, gposts, datyp2,
     &     comm, ier )
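
! --- keys(1:npnew) now consists of nodes sorted runs; run i occupies
!     keys(gposts(i)+1:gposts(i+1)).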

! --- Set up the information for the merge:

      Do i = 1, nodes + 1
         itabl(i) = gposts(i)
      End Do

! --- Merge the segments within each bin.

      Call d_merge( npnew, nodes, mx, keys, irnkl, itabl, itabr )
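
! --- keys(1:npnew) is now sorted locally; by construction of the
!     fenceposts, concatenating keys(1:npnew) over the process ranks in
!     rank order yields the globally sorted sequence. (irnkl and itabr
!     are presumably workspace/outputs of d_merge, whose interface is
!     not shown here.)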
! ----------------------------------------------------------------------
      End Subroutine d_psrs