Statistics
| Branch: | Revision:

root / synthbench / euroben-dm / mod2i / .svn / text-base / i_psrs.f.svn-base @ 0:839f52ef7657

History | View | Annotate | Download (5.1 kB)

1
      Subroutine i_psrs( np, mx, keys, npnew )
2
! ----------------------------------------------------------------------
3
      Use      dist_module
4
      Implicit None
5

    
6
      Include 'mpif.h'
7
! ----------------------------------------------------------------------
8
! --- Parallel Sort by Regular Sampling algorithm as described by
9
!     Xiaobo Li, et. al.
10
!
11
! --- This implementation is based on the PSRS version for the
12
!     HP/Convex SPP-1000 of C. Mobarry/GSFC & J. Crawford/VSEP
13
!     at NASA.
14
! --- Adapted to be used as a generic subroutine for Integer data.
15
!     In this Fortran 90 implementation no preprocessing is needed.
16
! --- npnew has been added to the parameter list to enable global
17
!     combining of the sorted list.
18
! --- A bug has been removed out of the merging routine that caused
19
!     the keys() array not to be rearranged.
20
! ----------------------------------------------------------------------
21
! --- keys() is the array of keys to be sorted.
22
!     w1() is a work array.
23
! ----------------------------------------------------------------------
24
      Integer  :: np, mx, npnew
25
      Integer  :: keys(mx)
26

    
27
      Integer  :: w1(mx)
28
      Integer  :: irnkl(mx)
29
      Integer  :: islen(nodes), irlen(nodes)
30

    
31
! --- Fencepost index and key values for shuffle:
32
      Integer  :: fposts(nodes+1), gposts(nodes+1)
33
      Integer  :: itabr(nodes), itabl(nodes+1)
34
      Integer  :: work(nodes*(nodes+1))
35
      Integer  :: fpval(nodes+1)
36
      Integer  :: lmax, ak
37

    
38
      Integer  :: stat(MPI_Status_Size)
39
      Integer  :: comm, datype
40
      Integer  :: buf(nodes)
41
      Integer  :: ier, itag
42
      Integer  :: i, j, k, step
43
! ----------------------------------------------------------------------
44
      itag   = 0
45
      datype = MPI_Integer
46
      Comm   = MPI_Comm_World
47
! ----------------------------------------------------------------------
48
! --- Local sorts.
49

    
50
      w1 = keys
51
      Call iqsort( w1, np , 1, np )
52
      If ( nodes == 1 ) Then       ! --- Serial sort: we're done.
53
         keys  = w1
54
         npnew = np
55
         Return
56
      End If
57

    
58
! --- w1() is now the sorted keys.
59

    
60
      lmax = w1(np)
61

    
62
! --- Choose nproc evenly spaced values out of every bin.
63
!     Store them all in work().
64

    
65
      k    = 1
66
      step = np/nodes
67
      Do i = 1,nodes
68
         work(i) = w1(k)
69
         k       = k + step
70
      End Do
71

    
72
! --- work(1:nodes) are the sampled key values for the fenceposts.
73

    
74
      itag = itag + 1
75

    
76
      If ( me /= 0 ) Then
77
         Call MPI_Send( work, nodes, datype, 0, itag, comm, ier )
78
      Else
79
         Do i = 1, nodes - 1
80
            Call MPI_Recv( buf, nodes, datype, MPI_Any_Source,
81
     &                     itag, comm, stat, ier)
82
            Do j = 1, nodes
83
               work(stat(MPI_Source)*nodes+j) = buf(j)
84
            End Do
85
         End Do
86
      End If
87

    
88
! --- Insertion sort the fencepost values of keys and indexes.
89

    
90
      If ( me == 0 ) Then
91
         Do i = 2, nodes*nodes
92
            ak = work(i)
93
            Do j = i, 2, -1
94
               If ( work(j-1) <= ak ) Go To 10
95
               work(j) = work(j-1)
96
            End Do
97
            j = 1
98
   10       Continue
99
            work(j) = ak
100
         End Do
101

    
102
! --- After the insertion sort work() contains the sorted sampled
103
!     keys for the fenceposts. We put them in fpval and braodcast them.
104

    
105
         k = 1
106
         Do i = 1, nodes*nodes, nodes
107
            fpval(k) = work(i)
108
            k = k + 1
109
         End Do
110
      End If
111
      Call MPI_BCast( fpval, nodes+1, datype, 0, comm, ier )
112
      fpval(nodes+1) = lmax + 1
113

    
114
! --- Determine segment boundaries. Within each bin, fposts(i) is the
115
!     start of the i-th shuffle segment.
116

    
117
      fposts(1) = 1
118
      k         = 2
119
      Do i = 1, np
120

    
121
! --- The first element may be greater than several fencepost values,
122
!     so we must use a do-while loop.
123

    
124
         Do 
125
            If ( w1(i) < fpval(k) ) Exit
126
            fposts(k) = i
127
            k = k + 1
128
         End Do
129
      End Do
130

    
131
! --- The last element may not be greater than the last fencepost value,
132
!     so we must assign an appropriate value to every fencepost past the
133
!     last.
134

    
135
      Do i = k, nodes + 1
136
         fposts(i) = np + 1
137
      End Do
138

    
139
! --- Every process needs fposts() values from every other process, so
140
!     we will give each process a copy of all the fposts's in work().
141

    
142
      Do i = 1, nodes
143
         islen(i) = fposts(i+1) - fposts(i)
144
      End Do
145

    
146
      Call MPI_Alltoall(islen, 1, datype, irlen, 1, datype, comm, ier )
147

    
148
! --- Make sure that "fposts" and "gposts" are zero based for
149
!     MPI_Alltoallv. fposts and gposts are the addresses of the segment
150
!     boundaries.
151

    
152
      fposts(1) = 0
153
      gposts(1) = 0
154
      Do i = 1, nodes
155
         fposts(i+1) = fposts(i) + islen(i)
156
         gposts(i+1) = gposts(i) + irlen(i)
157
      End Do
158

    
159
      npnew = gposts(nodes+1)
160

    
161
      Call MPI_Alltoallv(
162
     &     w1  , islen, fposts, datype,
163
     &     keys, irlen, gposts, datype,
164
     &     comm, ier )
165

    
166
!--- Set up the information for the merge:
167

    
168
      Do i = 1, nodes + 1
169
         itabl(i) = gposts(i)
170
      End Do
171

    
172
! --- Merge the segments within each bin.
173

    
174
      Call i_merge( npnew, nodes, mx, keys, irnkl, itabl, itabr )
175
! ----------------------------------------------------------------------
176
      End Subroutine i_psrs