root / synthbench / euroben-ports / base / C-MPI / mod2f / gtrans.c @ 0:839f52ef7657
History | View | Annotate | Download (2.5 kB)
1 |
#include <stdlib.h> |
---|---|
2 |
#include <mpi.h> |
3 |
#include "mpiargs.h" |
4 |
#include "fundefs.h" |
5 |
|
6 |
void gtrans( int n1, int n2, int m1, int m2, |
7 |
double **ar, double **ai, double **atr, double **ati, |
8 |
int dir, double *time ) |
9 |
// ---------------------------------------------------------------------
|
10 |
// 'gtrans' does a global transposition of arrays 'ar' & 'ai' and
|
11 |
// puts them in arrays 'atr' & 'ati'.
|
12 |
// ---------------------------------------------------------------------
|
13 |
{ |
14 |
MPI_Comm comm = MPI_COMM_WORLD; |
15 |
double **wrkr, **wrk2r, **wrki, **wrk2i;
|
16 |
int *scnts, *sdpls, *rcnts, *rdpls;
|
17 |
double *bufin, *bufout;
|
18 |
double ltime;
|
19 |
// ---------------------------------------------------------------------
|
20 |
// --- If nodes = 1, only local transposition.
|
21 |
|
22 |
if ( nodes == 1 ) { |
23 |
ltrans( n1, n2, ar, ai, atr, ati ); |
24 |
return;
|
25 |
} |
26 |
// ---------------------------------------------------------------------
|
27 |
// --- Make work arrays.
|
28 |
|
29 |
wrkr = makmat( n2, m1 ); wrki = makmat( n2, m1 ); |
30 |
bufin = calloc( n2*m1, sizeof( double ) ); |
31 |
bufout = calloc( n1*m2, sizeof( double ) ); |
32 |
scnts = calloc( nodes, sizeof( int ) ); |
33 |
sdpls = calloc( nodes, sizeof( int ) ); |
34 |
rcnts = calloc( nodes, sizeof( int ) ); |
35 |
rdpls = calloc( nodes, sizeof( int ) ); |
36 |
|
37 |
// ---------------------------------------------------------------------
|
38 |
// --- Do local transpositions first.
|
39 |
|
40 |
ltrans( m1, n2, ar, ai, wrkr, wrki ); |
41 |
// ---------------------------------------------------------------------
|
42 |
// --- Determine sizes and displacements of data to be sent.
|
43 |
|
44 |
cntdpls( scnts, sdpls, rcnts, rdpls, dir ); |
45 |
// ---------------------------------------------------------------------
|
46 |
// --- Distribute appropriate blocks over the processors and do a block
|
47 |
// transposition on the output buffers afterwards to get the
|
48 |
// elements of 'at[r|i]' in the right order.
|
49 |
|
50 |
|
51 |
d2to1( n2, m1, wrkr, bufin ); |
52 |
ltime = MPI_Wtime(); |
53 |
MPI_Alltoallv( bufin, scnts, sdpls, MPI_DOUBLE, |
54 |
bufout, rcnts, rdpls, MPI_DOUBLE, comm ); |
55 |
btrans( n1, n2, bufout, atr, dir ); |
56 |
d2to1( n2, m1, wrki, bufin ); |
57 |
MPI_Alltoallv( bufin, scnts, sdpls, MPI_DOUBLE, |
58 |
bufout, rcnts, rdpls, MPI_DOUBLE, comm ); |
59 |
btrans( n1, n2, bufout, ati, dir ); |
60 |
*time = *time + MPI_Wtime() - ltime; |
61 |
// ---------------------------------------------------------------------
|
62 |
// --- Clean up.
|
63 |
|
64 |
free( bufin ); free( bufout ); |
65 |
free( scnts ); free( sdpls ); free( rcnts ); free( rdpls ); |
66 |
delmat( m2, wrki ); delmat( m2, wrkr ); |
67 |
} |