root / synthbench / euroben-ports / base / C-MPI / mod2f / gtrans.c @ 0:839f52ef7657
History | View | Annotate | Download (2.5 kB)
1 | 0:839f52ef7657 | louridas | #include <stdlib.h> |
---|---|---|---|
2 | 0:839f52ef7657 | louridas | #include <mpi.h> |
3 | 0:839f52ef7657 | louridas | #include "mpiargs.h" |
4 | 0:839f52ef7657 | louridas | #include "fundefs.h" |
5 | 0:839f52ef7657 | louridas | |
6 | 0:839f52ef7657 | louridas | void gtrans( int n1, int n2, int m1, int m2, |
7 | 0:839f52ef7657 | louridas | double **ar, double **ai, double **atr, double **ati, |
8 | 0:839f52ef7657 | louridas | int dir, double *time ) |
9 | 0:839f52ef7657 | louridas | // ---------------------------------------------------------------------
|
10 | 0:839f52ef7657 | louridas | // 'gtrans' does a global transposition of arrays 'ar' & 'ai' and
|
11 | 0:839f52ef7657 | louridas | // puts them in arrays 'atr' & 'ati'.
|
12 | 0:839f52ef7657 | louridas | // ---------------------------------------------------------------------
|
13 | 0:839f52ef7657 | louridas | { |
14 | 0:839f52ef7657 | louridas | MPI_Comm comm = MPI_COMM_WORLD; |
15 | 0:839f52ef7657 | louridas | double **wrkr, **wrk2r, **wrki, **wrk2i;
|
16 | 0:839f52ef7657 | louridas | int *scnts, *sdpls, *rcnts, *rdpls;
|
17 | 0:839f52ef7657 | louridas | double *bufin, *bufout;
|
18 | 0:839f52ef7657 | louridas | double ltime;
|
19 | 0:839f52ef7657 | louridas | // ---------------------------------------------------------------------
|
20 | 0:839f52ef7657 | louridas | // --- If nodes = 1, only local transposition.
|
21 | 0:839f52ef7657 | louridas | |
22 | 0:839f52ef7657 | louridas | if ( nodes == 1 ) { |
23 | 0:839f52ef7657 | louridas | ltrans( n1, n2, ar, ai, atr, ati ); |
24 | 0:839f52ef7657 | louridas | return;
|
25 | 0:839f52ef7657 | louridas | } |
26 | 0:839f52ef7657 | louridas | // ---------------------------------------------------------------------
|
27 | 0:839f52ef7657 | louridas | // --- Make work arrays.
|
28 | 0:839f52ef7657 | louridas | |
29 | 0:839f52ef7657 | louridas | wrkr = makmat( n2, m1 ); wrki = makmat( n2, m1 ); |
30 | 0:839f52ef7657 | louridas | bufin = calloc( n2*m1, sizeof( double ) ); |
31 | 0:839f52ef7657 | louridas | bufout = calloc( n1*m2, sizeof( double ) ); |
32 | 0:839f52ef7657 | louridas | scnts = calloc( nodes, sizeof( int ) ); |
33 | 0:839f52ef7657 | louridas | sdpls = calloc( nodes, sizeof( int ) ); |
34 | 0:839f52ef7657 | louridas | rcnts = calloc( nodes, sizeof( int ) ); |
35 | 0:839f52ef7657 | louridas | rdpls = calloc( nodes, sizeof( int ) ); |
36 | 0:839f52ef7657 | louridas | |
37 | 0:839f52ef7657 | louridas | // ---------------------------------------------------------------------
|
38 | 0:839f52ef7657 | louridas | // --- Do local transpositions first.
|
39 | 0:839f52ef7657 | louridas | |
40 | 0:839f52ef7657 | louridas | ltrans( m1, n2, ar, ai, wrkr, wrki ); |
41 | 0:839f52ef7657 | louridas | // ---------------------------------------------------------------------
|
42 | 0:839f52ef7657 | louridas | // --- Determine sizes and displacements of data to be sent.
|
43 | 0:839f52ef7657 | louridas | |
44 | 0:839f52ef7657 | louridas | cntdpls( scnts, sdpls, rcnts, rdpls, dir ); |
45 | 0:839f52ef7657 | louridas | // ---------------------------------------------------------------------
|
46 | 0:839f52ef7657 | louridas | // --- Distribute appropriate blocks over the processors and do a block
|
47 | 0:839f52ef7657 | louridas | // transposition on the output buffers afterwards to get the
|
48 | 0:839f52ef7657 | louridas | // elements of 'at[r|i]' in the right order.
|
49 | 0:839f52ef7657 | louridas | |
50 | 0:839f52ef7657 | louridas | |
51 | 0:839f52ef7657 | louridas | d2to1( n2, m1, wrkr, bufin ); |
52 | 0:839f52ef7657 | louridas | ltime = MPI_Wtime(); |
53 | 0:839f52ef7657 | louridas | MPI_Alltoallv( bufin, scnts, sdpls, MPI_DOUBLE, |
54 | 0:839f52ef7657 | louridas | bufout, rcnts, rdpls, MPI_DOUBLE, comm ); |
55 | 0:839f52ef7657 | louridas | btrans( n1, n2, bufout, atr, dir ); |
56 | 0:839f52ef7657 | louridas | d2to1( n2, m1, wrki, bufin ); |
57 | 0:839f52ef7657 | louridas | MPI_Alltoallv( bufin, scnts, sdpls, MPI_DOUBLE, |
58 | 0:839f52ef7657 | louridas | bufout, rcnts, rdpls, MPI_DOUBLE, comm ); |
59 | 0:839f52ef7657 | louridas | btrans( n1, n2, bufout, ati, dir ); |
60 | 0:839f52ef7657 | louridas | *time = *time + MPI_Wtime() - ltime; |
61 | 0:839f52ef7657 | louridas | // ---------------------------------------------------------------------
|
62 | 0:839f52ef7657 | louridas | // --- Clean up.
|
63 | 0:839f52ef7657 | louridas | |
64 | 0:839f52ef7657 | louridas | free( bufin ); free( bufout ); |
65 | 0:839f52ef7657 | louridas | free( scnts ); free( sdpls ); free( rcnts ); free( rdpls ); |
66 | 0:839f52ef7657 | louridas | delmat( m2, wrki ); delmat( m2, wrkr ); |
67 | 0:839f52ef7657 | louridas | } |