root / synthbench / euroben-ports / base / Fortran-MPI / mod2a / .svn / text-base / mvcur4.f.svn-base @ 0:839f52ef7657
History | View | Annotate | Download (5.3 kB)
1 |
Subroutine mvcur4( trans, m, n, alpha, a, lda, x, incx, |
---|---|
2 |
& beta, y, incy, wrk ) |
3 |
! ---------------------------------------------------------------------- |
4 |
! --- Column oriented implementation of matrix-vector multiplication |
5 |
! y = beta*y + alpha*A*x or y = beta*y + alpha*A'*x |
6 |
! with loop unrolling to a depth of 4. |
7 |
! Based on the report of Dongarra and Eisenstat. |
8 |
! The calling sequence is based on the Level 2 BLAS routine DGEMV |
9 |
! |
10 |
! Parameters |
11 |
! ---------- |
12 |
! TRANS - Input - Specifies the operation to be performed: |
13 |
! TRANS = 'N' implies y = A*X + beta*y. |
14 |
! TRANS = 'T' implies y = A'*x + beta*y. |
15 |
! M, N - Input - Dimension of the problem. |
16 |
! ALPHA - Input - Specifies scalar alpha. |
17 |
! A(M,N) - Input - Contains the matrix A. |
18 |
! LDA - Input - Leading Dimension of A. |
19 |
! X(*) - Input - Contains the vector x. |
20 |
! INCX - Input - Specifies the increment for the elements of X. |
21 |
! In this implementation, INCX must be 1. The |
22 |
! parameter is only here for compatibility with |
23 |
! Level 2 BLAS routine DGEMV. |
24 |
! BETA - Input - Scalar beta; if BETA = 0, then Y need not be set. |
25 |
! Y(*) - Output - Contains result vector y |
26 |
! INCY - Input - Specifies the increment for the elements of Y. |
27 |
! In this implementation, INCY must be 1. The |
28 |
! parameter is only here for compatibility with |
29 |
! Level 2 BLAS routine DGEMV. |
30 |
! WRK(*) - Workspace. NOT compatible with DGEMV! |
31 |
! ---------------------------------------------------------------------- |
32 |
Use dist_module |
33 |
Use numerics |
34 |
Character*1 :: trans |
35 |
Integer :: m, n, lda, incx, incy |
36 |
Real(l_) :: alpha, beta |
37 |
Real(l_) :: a(lda,*), x(*), y(*), wrk(*) |
38 |
|
39 |
! --- Communication types |
40 |
Include 'mpif.h' |
41 |
Integer com, type |
42 |
|
43 |
! --- Local constants |
44 |
Real(l_), Parameter :: zero = 0.0_l_, one = 1.0_l_ |
45 |
|
46 |
! --- Local variables |
47 |
Integer :: i, imin, info, j, jmin, jt, k, mybase |
48 |
Real(l_) :: temp, temp1, temp2, temp3 |
49 |
! ---------------------------------------------------------------------- |
50 |
! ---Test the input parameters |
51 |
|
52 |
info = 0 |
53 |
If ( trans /= 'N' .AND. trans /= 'T' ) Then |
54 |
info = 1 |
55 |
Else If ( m < 0 ) Then |
56 |
info = 2 |
57 |
Else If ( n < 0 ) Then |
58 |
info = 3 |
59 |
Else If ( lda < MAX( 1, m ) ) Then |
60 |
info = 6 |
61 |
Else If ( incx /= 1 ) Then |
62 |
info = 8 |
63 |
Else If ( incy /= 1 ) Then |
64 |
info = 11 |
65 |
End If |
66 |
If ( info /= 0 ) Then |
67 |
Call eberr ( 'mvcur4', info ) |
68 |
Return |
69 |
End If |
70 |
! ---------------------------------------------------------------------- |
71 |
! --- Start the operations; First form y := beta*y |
72 |
|
73 |
If ( beta /= one ) Then |
74 |
If ( beta == zero ) Then |
75 |
y(1:m) = zero |
76 |
Else |
77 |
y(1:m) = beta*y(1:m) |
78 |
End If |
79 |
End If |
80 |
If ( alpha == zero ) Return |
81 |
If ( trans == 'N' ) Then |
82 |
! ---------------------------------------------------------------------- |
83 |
! --- Form y := y + alpha*A*x |
84 |
|
85 |
j = Mod ( n, 2 ) |
86 |
If ( j >= 1 ) Then |
87 |
temp = alpha * x(j) |
88 |
Do i = 1, m |
89 |
y(i) = y(i) + a(i,j) * temp |
90 |
End Do |
91 |
End If |
92 |
j = Mod ( n, 4 ) |
93 |
If ( j >= 2 ) Then |
94 |
temp1 = alpha * x(j-1) |
95 |
temp = alpha * x(j) |
96 |
Do i = 1, m |
97 |
y(i) = y(i) + a(i,j-1) * temp1 + a(i,j) * temp |
98 |
End Do |
99 |
End If |
100 |
jmin = j + 4 |
101 |
Do j = jmin, n, 4 |
102 |
temp3 = alpha * x(j-3) |
103 |
temp2 = alpha * x(j-2) |
104 |
temp1 = alpha * x(j-1) |
105 |
temp = alpha * x(j) |
106 |
Do i = 1, m |
107 |
y(i) = y(i) + a(i,j-3) * temp3 + a(i,j-2) * temp2 |
108 |
& + a(i,j-1) * temp1 + a(i,j ) * temp |
109 |
End Do |
110 |
End Do |
111 |
Else |
112 |
! ---------------------------------------------------------------------- |
113 |
! --- Form y := y + alpha*A'*x; generate partial results. |
114 |
|
115 |
mybase = offset(me) |
116 |
Do j = 1, n |
117 |
temp = zero |
118 |
wrk(j) = zero |
119 |
i = Mod ( m, 2 ) |
120 |
If ( i >= 1 ) Then |
121 |
temp = temp + a(i,j) * x(i+mybase) |
122 |
End If |
123 |
i = Mod ( m, 4 ) |
124 |
If ( i >= 2 ) Then |
125 |
temp = temp + a(i-1,j) * x(i+mybase-1) + |
126 |
& a(i,j) * x(i+mybase) |
127 |
End If |
128 |
imin = i + 4 |
129 |
Do i = imin, m, 4 |
130 |
temp = temp + a(i-3,j) * x(i+mybase-3) + |
131 |
& a(i-2,j) * x(i+mybase-2) + |
132 |
& a(i-1,j) * x(i+mybase-1) + |
133 |
& a(i,j) * x(i+mybase) |
134 |
End Do |
135 |
wrk(j) = wrk(j) + alpha * temp |
136 |
End Do |
137 |
! ---------------------------------------------------------------------- |
138 |
! --- Combine partial results. |
139 |
|
140 |
type = MPI_Real8 |
141 |
com = MPI_Comm_World |
142 |
Do k = 0, nodes - 1 |
143 |
kbase = offset(k) + 1 |
144 |
Call MPI_Reduce( wrk(kbase), y, sizes(k), type, MPI_Sum, k, |
145 |
& com, info ) |
146 |
End Do |
147 |
Endif |
148 |
! ---------------------------------------------------------------------- |
149 |
End Subroutine mvcur4 |