root / synthbench / euroben-dm / mod2a / .svn / text-base / mvrur4.f.svn-base @ 0:839f52ef7657
History | View | Annotate | Download (5.2 kB)
1 |
Subroutine mvrur4( trans, m, n, alpha, a, lda, x, incx, |
---|---|
2 |
& beta, y, incy, wrk ) |
3 |
! ---------------------------------------------------------------------- |
4 |
! --- Row oriented implementation of matrix-vector multiplication |
5 |
! y = alpha*A*x + beta*y or y = alpha*A'*x + beta*y |
6 |
! with loop unrolling to a depth of 4. |
7 |
! The calling sequence is based on the Level 2 BLAS routine DGEMV |
8 |
! |
9 |
! Parameters |
10 |
! ---------- |
11 |
! TRANS - Input - Specifies the operation to be performed: |
12 |
! TRANS = 'N' implies y = alpha*A*x + beta*y. |
13 |
! TRANS = 'T' implies y = alpha*A'*x + beta*y. |
14 |
! M, N - Input - Dimension of the problem. |
15 |
! ALPHA - Input - Specifies the scalar alpha. |
16 |
! A(M,N) - Input - Contains the matrix A. |
17 |
! LDA - Input - Leading Dimension of A. |
18 |
! X(*) - Input - Contains the vector x. |
19 |
! INCX - Input - Specifies the increment for the elements of x. |
20 |
! In this implementation, only INCX = 0 is allowed. |
21 |
! This parameter is here only for compatibility |
22 |
! with Level 2 BLAS routine DGEMV only. |
23 |
! BETA - Input - Scalar beta; BETA = 0, then Y need not be set. |
24 |
! Y(*) - Output - Contains result vector y. |
25 |
! |
26 |
! INCY - Input - Specifies the increment for the elements of Y. |
27 |
! In this implementation, only INCY = 0 is allowed. |
28 |
! This parameter is here only for compatibility |
29 |
! with Level 2 BLAS routine DGEMV only. |
30 |
! WRK(*) - Workspace. NOT compatible with DGEMV! |
31 |
! ---------------------------------------------------------------------- |
32 |
Use dist_module |
33 |
Use numerics |
34 |
Character*1 :: trans |
35 |
Integer :: m, n, lda, incx, incy |
36 |
Real(l_) :: alpha, beta |
37 |
Real(l_) :: a(lda,*), x(*), y(*), wrk(*) |
38 |
! ---------------------------------------------------------------------- |
39 |
! --- Communication types |
40 |
Include 'mpif.h' |
41 |
Integer :: com, type |
42 |
|
43 |
! --- Local constants |
44 |
Real(l_), Parameter :: zero = 0.0_l_, one = 1.0_l_ |
45 |
|
46 |
! --- Local variables |
47 |
Integer :: i, j, imin, info, jmin, k, mybase |
48 |
Real(l_) :: temp, temp1, temp2, temp3 |
49 |
! ---------------------------------------------------------------------- |
50 |
! --- Test the input parameters |
51 |
|
52 |
info = 0 |
53 |
If ( trans /= 'N' .AND. trans /= 'T' ) Then |
54 |
info = 1 |
55 |
Else If ( m < 0 ) Then |
56 |
info = 2 |
57 |
Else If ( n < 0 ) Then |
58 |
info = 3 |
59 |
Else If ( lda < Max( 1, m ) ) Then |
60 |
info = 6 |
61 |
Else If ( incx /= 1 ) Then |
62 |
info = 8 |
63 |
Else If ( incy /= 1 ) Then |
64 |
info = 11 |
65 |
End If |
66 |
If ( info /= 0 ) Then |
67 |
Call eberr( 'mvrur4', info ) |
68 |
Return |
69 |
End If |
70 |
! ---------------------------------------------------------------------- |
71 |
! --- Start the operations; First form y := beta*y |
72 |
|
73 |
If ( beta /= one ) Then |
74 |
If ( beta == zero ) Then |
75 |
y(1:m) = zero |
76 |
Else |
77 |
y(1:m) = beta*y(1:m) |
78 |
End If |
79 |
End If |
80 |
If ( alpha == zero ) Return |
81 |
If ( trans == 'N' ) Then |
82 |
! ---------------------------------------------------------------------- |
83 |
! --- Form y := y + alpha*A*x |
84 |
|
85 |
Do i = 1, m |
86 |
temp = zero |
87 |
j = Mod ( n, 2 ) |
88 |
If ( j >= 1 ) Then |
89 |
temp = temp + a(i,j) * x(j) |
90 |
End If |
91 |
j = Mod ( n, 4 ) |
92 |
If ( j >= 2 ) Then |
93 |
temp = temp + a(i,j-1) * x(j-1) + a(i,j) * x(j) |
94 |
End If |
95 |
jmin = j + 4 |
96 |
Do j = jmin, n, 4 |
97 |
temp = temp + a(i,j-3) * x(j-3) + a(i,j-2) * x(j-2) |
98 |
& + a(i,j-1) * x(j-1) + a(i,j ) * x(j) |
99 |
End Do |
100 |
y(i) = y(i) + alpha * temp |
101 |
End Do |
102 |
Else |
103 |
! ---------------------------------------------------------------------- |
104 |
! --- Form y := y + alpha*A'*x; generate partial results. |
105 |
|
106 |
mybase = offset(me) |
107 |
wrk(1:n) = zero |
108 |
i = Mod ( m, 2 ) |
109 |
If ( i >= 1 ) Then |
110 |
temp = alpha * x(mybase+i) |
111 |
wrk(1:n) = wrk(1:n) + a(i,1:n)*temp |
112 |
End If |
113 |
i = Mod ( m, 4 ) |
114 |
If( i .GE. 2 ) Then |
115 |
temp1 = alpha * x(mybase+i-1) |
116 |
temp = alpha * x(mybase+i) |
117 |
Do j = 1, n |
118 |
wrk(j) = wrk(j) + a(i-1,j) * temp1 + a(i,j) * temp |
119 |
End Do |
120 |
End If |
121 |
imin = i + 4 |
122 |
Do i = imin, m, 4 |
123 |
temp3 = alpha * x(mybase+i-3) |
124 |
temp2 = alpha * x(mybase+i-2) |
125 |
temp1 = alpha * x(mybase+i-1) |
126 |
temp = alpha * x(mybase+i) |
127 |
Do j = 1, n |
128 |
wrk(j) = wrk(j) + a(i-3,j) * temp3 + a(i-2,j) * temp2 + |
129 |
& a(i-1,j) * temp1 + a(i,j) * temp |
130 |
End Do |
131 |
End Do |
132 |
! ---------------------------------------------------------------------- |
133 |
! --- Combine partial results. |
134 |
|
135 |
type = MPI_Real8 |
136 |
com = MPI_Comm_World |
137 |
Do k = 0, nodes - 1 |
138 |
kbase = offset(k) + 1 |
139 |
Call MPI_Reduce( wrk(kbase), y, sizes(k), type, MPI_Sum, k, |
140 |
& com, info ) |
141 |
End Do |
142 |
Endif |
143 |
! ---------------------------------------------------------------------- |
144 |
End Subroutine mvrur4 |