Statistics
| Branch: | Revision:

root / synthbench / euroben-dm / mod2a / .svn / text-base / mvrur4.f.svn-base @ 0:839f52ef7657

History | View | Annotate | Download (5.2 kB)

1
      Subroutine mvrur4( trans, m, n, alpha, a, lda, x, incx,
2
     &                   beta, y, incy, wrk )
3
! ----------------------------------------------------------------------
4
! --- Row oriented implementation of matrix-vector multiplication 
5
!     y = alpha*A*x + beta*y   or   y = alpha*A'*x + beta*y
6
!     with loop unrolling to a depth of 4.
7
!     The calling sequence is based on the Level 2 BLAS routine DGEMV
8
!
9
!  Parameters
10
!  ----------
11
!  TRANS   - Input  - Specifies the operation to be performed:
12
!                     TRANS = 'N' implies  y = alpha*A*x + beta*y.
13
!                     TRANS = 'T' implies  y = alpha*A'*x + beta*y.
14
!  M, N    - Input  - Dimension of the problem.
15
!  ALPHA   - Input  - Specifies the scalar alpha.
16
!  A(M,N)  - Input  - Contains the matrix A.
17
!  LDA     - Input  - Leading Dimension of A.
18
!  X(*)    - Input  - Contains the vector x.
19
!  INCX    - Input  - Specifies the increment for the elements of x.
20
!                     In this implementation, only INCX = 0 is allowed.
21
!                     This parameter is here only for compatibility 
22
!                     with Level 2 BLAS routine DGEMV only.
23
!  BETA    - Input  - Scalar beta; BETA = 0, then Y need not be set.
24
!  Y(*)    - Output - Contains result vector y.
25
!
26
!  INCY    - Input  - Specifies the increment for the elements of Y.
27
!                     In this implementation, only INCY = 0 is allowed.
28
!                     This parameter is here only for compatibility 
29
!                     with Level 2 BLAS routine DGEMV only.
30
!  WRK(*)  - Workspace. NOT compatible with DGEMV!
31
! ----------------------------------------------------------------------
32
      Use                    dist_module
33
      Use                    numerics
34
      Character*1         :: trans
35
      Integer             :: m, n, lda, incx, incy
36
      Real(l_)            :: alpha, beta
37
      Real(l_)            :: a(lda,*), x(*), y(*), wrk(*)
38
! ----------------------------------------------------------------------
39
! --- Communication types
40
      Include                'mpif.h'
41
      Integer             :: com, type
42

    
43
! --- Local constants
44
      Real(l_), Parameter :: zero = 0.0_l_, one = 1.0_l_
45

    
46
! --- Local variables
47
      Integer             :: i, j, imin, info, jmin, k, mybase
48
      Real(l_)            :: temp, temp1, temp2, temp3
49
! ----------------------------------------------------------------------
50
! --- Test the input parameters
51

    
52
      info = 0
53
      If ( trans /= 'N' .AND. trans /= 'T' ) Then
54
         info = 1
55
      Else If ( m < 0 ) Then
56
         info = 2
57
      Else If ( n < 0 ) Then
58
         info = 3
59
      Else If ( lda < Max( 1, m ) ) Then
60
         info = 6
61
      Else If ( incx /= 1 ) Then
62
         info = 8
63
      Else If ( incy /= 1 ) Then
64
         info = 11
65
      End If
66
      If ( info /= 0 ) Then
67
         Call eberr( 'mvrur4', info )
68
         Return
69
      End If
70
! ----------------------------------------------------------------------
71
! --- Start the operations; First form y := beta*y
72

    
73
      If ( beta /= one ) Then
74
         If ( beta == zero ) Then
75
            y(1:m) = zero
76
         Else
77
            y(1:m) = beta*y(1:m)
78
         End If
79
      End If
80
      If ( alpha == zero ) Return
81
      If ( trans == 'N' ) Then
82
! ----------------------------------------------------------------------
83
! --- Form  y := y + alpha*A*x
84

    
85
        Do i = 1, m
86
           temp = zero
87
           j = Mod ( n, 2 )
88
           If ( j >= 1 ) Then
89
              temp = temp + a(i,j) * x(j)
90
           End If
91
           j = Mod ( n, 4 )
92
           If ( j >= 2 ) Then
93
              temp = temp + a(i,j-1) * x(j-1) + a(i,j) * x(j)
94
           End If
95
           jmin = j + 4
96
           Do j = jmin, n, 4
97
              temp = temp + a(i,j-3) * x(j-3) + a(i,j-2) * x(j-2)
98
     &                    + a(i,j-1) * x(j-1) + a(i,j  ) * x(j)
99
            End Do
100
           y(i) = y(i) + alpha * temp
101
         End Do
102
      Else
103
! ----------------------------------------------------------------------
104
! --- Form  y := y + alpha*A'*x; generate partial results.
105

    
106
        mybase = offset(me)
107
        wrk(1:n) = zero
108
        i = Mod ( m, 2 )
109
        If ( i >= 1 ) Then
110
           temp = alpha * x(mybase+i)
111
           wrk(1:n) = wrk(1:n) + a(i,1:n)*temp
112
        End If
113
        i = Mod ( m, 4 )
114
        If( i .GE. 2 ) Then
115
           temp1 = alpha * x(mybase+i-1)
116
           temp  = alpha * x(mybase+i)
117
           Do  j = 1, n
118
              wrk(j) = wrk(j) + a(i-1,j) * temp1 + a(i,j) * temp
119
           End Do
120
        End If
121
        imin = i + 4
122
        Do  i = imin, m, 4
123
          temp3 = alpha * x(mybase+i-3)
124
          temp2 = alpha * x(mybase+i-2)
125
          temp1 = alpha * x(mybase+i-1)
126
          temp  = alpha * x(mybase+i)
127
          Do  j = 1, n
128
            wrk(j) = wrk(j) + a(i-3,j) * temp3 + a(i-2,j) * temp2 +
129
     &                        a(i-1,j) * temp1 + a(i,j)   * temp
130
          End Do
131
        End Do
132
! ----------------------------------------------------------------------
133
! --- Combine partial results.
134

    
135
        type = MPI_Real8
136
        com  = MPI_Comm_World
137
        Do k = 0, nodes - 1
138
           kbase = offset(k) + 1
139
           Call MPI_Reduce( wrk(kbase), y, sizes(k), type, MPI_Sum, k, 
140
     &                      com, info ) 
141
        End Do
142
      Endif
143
! ----------------------------------------------------------------------
144
      End Subroutine mvrur4