Statistics
| Branch: | Revision:

root / synthbench / euroben-ports / base / Fortran-MPI / mod2a / .svn / text-base / mvcur4.f.svn-base @ 0:839f52ef7657

History | View | Annotate | Download (5.3 kB)

1
      Subroutine mvcur4( trans, m, n, alpha, a, lda, x, incx,
2
     &                   beta, y, incy, wrk )
3
! ----------------------------------------------------------------------
4
! --- Column oriented implementation of matrix-vector multiplication 
5
!     y = beta*y + alpha*A*x   or   y = beta*y + alpha*A'*x 
6
!     with loop unrolling to a depth of 4.
7
!     Based on the report of Dongarra and Eisenstat.
8
!     The calling sequence is based on the Level 2 BLAS routine DGEMV
9
!
10
!  Parameters
11
!  ----------
12
!  TRANS   - Input  - Specifies the operation to be performed:
13
!                     TRANS = 'N' implies  y = A*X + beta*y.
14
!                     TRANS = 'T' implies  y = A'*x + beta*y.
15
!  M, N    - Input  - Dimension of the problem.
16
!  ALPHA   - Input  - Specifies scalar alpha.
17
!  A(M,N)  - Input  - Contains the matrix A.
18
!  LDA     - Input  - Leading Dimension of A.
19
!  X(*)    - Input  - Contains the vector x.
20
!  INCX    - Input  - Specifies the increment for the elements of X.
21
!                     In this implementation, INCX must be 1.  The
22
!                     parameter is only here for compatibility with
23
!                     Level 2 BLAS routine DGEMV.
24
!  BETA    - Input  - Scalar beta; if BETA = 0, then Y need not be set.
25
!  Y(*)    - Output - Contains result vector y
26
!  INCY    - Input  - Specifies the increment for the elements of Y.
27
!                     In this implementation, INCY must be 1. The
28
!                     parameter is only here for compatibility with
29
!                     Level 2 BLAS routine DGEMV.
30
!  WRK(*)  - Workspace. NOT compatible with DGEMV!
31
! ----------------------------------------------------------------------
32
      Use                    dist_module
33
      Use                    numerics
34
      Character*1         :: trans
35
      Integer             :: m, n, lda, incx, incy
36
      Real(l_)            :: alpha, beta
37
      Real(l_)            :: a(lda,*), x(*), y(*), wrk(*)
38

    
39
! --- Communication types
40
      Include                'mpif.h'
41
      Integer                com, type
42

    
43
! --- Local constants
44
      Real(l_), Parameter :: zero = 0.0_l_, one = 1.0_l_
45

    
46
! --- Local variables
47
      Integer             :: i, imin, info, j, jmin, jt, k, mybase
48
      Real(l_)            :: temp, temp1, temp2, temp3 
49
! ----------------------------------------------------------------------
50
! ---Test the input parameters
51

    
52
      info = 0
53
      If ( trans /= 'N' .AND. trans /= 'T' ) Then
54
         info = 1
55
      Else If ( m < 0 ) Then
56
         info = 2
57
      Else If ( n < 0 ) Then
58
         info = 3
59
      Else If ( lda < MAX( 1, m ) ) Then
60
         info = 6
61
      Else If ( incx /= 1 ) Then
62
         info = 8
63
      Else If ( incy /= 1 ) Then
64
         info = 11
65
      End If
66
      If ( info /= 0 ) Then
67
         Call eberr ( 'mvcur4', info )
68
         Return
69
      End If
70
! ----------------------------------------------------------------------
71
! --- Start the operations; First form y := beta*y
72

    
73
      If ( beta /= one ) Then
74
         If ( beta == zero ) Then
75
            y(1:m) = zero
76
         Else
77
            y(1:m) = beta*y(1:m)
78
         End If
79
      End If
80
      If ( alpha == zero ) Return
81
      If ( trans == 'N' ) Then
82
! ----------------------------------------------------------------------
83
! --- Form  y := y + alpha*A*x
84

    
85
        j = Mod ( n, 2 )
86
        If ( j >= 1 ) Then
87
           temp = alpha * x(j)
88
           Do i = 1, m
89
              y(i) = y(i) + a(i,j) * temp
90
           End Do
91
        End If
92
        j = Mod ( n, 4 )
93
        If ( j >= 2 ) Then
94
           temp1 = alpha * x(j-1)
95
           temp  = alpha * x(j)
96
           Do i = 1, m
97
              y(i) = y(i) + a(i,j-1) * temp1 + a(i,j) * temp
98
           End Do
99
        End If
100
        jmin = j + 4
101
        Do j = jmin, n, 4
102
           temp3 = alpha * x(j-3)
103
           temp2 = alpha * x(j-2)
104
           temp1 = alpha * x(j-1)
105
           temp  = alpha * x(j)
106
           Do i = 1, m
107
              y(i) = y(i) + a(i,j-3) * temp3 + a(i,j-2) * temp2
108
     &                    + a(i,j-1) * temp1 + a(i,j  ) * temp
109
           End Do
110
        End Do
111
      Else
112
! ----------------------------------------------------------------------
113
! --- Form  y := y + alpha*A'*x; generate partial results.
114

    
115
        mybase = offset(me)
116
        Do j = 1, n
117
           temp = zero
118
           wrk(j) = zero
119
           i = Mod ( m, 2 )
120
           If ( i >= 1 ) Then
121
             temp = temp + a(i,j) * x(i+mybase)
122
           End If
123
           i = Mod ( m, 4 )
124
           If ( i >= 2 ) Then
125
              temp = temp + a(i-1,j) * x(i+mybase-1) +
126
     &                      a(i,j)   * x(i+mybase)
127
           End If
128
           imin = i + 4
129
           Do i = imin, m, 4
130
              temp = temp + a(i-3,j) * x(i+mybase-3) +
131
     &                      a(i-2,j) * x(i+mybase-2) +
132
     &                      a(i-1,j) * x(i+mybase-1) +
133
     &                      a(i,j)   * x(i+mybase)
134
           End Do
135
           wrk(j) = wrk(j) + alpha * temp
136
        End Do
137
! ----------------------------------------------------------------------
138
! --- Combine partial results.
139

    
140
        type = MPI_Real8
141
        com  = MPI_Comm_World
142
        Do k = 0, nodes - 1
143
           kbase = offset(k) + 1
144
           Call MPI_Reduce( wrk(kbase), y, sizes(k), type, MPI_Sum, k, 
145
     &                      com, info ) 
146
        End Do
147
      Endif
148
! ----------------------------------------------------------------------
149
      End Subroutine mvcur4