Statistics
| Branch: | Revision:

root / synthbench / euroben-shm / mod2d / dgemm.f

History | View | Annotate | Download (9.8 kB)

1 0:839f52ef7657 louridas
      SUBROUTINE DGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
     $                   BETA, C, LDC )
      Use numerics
*     .. Scalar Arguments ..
      CHARACTER*1        TRANSA, TRANSB
      INTEGER            M, N, K, LDA, LDB, LDC
      Real(l_)   ALPHA, BETA
*     .. Array Arguments ..
      Real(l_)   A( LDA, * ), B( LDB, * ), C( LDC, * )
*     ..
*
*  Purpose
*  =======
*
*  DGEMM  performs one of the matrix-matrix operations
*
*     C := alpha*op( A )*op( B ) + beta*C,
*
*  where  op( X ) is one of
*
*     op( X ) = X   or   op( X ) = X',
*
*  alpha and beta are scalars, and A, B and C are matrices, with op( A )
*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
*
*  The loops over the columns of C are work-shared with OpenMP
*  "parallel do" directives.  Each value of the column index J is
*  handled by one iteration and only column J of C is written in that
*  iteration.
*
*  All real data use  Real(l_).  The kind parameter  l_  is not declared
*  in this routine; presumably it is supplied by module  numerics
*  (NOTE(review): confirm its precision there).
*
*  Parameters
*  ==========
*
*  TRANSA - CHARACTER*1.
*           On entry, TRANSA specifies the form of op( A ) to be used in
*           the matrix multiplication as follows:
*
*              TRANSA = 'N' or 'n',  op( A ) = A.
*
*              TRANSA = 'T' or 't',  op( A ) = A'.
*
*              TRANSA = 'C' or 'c',  op( A ) = A'.
*
*           The comparison is delegated to  LSAME,  so the handling of
*           lower-case letters is whatever  LSAME  implements.
*           Unchanged on exit.
*
*  TRANSB - CHARACTER*1.
*           On entry, TRANSB specifies the form of op( B ) to be used in
*           the matrix multiplication as follows:
*
*              TRANSB = 'N' or 'n',  op( B ) = B.
*
*              TRANSB = 'T' or 't',  op( B ) = B'.
*
*              TRANSB = 'C' or 'c',  op( B ) = B'.
*
*           Unchanged on exit.
*
*  M      - INTEGER.
*           On entry,  M  specifies  the number  of rows  of the  matrix
*           op( A )  and of the  matrix  C.  M  must  be at least  zero.
*           Unchanged on exit.
*
*  N      - INTEGER.
*           On entry,  N  specifies the number  of columns of the matrix
*           op( B ) and the number of columns of the matrix C. N must be
*           at least zero.
*           Unchanged on exit.
*
*  K      - INTEGER.
*           On entry,  K  specifies  the number of columns of the matrix
*           op( A ) and the number of rows of the matrix op( B ). K must
*           be at least  zero.
*           Unchanged on exit.
*
*  ALPHA  - REAL(l_).
*           On entry, ALPHA specifies the scalar alpha.
*           Unchanged on exit.
*
*  A      - REAL(l_) array of DIMENSION ( LDA, ka ), where ka is
*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
*           part of the array  A  must contain the matrix  A,  otherwise
*           the leading  k by m  part of the array  A  must contain  the
*           matrix A.
*           Unchanged on exit.
*
*  LDA    - INTEGER.
*           On entry, LDA specifies the first dimension of A as declared
*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
*           least  max( 1, k ).
*           Unchanged on exit.
*
*  B      - REAL(l_) array of DIMENSION ( LDB, kb ), where kb is
*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
*           part of the array  B  must contain the matrix  B,  otherwise
*           the leading  n by k  part of the array  B  must contain  the
*           matrix B.
*           Unchanged on exit.
*
*  LDB    - INTEGER.
*           On entry, LDB specifies the first dimension of B as declared
*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
*           LDB must be at least  max( 1, k ), otherwise  LDB must be at
*           least  max( 1, n ).
*           Unchanged on exit.
*
*  BETA   - REAL(l_).
*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
*           supplied as zero then C need not be set on input.
*           Unchanged on exit.
*
*  C      - REAL(l_) array of DIMENSION ( LDC, n ).
*           Before entry, the leading  m by n  part of the array  C must
*           contain the matrix  C,  except when  beta  is zero, in which
*           case C need not be set on entry.
*           On exit, the array  C  is overwritten by the  m by n  matrix
*           ( alpha*op( A )*op( B ) + beta*C ).
*
*  LDC    - INTEGER.
*           On entry, LDC specifies the first dimension of C as declared
*           in  the  calling  (sub)  program.   LDC  must  be  at  least
*           max( 1, m ).
*           Unchanged on exit.
*
*  Error handling
*  ==============
*
*  The arguments are validated in order.  For the first invalid one,
*  XERBLA( 'DGEMM ', INFO )  is called with  INFO  set to the position
*  of that argument in the argument list (1, 2, 3, 4, 5, 8, 10 or 13)
*  and the routine returns without touching  C.
*
*
*  Level 3 Blas routine.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*
*     .. External Functions ..
      LOGICAL            LSAME
      EXTERNAL           LSAME
*     .. External Subroutines ..
      EXTERNAL           XERBLA
*     .. Intrinsic Functions ..
      INTRINSIC          MAX
*     .. Local Scalars ..
      LOGICAL            NOTA, NOTB
      INTEGER            I, INFO, J, L, NROWA, NROWB
      Real(l_)   TEMP
*     .. Parameters ..
      Real(l_)   ONE         , ZERO
      PARAMETER( ONE = 1.0_l_, ZERO = 0.0_l_ )
*     ..
*     .. Executable Statements ..
*
*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
*     transposed and set  NROWA  and  NROWB  as the number of rows of
*     the stored arrays  A  and  B  respectively.  They are only needed
*     for the leading-dimension checks below.
*
      NOTA  = LSAME( TRANSA, 'N' )
      NOTB  = LSAME( TRANSB, 'N' )
      IF( NOTA )THEN
         NROWA = M
      ELSE
         NROWA = K
      END IF
      IF( NOTB )THEN
         NROWB = K
      ELSE
         NROWB = N
      END IF
*
*     Test the input parameters.  INFO records the argument position of
*     the first failing check; later checks are skipped.
*
      INFO = 0
      IF(      ( .NOT.NOTA                 ).AND.
     $         ( .NOT.LSAME( TRANSA, 'C' ) ).AND.
     $         ( .NOT.LSAME( TRANSA, 'T' ) )      )THEN
         INFO = 1
      ELSE IF( ( .NOT.NOTB                 ).AND.
     $         ( .NOT.LSAME( TRANSB, 'C' ) ).AND.
     $         ( .NOT.LSAME( TRANSB, 'T' ) )      )THEN
         INFO = 2
      ELSE IF( M  .LT.0               )THEN
         INFO = 3
      ELSE IF( N  .LT.0               )THEN
         INFO = 4
      ELSE IF( K  .LT.0               )THEN
         INFO = 5
      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
         INFO = 8
      ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN
         INFO = 10
      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
         INFO = 13
      END IF
      IF( INFO.NE.0 )THEN
         CALL XERBLA( 'DGEMM ', INFO )
         RETURN
      END IF
*
*     Quick return if possible:  C  is empty, or the product term
*     vanishes ( alpha = 0  or  k = 0 )  while  beta = 1  leaves  C
*     as it is.
*
      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
     $   RETURN
*
*     And if  alpha.eq.zero  only the scaling  C := beta*C  remains.
*     For  beta = 0  the result is stored as exact zeros and the old
*     contents of  C  are never read.
*
*     In every parallel loop of this routine  J  is the work-shared
*     loop variable.  I  and  L  are indices of sequential DO loops
*     inside the construct; no private clause lists them, so the code
*     relies on the OpenMP Fortran data-sharing rules making them
*     private.
*
      IF( ALPHA.EQ.ZERO )THEN
         IF( BETA.EQ.ZERO )THEN
!$omp parallel do
            DO J = 1, N
               DO I = 1, M
                  C( I, J ) = ZERO
               END DO
            END DO
         ELSE
!$omp parallel do
            DO J = 1, N
               DO I = 1, M
                  C( I, J ) = BETA*C( I, J )
               END DO
            END DO
         END IF
         RETURN
      END IF
*
*     Start the operations.
*
      IF( NOTB )THEN
         IF( NOTA )THEN
*
*           Form  C := alpha*A*B + beta*C.
*
*           Column J of C is first scaled by beta, then updated with
*           one scaled column of A per row index L of B.  A zero
*           entry  B( L, J )  skips its update altogether.
*
!$omp parallel do private(temp)
            DO J = 1, N
               IF( BETA.EQ.ZERO )THEN
                  DO I = 1, M
                     C( I, J ) = ZERO
                  END DO
               ELSE IF( BETA.NE.ONE )THEN
                  DO I = 1, M
                     C( I, J ) = BETA*C( I, J )
                  END DO
               END IF
               DO L = 1, K
                  IF( B( L, J ).NE.ZERO )THEN
                     TEMP = ALPHA*B( L, J )
                     DO I = 1, M
                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
                     END DO
                  END IF
               END DO
            END DO
         ELSE
*
*           Form  C := alpha*A'*B + beta*C
*
*           Each  C( I, J )  is a dot product of column I of A with
*           column J of B, accumulated in TEMP.  With  beta = 0  the
*           old value of  C( I, J )  is not read.
*
!$omp parallel do private(temp)
            DO J = 1, N
               DO I = 1, M
                  TEMP = ZERO
                  DO L = 1, K
                     TEMP = TEMP + A( L, I )*B( L, J )
                  END DO
                  IF( BETA.EQ.ZERO )THEN
                     C( I, J ) = ALPHA*TEMP
                  ELSE
                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
                  END IF
               END DO
            END DO
         END IF
      ELSE
         IF( NOTA )THEN
*
*           Form  C := alpha*A*B' + beta*C
*
*           Same scheme as the  A*B  case, with  B  addressed as
*           B( J, L )  to apply the transpose.
*
!$omp parallel do private(temp)
            DO J = 1, N
               IF( BETA.EQ.ZERO )THEN
                  DO I = 1, M
                     C( I, J ) = ZERO
                  END DO
               ELSE IF( BETA.NE.ONE )THEN
                  DO I = 1, M
                     C( I, J ) = BETA*C( I, J )
                  END DO
               END IF
               DO L = 1, K
                  IF( B( J, L ).NE.ZERO )THEN
                     TEMP = ALPHA*B( J, L )
                     DO I = 1, M
                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
                     END DO
                  END IF
               END DO
            END DO
         ELSE
*
*           Form  C := alpha*A'*B' + beta*C
*
*           Dot product of column I of A with row J of B.
*
!$omp parallel do private(temp)
            DO J = 1, N
               DO I = 1, M
                  TEMP = ZERO
                  DO L = 1, K
                     TEMP = TEMP + A( L, I )*B( J, L )
                  END DO
                  IF( BETA.EQ.ZERO )THEN
                     C( I, J ) = ALPHA*TEMP
                  ELSE
                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
                  END IF
               END DO
            END DO
         END IF
      END IF
*
      RETURN
*
*     End of DGEMM .
*
      END