root / synthbench / euroben / mod2ci / rgmres.f
History | View | Annotate | Download (5.1 kB)
1 | 0:839f52ef7657 | louridas | Subroutine rgmres( n, nel, m, indx, rowp, matvals, q, x, b, gamma, |
---|---|---|---|
2 | 0:839f52ef7657 | louridas | & maxit, tol, exnrm, prec ) |
3 | 0:839f52ef7657 | louridas | ! ---------------------------------------------------------------------- |
4 | 0:839f52ef7657 | louridas | ! --- tfqmr does an iterative solve of Ax = b, where A is in CRS format: |
5 | 0:839f52ef7657 | louridas | ! Integer indx(nel), |
6 | 0:839f52ef7657 | louridas | ! Integer rowp(n+1), and |
7 | 0:839f52ef7657 | louridas | ! Real(l_) matvals(nel). |
8 | 0:839f52ef7657 | louridas | ! Real(l_) b(n) : The righthand side. |
9 | 0:839f52ef7657 | louridas | ! Real(l_) x(n) : On input the initial guess of the solution |
10 | 0:839f52ef7657 | louridas | ! On convergence 'x' contains the solution. |
11 | 0:839f52ef7657 | louridas | ! Real(l_) q(n) : Contains a (left) preconditioning vector. |
12 | 0:839f52ef7657 | louridas | ! Real(l_) gamma(m+1) : Polynomial coefficients used in the |
13 | 0:839f52ef7657 | louridas | ! preconditioning. |
14 | 0:839f52ef7657 | louridas | ! Integer maxit : The maximum number of iterations allowed. |
15 | 0:839f52ef7657 | louridas | ! --- Real(l_) tol : Tolerance used as a stop criterium. |
16 | 0:839f52ef7657 | louridas | ! Real(l_) exnrm : Contains the norm of the residual on exit. |
17 | 0:839f52ef7657 | louridas | ! External prec : Name of subroutine performing the |
18 | 0:839f52ef7657 | louridas | ! preconditioning. |
19 | 0:839f52ef7657 | louridas | ! ---------------------------------------------------------------------- |
20 | 0:839f52ef7657 | louridas | Use numerics |
21 | 0:839f52ef7657 | louridas | Use floptime |
22 | 0:839f52ef7657 | louridas | Implicit None |
23 | 0:839f52ef7657 | louridas | |
24 | 0:839f52ef7657 | louridas | Integer :: n, nel, m, maxit |
25 | 0:839f52ef7657 | louridas | Integer :: indx(nel), rowp(n+1) |
26 | 0:839f52ef7657 | louridas | Real(l_) :: matvals(nel) |
27 | 0:839f52ef7657 | louridas | Real(l_) :: q(n), x(n), b(n) |
28 | 0:839f52ef7657 | louridas | Real(l_) :: gamma(m+1), tol, exnrm |
29 | 0:839f52ef7657 | louridas | |
30 | 0:839f52ef7657 | louridas | External prec |
31 | 0:839f52ef7657 | louridas | |
32 | 0:839f52ef7657 | louridas | Integer :: i, iter, j, jc, k, k0, k1 |
33 | 0:839f52ef7657 | louridas | Logical :: conv |
34 | 0:839f52ef7657 | louridas | Integer, Parameter :: ib = 10, ir = 50 |
35 | 0:839f52ef7657 | louridas | Real(l_) :: g(ir+2), rho(ir), rm(ir+1,ir+1), v(ib*n) |
36 | 0:839f52ef7657 | louridas | Real(l_) :: alpha, beta, eta, rhstp, tau1, tau2 |
37 | 0:839f52ef7657 | louridas | Real(l_) :: r(n), w(n), z(n) |
38 | 0:839f52ef7657 | louridas | Real(l_) :: dotpr, nrm2 |
39 | 0:839f52ef7657 | louridas | External dotpr, nrm2 |
40 | 0:839f52ef7657 | louridas | ! ---------------------------------------------------------------------- |
41 | 0:839f52ef7657 | louridas | conv = .FALSE. |
42 | 0:839f52ef7657 | louridas | rhstp = Sqrt( nrm2( n, b ) ) |
43 | 0:839f52ef7657 | louridas | Call spmxv( n, nel, indx, rowp, matvals, x, w ) |
44 | 0:839f52ef7657 | louridas | z = b - w |
45 | 0:839f52ef7657 | louridas | Call prec( n, nel, m, indx, rowp, matvals, z, r, gamma ) |
46 | 0:839f52ef7657 | louridas | beta = Sqrt( nrm2( n, r ) ) |
47 | 0:839f52ef7657 | louridas | Do i = 1, maxit |
48 | 0:839f52ef7657 | louridas | iter = i |
49 | 0:839f52ef7657 | louridas | g(1) = beta |
50 | 0:839f52ef7657 | louridas | g(2) = beta |
51 | 0:839f52ef7657 | louridas | If ( beta == 0.0_l_ ) Then |
52 | 0:839f52ef7657 | louridas | Print *, 'Stop: beta = 0'; Stop |
53 | 0:839f52ef7657 | louridas | End If |
54 | 0:839f52ef7657 | louridas | v(1:n) = r/beta |
55 | 0:839f52ef7657 | louridas | k0 = 0 |
56 | 0:839f52ef7657 | louridas | Do j = 1, ib |
57 | 0:839f52ef7657 | louridas | jc = j |
58 | 0:839f52ef7657 | louridas | Call spmxv( n, nel, indx, rowp, matvals, v(k0+1:k0+n), w ) |
59 | 0:839f52ef7657 | louridas | Call prec( n, nel, m, indx, rowp, matvals, w, z, gamma ) |
60 | 0:839f52ef7657 | louridas | k1 = 0 |
61 | 0:839f52ef7657 | louridas | Do k = 1, j |
62 | 0:839f52ef7657 | louridas | rm(k,j) = Dotpr( n, v(k1+1), z ) |
63 | 0:839f52ef7657 | louridas | k1 = k1 + n |
64 | 0:839f52ef7657 | louridas | End Do |
65 | 0:839f52ef7657 | louridas | k1 = 0 |
66 | 0:839f52ef7657 | louridas | w = 0.0_l_ |
67 | 0:839f52ef7657 | louridas | Do k = 1, j |
68 | 0:839f52ef7657 | louridas | w = w + rm(k,j)*v(k1+1:k1+n) |
69 | 0:839f52ef7657 | louridas | k1 = k1 + n |
70 | 0:839f52ef7657 | louridas | flops = flops + 2*n |
71 | 0:839f52ef7657 | louridas | End Do |
72 | 0:839f52ef7657 | louridas | w = z - w |
73 | 0:839f52ef7657 | louridas | rm(j+1,j) = Sqrt( nrm2( n, w ) ) |
74 | 0:839f52ef7657 | louridas | If ( rm(j+1,j) == 0.0_l_ ) Then |
75 | 0:839f52ef7657 | louridas | Print *, ' Stop: r(j+1,j) = 0'; Stop |
76 | 0:839f52ef7657 | louridas | End If |
77 | 0:839f52ef7657 | louridas | k0 = k0 + n |
78 | 0:839f52ef7657 | louridas | v(k0+1:k0+n) = w/rm(j+1,j) |
79 | 0:839f52ef7657 | louridas | Do k = 1, j - 1 |
80 | 0:839f52ef7657 | louridas | Call rotf( rho(k), alpha, eta ) |
81 | 0:839f52ef7657 | louridas | tau1 = rm(k,j) |
82 | 0:839f52ef7657 | louridas | tau2 = rm(k+1,j) |
83 | 0:839f52ef7657 | louridas | rm(k,j) = alpha*tau1 - eta*tau2 |
84 | 0:839f52ef7657 | louridas | rm(k+1,j) = eta*tau1 + alpha*tau2 |
85 | 0:839f52ef7657 | louridas | flops = flops + 4 |
86 | 0:839f52ef7657 | louridas | End Do |
87 | 0:839f52ef7657 | louridas | Call givens( rm(j,j), rm(j+1,j), alpha, eta ) |
88 | 0:839f52ef7657 | louridas | tau1 = rm(j,j) |
89 | 0:839f52ef7657 | louridas | tau2 = rm(j+1,j) |
90 | 0:839f52ef7657 | louridas | rm(j,j) = alpha*tau1 - eta*tau2 |
91 | 0:839f52ef7657 | louridas | rm(j+1,j) = eta*tau1 + alpha*tau2 |
92 | 0:839f52ef7657 | louridas | flops = flops + 4 |
93 | 0:839f52ef7657 | louridas | Call rotb( rho(j), alpha, eta ) |
94 | 0:839f52ef7657 | louridas | g(j) = g(j)*alpha |
95 | 0:839f52ef7657 | louridas | g(j+1) = g(j+1)*eta |
96 | 0:839f52ef7657 | louridas | g(j+2) = g(j+1) |
97 | 0:839f52ef7657 | louridas | ! |
98 | 0:839f52ef7657 | louridas | ! --- Test for convergence. |
99 | 0:839f52ef7657 | louridas | ! |
100 | 0:839f52ef7657 | louridas | exnrm = Abs( g(j+1) ) |
101 | 0:839f52ef7657 | louridas | conv = exnrm < rhstp |
102 | 0:839f52ef7657 | louridas | If ( conv ) Exit |
103 | 0:839f52ef7657 | louridas | End Do ! <--- End k-loop |
104 | 0:839f52ef7657 | louridas | Call lsqslv( jc, ir+2, rm, g ) |
105 | 0:839f52ef7657 | louridas | k1 = 0 |
106 | 0:839f52ef7657 | louridas | Do k = 1, jc |
107 | 0:839f52ef7657 | louridas | x = x + g(k)*v(k1+1:k1+n) |
108 | 0:839f52ef7657 | louridas | k1 = k1 + n |
109 | 0:839f52ef7657 | louridas | flops = flops + 2*n |
110 | 0:839f52ef7657 | louridas | End Do |
111 | 0:839f52ef7657 | louridas | If ( conv ) Return ! <--- Convergence |
112 | 0:839f52ef7657 | louridas | Call spmxv( n, nel, indx, rowp, matvals, x, w ) |
113 | 0:839f52ef7657 | louridas | z = b - w |
114 | 0:839f52ef7657 | louridas | Call prec( n, nel, m, indx, rowp, matvals, z, r, gamma ) |
115 | 0:839f52ef7657 | louridas | beta = Sqrt( nrm2( n, r ) ) |
116 | 0:839f52ef7657 | louridas | End Do ! <--- End i-loop |
117 | 0:839f52ef7657 | louridas | ! ---------------------------------------------------------------------- |
118 | 0:839f52ef7657 | louridas | ! --- Normally we would end up here and with no convergence issue a |
119 | 0:839f52ef7657 | louridas | ! warning. Because we only want to see the residual value and |
120 | 0:839f52ef7657 | louridas | ! the speed for benchmarking purposes, we comment out the following |
121 | 0:839f52ef7657 | louridas | ! lines: |
122 | 0:839f52ef7657 | louridas | ! |
123 | 0:839f52ef7657 | louridas | ! If ( iter >= maxit ) Then |
124 | 0:839f52ef7657 | louridas | ! iter = maxit |
125 | 0:839f52ef7657 | louridas | ! Print *, 'No convergence in ', maxit, ' iterations;' |
126 | 0:839f52ef7657 | louridas | ! Print *, 'Norm of residual =', exnrm |
127 | 0:839f52ef7657 | louridas | ! Stop |
128 | 0:839f52ef7657 | louridas | ! End If |
129 | 0:839f52ef7657 | louridas | flops = flops + n + 9 + iter*( 2*n + 9 ) ! <--- # of rest flops. |
130 | 0:839f52ef7657 | louridas | ! ---------------------------------------------------------------------- |
131 | 0:839f52ef7657 | louridas | End Subroutine rgmres |