root / synthbench / euroben-dm / mod2ci / rgmres.f @ 0:839f52ef7657
History | View | Annotate | Download (5.6 kB)
1 |
Subroutine rgmres( n, nl, nel, mp, indx, rowp, matvals, q, x, b, |
---|---|
2 |
& gamma, maxit, tol, exnrm, prec ) |
3 |
! ---------------------------------------------------------------------- |
4 |
! --- rgmres does an iterative solve on a local part of Ax = b, where A |
5 |
! is in CRS format: |
6 |
! Integer indx(nel), |
7 |
! Integer rowp(n+1), and |
8 |
! Real(l_) matvals(nel). |
9 |
! --- The vectors b, x, and q are completely in memory but only a local |
10 |
! of x is updated (and combined later). |
11 |
! Real(l_) b(n) : The righthand side. |
12 |
! Real(l_) x(n) : On input the initial guess of the solution |
13 |
! On convergence 'x' contains the solution. |
14 |
! Real(l_) q(n) : Contains a (left) preconditioning vector. |
15 |
! Real(l_) gamma(m+1) : Polynomial coefficients used in the |
16 |
! preconditioning. |
17 |
! Integer maxit : The maximum number of iterations allowed. |
18 |
! --- Real(l_) tol : Tolerance used as a stop criterium. |
19 |
! Real(l_) exnrm : Contains the norm of the residual on exit. |
20 |
! External prec : Name of subroutine performing the |
21 |
! preconditioning. |
22 |
! ---------------------------------------------------------------------- |
23 |
Use numerics |
24 |
Use floptime |
25 |
Use mpi_module |
26 |
Implicit None |
27 |
|
28 |
Integer :: n, nl, nel, mp, maxit |
29 |
Integer :: indx(nel), rowp(nl+1) |
30 |
Real(l_) :: matvals(nel) |
31 |
Real(l_) :: q(n), x(n), b(n) |
32 |
Real(l_) :: gamma(mp+1), tol, exnrm |
33 |
|
34 |
External prec |
35 |
|
36 |
Integer :: i, iter, j, jc, k, k0, k1 |
37 |
Logical :: conv |
38 |
Integer, Parameter :: ib = 10, ir = 50 |
39 |
Real(l_) :: g(ir+2), rho(ir), rm(ir+1,ir+1), v(ib*n) |
40 |
Real(l_) :: alpha, beta, eta, rhstp, tau1, tau2 |
41 |
Real(l_) :: r(n), w(n), z(n) |
42 |
Real(l_) :: dotpr, nrm2 |
43 |
External dotpr, nrm2 |
44 |
! ---------------------------------------------------------------------- |
45 |
conv = .FALSE. |
46 |
rhstp = Sqrt( nrm2( nl, b(lb) ) ) |
47 |
Call spmxv( nl, nel, indx, rowp, matvals, x, w(lb) ) |
48 |
z(lb:gub) = b(lb:gub) - w(lb:gub) |
49 |
Call prec( nl, nel, mp, indx, rowp, matvals, z, r, gamma ) |
50 |
beta = Sqrt( nrm2( nl, r(lb) ) ) |
51 |
Do i = 1, maxit |
52 |
iter = i |
53 |
g(1) = beta |
54 |
g(2) = beta |
55 |
If ( beta == 0.0_l_ ) Then |
56 |
Print *, 'Stop: beta = 0'; Stop |
57 |
End If |
58 |
v(lb:gub) = r(lb:gub)/beta |
59 |
k0 = 0 |
60 |
Do j = 1, ib |
61 |
jc = j |
62 |
Call spmxv( nl, nel, indx, rowp, matvals, v(k0+1:k0+n), |
63 |
& w(lb) ) |
64 |
Call prec( nl, nel, mp, indx, rowp, matvals, w, z, gamma ) |
65 |
Call MPI_Allgatherv( z(lb), nl, rtyp, z, sizes, offset, |
66 |
& rtyp, comm, ierr ) |
67 |
k1 = 0 |
68 |
Do k = 1, j |
69 |
rm(k,j) = dotpr( n, v(k1+1), z ) |
70 |
k1 = k1 + n |
71 |
End Do |
72 |
k1 = 0 |
73 |
w = 0.0_l_ |
74 |
Do k = 1, j |
75 |
w(lb:gub) = w(lb:gub) + rm(k,j)*v(k1+lb:k1+gub) |
76 |
k1 = k1 + n |
77 |
flops = flops + 2*nl |
78 |
End Do |
79 |
w(lb:gub) = z(lb:gub) - w(lb:gub) |
80 |
rm(j+1,j) = Sqrt( nrm2( nl, w(lb) ) ) |
81 |
If ( rm(j+1,j) == 0.0_l_ ) Then |
82 |
Print *, ' Stop: r(j+1,j) = 0'; Stop |
83 |
End If |
84 |
k0 = k0 + n |
85 |
w(lb:gub) = w(lb:gub)/rm(j+1,j) |
86 |
v(k0+lb:k0+gub) = w(lb:gub) |
87 |
Do k = 1, j - 1 |
88 |
Call rotf( rho(k), alpha, eta ) |
89 |
tau1 = rm(k,j) |
90 |
tau2 = rm(k+1,j) |
91 |
rm(k,j) = alpha*tau1 - eta*tau2 |
92 |
rm(k+1,j) = eta*tau1 + alpha*tau2 |
93 |
flops = flops + 4 |
94 |
End Do |
95 |
Call givens( rm(j,j), rm(j+1,j), alpha, eta ) |
96 |
tau1 = rm(j,j) |
97 |
tau2 = rm(j+1,j) |
98 |
rm(j,j) = alpha*tau1 - eta*tau2 |
99 |
rm(j+1,j) = eta*tau1 + alpha*tau2 |
100 |
flops = flops + 4 |
101 |
Call rotb( rho(j), alpha, eta ) |
102 |
g(j) = g(j)*alpha |
103 |
g(j+1) = g(j+1)*eta |
104 |
g(j+2) = g(j+1) |
105 |
! |
106 |
! --- Test for convergence. |
107 |
! |
108 |
exnrm = Abs( g(j+1) ) |
109 |
conv = exnrm < rhstp |
110 |
If ( conv ) Exit |
111 |
End Do ! <--- End j-loop |
112 |
Call lsqslv( jc, ir+2, rm, g ) |
113 |
k1 = 0 |
114 |
Do k = 1, jc |
115 |
x(lb:gub) = x(lb:gub) + g(k)*v(k1+lb:k1+gub) |
116 |
k1 = k1 + n |
117 |
flops = flops + 2*nl |
118 |
End Do |
119 |
If ( conv ) Return ! <--- Convergence |
120 |
Call spmxv( nl, nel, indx, rowp, matvals, x, w(lb) ) |
121 |
z(lb:gub) = b(lb:gub) - w(lb:gub) |
122 |
Call prec( nl, nel, mp, indx, rowp, matvals, z, r, gamma ) |
123 |
beta = Sqrt( nrm2( nl, r(lb) ) ) |
124 |
End Do ! <--- End i-loop |
125 |
! ---------------------------------------------------------------------- |
126 |
! --- Normally we would end up here and with no convergence issue a |
127 |
! warning. Because we only want to see the residual value and |
128 |
! the speed for benchmarking purposes, we comment out the following |
129 |
! lines: |
130 |
! |
131 |
! If ( iter >= maxit ) Then |
132 |
! iter = maxit |
133 |
! Print *, 'No convergence in ', maxit, ' iterations;' |
134 |
! Print *, 'Norm of residual =', exnrm |
135 |
! Stop |
136 |
! End If |
137 |
flops = flops + nl + 9 + iter*( 2*nl + 9 ) ! <--- # of rest flops. |
138 |
! ---------------------------------------------------------------------- |
139 |
End Subroutine rgmres |