root / synthbench / euroben-dm / mod2ci / .svn / text-base / tfqmr.f.svn-base @ 0:839f52ef7657
History | View | Annotate | Download (5.5 kB)
1 |
Subroutine tfqmr( n, nl, nel, m, indx, rowp, matvals, q, x, b, |
---|---|
2 |
& gamma, maxit, tol, exnrm, prec ) |
3 |
! ---------------------------------------------------------------------- |
4 |
! --- tfqmr does an iterative solve of Ax = b, where A is in CRS format: |
5 |
! Integer indx(nel), |
6 |
! Integer rowp(nl+1), and |
7 |
! Real(l_) matvals(nel). |
8 |
! Real(l_) b(n) : The righthand side. |
9 |
! Real(l_) x(n) : On input the initial guess of the solution |
10 |
! On convergence 'x' contains the solution. |
11 |
! Real(l_) q(n) : Contains a (left) preconditioning vector. |
12 |
! Real(l_) gamma(m+1) : Polynomial coefficients used in the |
13 |
! preconditioning. |
14 |
! Integer maxit : The maximum number of iterations allowed. |
15 |
! --- Real(l_) tol : Tolerance used as a stop criterium. |
16 |
! External prec : Name of subroutine performing the |
17 |
! preconditioning. |
18 |
! ---------------------------------------------------------------------- |
19 |
Use numerics |
20 |
Use floptime |
21 |
Use mpi_module |
22 |
Implicit None |
23 |
|
24 |
Integer :: n, nl, nel, m, maxit |
25 |
Integer :: indx(nel), rowp(nl+1) |
26 |
Real(l_) :: matvals(nel) |
27 |
Real(l_) :: q(n), x(n), b(n) |
28 |
Real(l_) :: gamma(m+1), tol, exnrm |
29 |
|
30 |
External prec |
31 |
|
32 |
Integer :: i, im, im0, iter |
33 |
Real(l_) :: alpha, beta, c, eta, eta0, kappa, rho, rho0, rhstp, |
34 |
& sigma, tau, tau0, theta, theta0 |
35 |
Real(l_) :: d(n), g(n), h(n), p(n), r(n), rt(n), v(n), w(n), |
36 |
& y(n), y0(n), z(n) |
37 |
Real(l_) :: dotpr, nrm2 |
38 |
External dotpr, nrm2 |
39 |
! ---------------------------------------------------------------------- |
40 |
Call spmxv( nl, nel, indx, rowp, matvals, x, w(lb) ) |
41 |
z(lb:gub) = b(lb:gub) - w(lb:gub) |
42 |
Call prec( nl, nel, m, indx, rowp, matvals, z, r, gamma ) |
43 |
w(lb:gub) = r(lb:gub) |
44 |
y(lb:gub) = r(lb:gub) |
45 |
Call spmxv( nl, nel, indx, rowp, matvals, y, z(lb) ) |
46 |
Call prec( nl, nel, m, indx, rowp, matvals, z, g, gamma ) |
47 |
v(lb:gub) = g(lb:gub) |
48 |
d = 0.0_l_ |
49 |
tau = Sqrt( nrm2( nl, r(lb) ) ) |
50 |
theta = 0.0_l_ |
51 |
eta = 0.0_l_ |
52 |
rt(lb:gub) = r(lb:gub) |
53 |
! |
54 |
! --- We can use 'nrm2' instead of dotpr because rt = r. |
55 |
! |
56 |
rho = nrm2( nl, r(lb) ) |
57 |
rhstp = tol*Sqrt( nrm2( nl, b(lb) ) ) |
58 |
im0 = 1 |
59 |
Do i = 1, maxit |
60 |
iter = i |
61 |
Call MPI_Allgatherv( rt(lb), nl, rtyp, rt, sizes, offset, |
62 |
& rtyp, comm, ierr ) |
63 |
Call MPI_Allgatherv( v(lb), nl, rtyp, v, sizes, offset, |
64 |
& rtyp, comm, ierr ) |
65 |
sigma = dotpr( n, rt, v ) |
66 |
If ( sigma == 0.0_l_ ) Then |
67 |
Print *, 'Stop: sigma = 0'; Stop |
68 |
End If |
69 |
alpha = rho/sigma |
70 |
y0(lb:gub) = y(lb:gub) |
71 |
y(lb:gub) = y(lb:gub) - alpha*v(lb:gub) |
72 |
Call spmxv( nl, nel, indx, rowp, matvals, y, z(lb) ) |
73 |
Call prec( nl, nel, m, indx, rowp, matvals, z, h, gamma ) |
74 |
Do im = im0, im0 + 1 |
75 |
w(lb:gub) = w(lb:gub) - alpha*g(lb:gub) |
76 |
theta0 = theta |
77 |
tau0 = tau |
78 |
If ( tau0 == 0.0_l_ ) Then |
79 |
Print *, 'Stop: tau0 = 0'; Stop |
80 |
End If |
81 |
theta = Sqrt( nrm2( nl, w(lb) ) )/tau |
82 |
c = 1.0_l_/Sqrt( 1.0_l_ + theta*theta ) |
83 |
tau = tau0*theta*c |
84 |
eta0 = eta |
85 |
eta = c*c*alpha |
86 |
If ( alpha == 0.0_l_ ) Then |
87 |
Print *, 'Stop: alpha = 0'; Stop |
88 |
End If |
89 |
d(lb:gub) = y0(lb:gub)+(theta0*theta0*eta0/alpha)*d(lb:gub) |
90 |
x(lb:gub) = x(lb:gub) + eta*d(lb:gub) |
91 |
exnrm = Sqrt( nrm2( nl, r(lb) ) ) |
92 |
kappa = Sqrt( Real( im + 1, l_ ) )*tau |
93 |
If ( kappa < tol ) Then |
94 |
Call spmxv( nl, nel, indx, rowp, matvals, x, p(lb) ) |
95 |
z(lb:gub) = b(lb:gub) - p(lb:gub) |
96 |
Call prec( nl, nel, m, indx, rowp, matvals, z, r, gamma ) |
97 |
exnrm = Sqrt( nrm2( nl, r(lb) ) ) |
98 |
If ( exnrm < rhstp ) Go To 10 ! <--- Convergence. |
99 |
flops = flops + n + 9 |
100 |
End If |
101 |
y0(lb:gub) = y(lb:gub) |
102 |
g(lb:gub) = h(lb:gub) |
103 |
End Do ! <--- im-loop |
104 |
rho0 = rho |
105 |
Call MPI_Allgatherv( w(lb), nl, rtyp, w, sizes, offset, |
106 |
& rtyp, comm, ierr ) |
107 |
rho = dotpr( n, rt, w ) |
108 |
If ( rho0 == 0.0_l_ ) Then |
109 |
Print *, 'Stop: rho0 = 0'; Stop |
110 |
End If |
111 |
beta = rho/rho0 |
112 |
y(lb:gub) = w(lb:gub) + beta*y0(lb:gub) |
113 |
Call spmxv( nl, nel, indx, rowp, matvals, y, z(lb) ) |
114 |
Call prec( nl, nel, m, indx, rowp, matvals, z, g, gamma ) |
115 |
v(lb:gub) = g(lb:gub) + beta*( h(lb:gub) + beta*v(lb:gub) ) |
116 |
im0 = im0 + 2 |
117 |
End Do ! <--- i-loop |
118 |
! ---------------------------------------------------------------------- |
119 |
! --- Normally we would end up here and with no convergence issue a |
120 |
! warning. Because we only want to see the residual value and |
121 |
! the speed for benchmarking purposes, we comment out the following |
122 |
! lines: |
123 |
! |
124 |
! If ( iter >= maxit ) Then |
125 |
! iter = maxit |
126 |
! Print *, 'No convergence in ', maxit, ' iterations;' |
127 |
! Print *, 'Norm of residual =', exnrm |
128 |
! Stop |
129 |
! End If |
130 |
10 flops = flops + nl + 10 + iter*( 22*nl + 79 ) ! <--- # of flops. |
131 |
! ---------------------------------------------------------------------- |
132 |
End Subroutine tfqmr |