copyright (C) 2001 MSC-RPN COMM %%%MC2%%%
#include "precision.cdk"
subroutine fgmres (n,im,rhs,sol,i,vv,w,wk2,wrk8,nnk, 1,9
* eps,maxits,iout,icode,its)
implicit none
#include "partopo.cdk"
#include "nbcpu.cdk"
integer n, im, maxits, iout, icode, nnk, its
FLOAT rhs(*), sol(*), vv(n,im+1),w(n,im), wk2(n), eps
real*8 wrk8(nnk)
c-----------------------------------------------------------------------
c
c flexible GMRES routine which allows a variable preconditioner.
c Implemented with a reverse communication protocol for flexibility -
c explicit (exact) residual norms for restarts
c
c written by Y. Saad, modified by A. Malevsky, version February 1, 1995
c
c-----------------------------------------------------------------------
c
c Reverse Communication Implementation.
c
c-------------------------------------------------
c USAGE: (see also comments for icode below). CGMRES
c should be put in a loop and the loop should be active for as
c long as icode is not equal to 0. On return fgmres will
c 1) either be requesting the new preconditioned vector applied
c to wk1 in case icode.eq.1 (result should be put in wk2)
c 2) or be requesting the product of A applied to the vector wk1
c in case icode.eq.2 (result should be put in wk2)
c 3) or be terminated in case icode .eq. 0.
c on entry always set icode = 0. So icode should be set back to zero
c upon convergence.
c-----------------------------------------------------------------------
c Here is a typical way of running fgmres:
c
c icode = 0
c 1 continue
c call fgmres (n,im,rhs,sol,i,vv,w,wk1,wk2,eps,maxits,iout,icode)
c
c if (icode .eq. 1) then
c call precon(n, wk1, wk2) <--- user's variable preconditioning
c goto 1
c else if (icode .ge. 2) then
c call matvec (n,wk1, wk2) <--- user's matrix vector product.
c goto 1
c else
c ----- done ----
c .........
c
c----------------------------------- INPUT -----------------------------
c
c n == integer. the dimension of the problem
c
c im == size of Krylov subspace: should not exceed 50 in this
c version (can be reset in code. looking at comment below)
c
c rhs == vector of length n containing the right hand side
c
c sol == initial guess on input, approximate solution on output
c
c vv == work space of size n x (im+1)
c
c w == work space of length n x im
c
c wk1,
c wk2, == two work vectors of length n each used for the reverse
c communication protocole. When on return (icode .ne. 1)
c the user should call fgmres again with wk2 = precon * wk1
c and icode untouched. When icode.eq.1 then it means that
c convergence has taken place.
c
c eps == tolerance for stopping criterion. process is stopped
c as soon as ( ||.|| is the euclidean norm):
c || current residual||/||initial residual|| <= eps
c
c maxits== maximum number of iterations allowed
c
c iout == output unit number number for printing intermediate results
c if (iout .le. 0) no statistics are printed.
c
c icode = integer. indicator for the reverse communication protocole.
c ON ENTRY : icode should be set to icode = 0.
c ON RETURN:
c * icode .eq. 1 value means that fgmres has not finished
c and that it is requesting a preconditioned vector before
c continuing. The user must compute M**(-1) wk1, where M is
c the preconditioing matrix (may vary at each call) and wk1 is
c the vector as provided by fgmres upun return, and put the
c result in wk2. Then fgmres must be called again without
c changing any other argument.
c * icode .eq. 2 value means that fgmres has not finished
c and that it is requesting a matrix vector product before
c continuing. The user must compute A * wk1, where A is the
c coefficient matrix and wk1 is the vector provided by
c upon return. The result of the operation is to be put in
c the vector wk2. Then fgmres must be called again without
c changing any other argument.
c * icode .eq. 0 means that fgmres has finished and sol contains
c the approximate solution.
c comment: typically fgmres must be implemented in a loop
c with fgmres being called as long icode is returned with
c a value .ne. 0.
c------------------------------ CALLS ----------------------------------
c
c local variables
FLOAT hh(51,50), c(50), s(50), rs(51), t, ro
real*8 fdotp,fdotp2
integer i,j,k,i1,k1,n1,ii,jj
real epsmac,eps1,r0,gam
common /gmresf/ hh,c,s,rs,ro
common /gmresr/ epsmac,eps1,r0,gam
common /gmresi/ i1,k1,ii,jj
* data epsmac/1.d-16/
data epsmac/1.d-6/
*
*---------------------------------------------------------------------
*
goto (100,200,300) icode + 1
*
100 if (myproc .eq. 0) then
print*, 'FGMRES solver has converge but ',
$ 'solution was not considered by callee -- ABORT --'
call mc2stop
(-1)
endif
*
* outer loop starts here..
*
*--------------compute initial residual vector --------------
*
200 do j=1,n
vv(j,1) = rhs(j) - wk2(j)
end do
*
20 t = fdotp2
(n, vv, 1, vv, 1, wrk8, nnk)
ro = SQRT (t)
*
if (ro .eq. 0.0e0) goto 999
t = 1.0e0/ ro
*
call vecmcte
(vv,t,n)
*
if (its .eq. 0) eps1=eps*ro
if (its .eq. 0) r0 = ro
if (iout .gt. 0 .and. myproc .eq. 0) write(iout, 199) its, ro/r0
*
* initialize 1-st term of rhs of hessenberg system..
*
rs(1) = ro
i = 0
*
4 i = i + 1
its = its + 1
i1 = i + 1
*
icode = 2
goto 9991
*
* first call to ope corresponds to intialization goto back to 11.
* modified gram - schmidt...
*
300 do j=1, i
t = fdotp2
(n, vv(1,j), 1, vv(1,i1), 1, wrk8, nnk)
hh(j,i) = t
call myxpy2
(n, -t, vv(1,j), vv(1,i1))
end do
*
t = SQRT ( fdotp2
(n, vv(1,i1), 1, vv(1,i1), 1, wrk8, nnk) )
hh(i1,i) = t
if (t .eq. 0.0e0) goto 58
t = 1.0e0 / t
call vecmcte
(vv(1,i1),t,n)
*
* done with modified gram schimdt and arnoldi step.
* now update factorization of hh
*
58 if (i .eq. 1) goto 121
*
* perfrom previous transformations on i-th column of h
do k=2,i
k1 = k-1
t = hh(k1,i)
hh(k1,i) = c(k1)*t + s(k1)*hh(k,i)
hh(k ,i) = -s(k1)*t + c(k1)*hh(k,i)
end do
*
121 gam = SQRT(hh(i,i)**2 + hh(i1,i)**2)
if (gam .eq. 0.0e0) gam = epsmac
*
* determine next plane rotation
*
c(i) = hh( i,i)/gam
s(i) = hh(i1,i)/gam
rs(i1) = -s(i)*rs(i)
rs(i ) = c(i)*rs(i)
*
* determine residual norm and test for convergence
*
hh(i,i) = c(i)*hh(i,i) + s(i)*hh(i1,i)
ro = abs(rs(i1))
*
if (iout .gt. 0 .and. myproc .eq. 0) write(iout, 199) its, ro/r0
if (i .lt. im .and. (ro .gt. eps1)) goto 4
*
* now compute solution. first solve upper triangular system.
rs(i) = rs(i)/hh(i,i)
do ii=2,i
k = i-ii+1
k1 = k+1
t = rs(k)
do j=k1,i
t = t-hh(k,j)*rs(j)
end do
rs(k) = t/hh(k,k)
end do
* done with back substitution..
* now form linear combination to get solution
*
do j=1, i
t = rs(j)
call myxpy2
(n, t, w(1,j), sol)
end do
*
* test for return
*
if (ro .le. eps1 .or. its .ge. maxits) goto 999
*
* else compute residual vector and continue..
*
do j=1,i
jj = i1-j+1
rs(jj-1) = -s(jj-1)*rs(jj)
rs(jj ) = c(jj-1)*rs(jj)
end do
do j=1,i1
t = rs(j)
if (j .eq. 1) t = t-1.0e0
call myxpy2
(n, t, vv(1,j), vv)
end do
*
* restart outer loop.
*
goto 20
*
999 icode = 0
*
199 format(' -- fmgres its =', i4, ' res. norm =', d20.6)
*
*---------------------------------------------------------------------
*
9991 return
end
*
#include "precision.cdk"
subroutine mycopy (n, x, y)
implicit none
integer n,j
FLOAT x(n),y(n)
*
#ifdef NEC
do j = 1,n
y(j) = x(j)
end do
#else
call COPY (n, x, 1, y, 1)
#endif
*
return
end
#include "precision.cdk"
subroutine mycopy2 (n, x, y)
implicit none
#include "nbcpu.cdk"
integer n,j
FLOAT x(n),y(n)
*
#ifdef NEC
*PDIR PARDO FOR=ncpudyn
do j = 1,n
y(j) = x(j)
end do
#else
call COPY (n, x, 1, y, 1)
#endif
*
return
end
*
#include "precision.cdk"
subroutine myxpy (n, c, x, y)
implicit none
integer n,j
FLOAT x(n),y(n),c
*
#ifdef NEC
do j = 1,n
y(j) = y(j) + c*x(j)
end do
#else
call AXPY (n, c, x, 1, y, 1)
#endif
*
return
end
#include "precision.cdk"
subroutine myxpy2 (n, c, x, y) 3
implicit none
#include "nbcpu.cdk"
integer n,j
FLOAT x(n),y(n),c
*
#ifdef NEC
*PDIR PARDO FOR=ncpudyn
do j = 1,n
y(j) = y(j) + c*x(j)
end do
#else
call AXPY (n, c, x, 1, y, 1)
#endif
*
return
end
*
#include "precision.cdk"
subroutine v1mv2 (d,s1,s2,n)
implicit none
#include "nbcpu.cdk"
integer n,j
FLOAT d(*),s1(*),s2(*)
*PDIR PARDO FOR=ncpudyn
do j=1,n
d(j) = s1(j) - s2(j)
end do
*
return
end
#include "precision.cdk"
subroutine vecmcte (d,cte,n) 2
implicit none
#include "nbcpu.cdk"
integer n,j
FLOAT d(*),cte
*PDIR PARDO FOR=ncpudyn
do j=1,n
d(j) = d(j) * cte
end do
*
return
end