subroutine gcrk ( p,wk,wk2,x,ax,nis,maxits,diagres,icode,its ) 1,8
implicit none
*
logical diagres
integer nis, maxits, icode, its
real*8 p(nis), x(nis,*), ax(nis,*), wk(nis,3), wk2(nis)
*
#include "gcrk.cdk"
#include "partopo.cdk"
*
integer k, err
**
*---------------------------------------------------------------------
*
* Computed GOTO
*
GOTO (10,20,30) icode + 1
*
10 if (myproc .eq. 0) then
print*, 'GCRK solver has converge but ',
$ 'solution was not considered by callee -- ABORT --'
call mc2stop
(-1)
endif
*
20 continue
!$omp do
do k = 1, nn
ax(k,1) = wk2(k)
x(k,1) = wk(k,2)
end do
!$omp enddo
*
if (diagres) then
call dotp_ab
(res,wk,wk,numproc)
!$omp single
if (myproc.eq.0) WRITE (6, '(A,I4,A,E20.6)')
$ ' -- gcr(k) its = ', its, ' res. norm = ', sqrt(res)
!$omp end single
endif
*
* GCR(k) iteration loop
*
5 continue
!$omp barrier
!$omp single
l = 0
!$omp end single
4 continue
!$omp barrier
!$omp single
l = l + 1
its = its + 1
!$omp end single
*
call dotp_ab_bb
(beta,ax2(l),wk,ax(1,l),numproc)
*
!$omp do
do k=1,nn
p(k) = p(k) + beta* x(k,l)
wk(k,1) = wk(k,1) - beta*ax(k,l)
end do
!$omp enddo
*
call dotp_ab
(dvmx,wk,wk,numproc)
*
!$omp single
dvmx = sqrt(dvmx)
!$omp end single
*
IF ( dvmx <= eps .OR. its >= maxits ) GOTO 999
*
icode = 2
c RETURN
goto 888
*
30 continue
*
IF ( l < nksp ) THEN
*
IF ( l.eq.1 ) THEN
call dotp_ab
(axar,ax,wk2,numproc)
!$omp single
del(1) = -axar(1)/ax2(1)
!$omp end single
!$omp do
do k=1,nn
ax(k,2) = wk2(k ) + del(1)*ax(k,1)
x(k,2) = wk (k,2) + del(1)* x(k,1)
end do
!$omp enddo
ENDIF
*
IF ( l.eq.2 ) THEN
call dotp_ab_cb
(ax,wk2,ax(1,2),numproc)
!$omp do
do k=1,nn
ax(k,3) = wk2(k ) + del(1)*ax(k,1) + del(2)*ax(k,2)
x (k,3) = wk(k,2) + del(1)* x(k,1) + del(2)* x(k,2)
end do
!$omp enddo
endif
*
ELSE
*
call dotp_ab_cb_db
(ax,wk2,ax(1,2),ax(1,3),numproc)
!$omp do
do k=1,nn
ax(k,1) = wk2(k ) + del(1)*ax(k,1) + del(2)*ax(k,2)
$ + del(3)*ax(k,3)
x (k,1) = wk(k,2) + del(1)* x(k,1) + del(2)* x(k,2)
$ + del(3)* x(k,3)
end do
!$omp enddo
*
ENDIF
*
if (diagres) then
call dotp_ab
(res,wk,wk,numproc)
!$omp single
if (myproc.eq.0) WRITE (6, '(A,I4,A,E20.6)')
+ ' -- gcr(k) its = ', its, ' res. norm = ', sqrt(res)
!$omp end single
endif
*
IF ( l < nksp ) GOTO 4
*
IF ( its < maxits ) GOTO 5
*
999 icode = 0
888 continue
*---------------------------------------------------------------------
*
return
end
*
subroutine dotp_ab_bb (d1,d2,v1,v2,nmpi) 1
implicit none
*
integer nmpi
real*8 v1(*),v2(*),d1,d2
*
#include "gcrk.cdk"
integer i,k,unroll,np,nl,nr,kd,kf,err
real*8 dpi(2), dpo(2), epa
parameter (epa = 1.d-30)
include 'mpif.h'
*
*---------------------------------------------------------------------
unroll = 6
nl = nn / nthreads
nr = mod(nn,nthreads)
*
!$omp do
do i=1,nthreads
kd=(i-1)*nl + 1
kf=kd+nl-1
if (i.eq.nthreads) kf = kf + nr
np = (kf-kd+1)/unroll
a1(i) = 0.0d0
b1(i) = 0.0d0
a2(i) = 0.0d0
b2(i) = 0.0d0
a3(i) = 0.0d0
b3(i) = 0.0d0
a4(i) = 0.0d0
b4(i) = 0.0d0
a5(i) = 0.0d0
b5(i) = 0.0d0
a6(i) = 0.0d0
b6(i) = 0.0d0
a7(i) = 0.0d0
b7(i) = 0.0d0
do k=kd,kd+np*unroll-1,unroll
a1(i) = a1(i) + v1(k )*v2(k )
b1(i) = b1(i) + v2(k )*v2(k )
a2(i) = a2(i) + v1(k+1)*v2(k+1)
b2(i) = b2(i) + v2(k+1)*v2(k+1)
a3(i) = a3(i) + v1(k+2)*v2(k+2)
b3(i) = b3(i) + v2(k+2)*v2(k+2)
a4(i) = a4(i) + v1(k+3)*v2(k+3)
b4(i) = b4(i) + v2(k+3)*v2(k+3)
a5(i) = a5(i) + v1(k+4)*v2(k+4)
b5(i) = b5(i) + v2(k+4)*v2(k+4)
a6(i) = a6(i) + v1(k+5)*v2(k+5)
b6(i) = b6(i) + v2(k+5)*v2(k+5)
end do
do k=kd+np*unroll,(kf-kd+1)
a7(i) = a7(i) + v1(k)*v2(k)
b7(i) = b7(i) + v2(k)*v2(k)
end do
end do
!$omp enddo
!$omp single
d1 = 0.0d0
d2 = 0.0d0
do i = 1,nthreads
d1 = d1 + a1(i)+a2(i)+a3(i)+a4(i)+a5(i)+a6(i)+a7(i)
d2 = d2 + b1(i)+b2(i)+b3(i)+b4(i)+b5(i)+b6(i)+b7(i)
end do
dpi(1) = d1
dpi(2) = d2
if (nmpi.gt.1) then
call MPI_Allreduce (dpi, dpo, 2, MPI_double_precision,
$ MPI_sum, MPI_comm_world, err )
d1 = dpo(1)
d2 = dpo(2)
else
d1 = dpi(1)
d2 = dpi(2)
endif
d2 = DMAX1 (epa, d2)
d1 = d1/d2
!$omp end single
*
*---------------------------------------------------------------------
return
end
*
subroutine dotp_ab_cb (v1,v2,v3,nmpi) 1
implicit none
*
#include "gcrk.cdk"
integer nmpi
real*8 v1(*),v2(*),v3(*)
*
integer i,k,unroll,np,nl,nr,kd,kf,err
real*8 dpo(2)
include 'mpif.h'
*
*---------------------------------------------------------------------
unroll = 6
nl = nn / nthreads
nr = mod(nn,nthreads)
*
!$omp do
do i=1,nthreads
kd=(i-1)*nl + 1
kf=kd+nl-1
if (i.eq.nthreads) kf = kf + nr
np = (kf-kd+1)/unroll
a1(i) = 0.0d0
b1(i) = 0.0d0
a2(i) = 0.0d0
b2(i) = 0.0d0
a3(i) = 0.0d0
b3(i) = 0.0d0
a4(i) = 0.0d0
b4(i) = 0.0d0
a5(i) = 0.0d0
b5(i) = 0.0d0
a6(i) = 0.0d0
b6(i) = 0.0d0
a7(i) = 0.0d0
b7(i) = 0.0d0
do k=kd,kd+np*unroll-1,unroll
a1(i) = a1(i) + v1(k )*v2(k )
b1(i) = b1(i) + v3(k )*v2(k )
a2(i) = a2(i) + v1(k+1)*v2(k+1)
b2(i) = b2(i) + v3(k+1)*v2(k+1)
a3(i) = a3(i) + v1(k+2)*v2(k+2)
b3(i) = b3(i) + v3(k+2)*v2(k+2)
a4(i) = a4(i) + v1(k+3)*v2(k+3)
b4(i) = b4(i) + v3(k+3)*v2(k+3)
a5(i) = a5(i) + v1(k+4)*v2(k+4)
b5(i) = b5(i) + v3(k+4)*v2(k+4)
a6(i) = a6(i) + v1(k+5)*v2(k+5)
b6(i) = b6(i) + v3(k+5)*v2(k+5)
end do
do k=kd+np*unroll,(kf-kd+1)
a7(i) = a7(i) + v1(k)*v2(k)
b7(i) = b7(i) + v3(k)*v2(k)
end do
end do
!$omp enddo
!$omp single
axar(1) = 0.0d0
axar(2) = 0.0d0
do i = 1,nthreads
axar(1) = axar(1) + a1(i)+a2(i)+a3(i)+a4(i)+a5(i)+a6(i)+a7(i)
axar(2) = axar(2) + b1(i)+b2(i)+b3(i)+b4(i)+b5(i)+b6(i)+b7(i)
end do
if (nmpi.gt.1) then
call MPI_Allreduce ( axar, dpo, 2, MPI_double_precision,
$ MPI_sum, MPI_comm_world, err )
axar(1) = dpo(1)
axar(2) = dpo(2)
endif
del(1) = -axar(1)/ax2(1)
del(2) = -axar(2)/ax2(2)
!$omp end single
*
*---------------------------------------------------------------------
return
end
*
subroutine dotp_ab_cb_db (v1,v2,v3,v4,nmpi) 1
implicit none
*
#include "gcrk.cdk"
integer nmpi
real*8 v1(*),v2(*),v3(*),v4(*)
*
integer i,k,unroll,np,nl,nr,kd,kf,err
real*8 dpo(3)
include 'mpif.h'
*
*---------------------------------------------------------------------
unroll = 6
nl = nn / nthreads
nr = mod(nn,nthreads)
*
!$omp do
do i=1,nthreads
kd=(i-1)*nl + 1
kf=kd+nl-1
if (i.eq.nthreads) kf = kf + nr
np = (kf-kd+1)/unroll
a1(i) = 0.0d0
b1(i) = 0.0d0
c1(i) = 0.0d0
a2(i) = 0.0d0
b2(i) = 0.0d0
c2(i) = 0.0d0
a3(i) = 0.0d0
b3(i) = 0.0d0
c3(i) = 0.0d0
a4(i) = 0.0d0
b4(i) = 0.0d0
c4(i) = 0.0d0
a5(i) = 0.0d0
b5(i) = 0.0d0
c5(i) = 0.0d0
a6(i) = 0.0d0
b6(i) = 0.0d0
c6(i) = 0.0d0
a7(i) = 0.0d0
b7(i) = 0.0d0
c7(i) = 0.0d0
do k=kd,kd+np*unroll-1,unroll
a1(i) = a1(i) + v1(k )*v2(k )
b1(i) = b1(i) + v3(k )*v2(k )
c1(i) = c1(i) + v4(k )*v2(k )
a2(i) = a2(i) + v1(k+1)*v2(k+1)
b2(i) = b2(i) + v3(k+1)*v2(k+1)
c2(i) = c2(i) + v4(k+1)*v2(k+1)
a3(i) = a3(i) + v1(k+2)*v2(k+2)
b3(i) = b3(i) + v3(k+2)*v2(k+2)
c3(i) = c3(i) + v4(k+2)*v2(k+2)
a4(i) = a4(i) + v1(k+3)*v2(k+3)
b4(i) = b4(i) + v3(k+3)*v2(k+3)
c4(i) = c4(i) + v4(k+3)*v2(k+3)
a5(i) = a5(i) + v1(k+4)*v2(k+4)
b5(i) = b5(i) + v3(k+4)*v2(k+4)
c5(i) = c5(i) + v4(k+4)*v2(k+4)
a6(i) = a6(i) + v1(k+5)*v2(k+5)
b6(i) = b6(i) + v3(k+5)*v2(k+5)
c6(i) = c6(i) + v4(k+5)*v2(k+5)
end do
do k=kd+np*unroll,(kf-kd+1)
a7(i) = a7(i) + v1(k)*v2(k)
b7(i) = b7(i) + v3(k)*v2(k)
c7(i) = c7(i) + v4(k)*v2(k)
end do
end do
!$omp enddo
!$omp single
axar(1) = 0.0d0
axar(2) = 0.0d0
axar(3) = 0.0d0
do i = 1,nthreads
axar(1) = axar(1) + a1(i)+a2(i)+a3(i)+a4(i)+a5(i)+a6(i)+a7(i)
axar(2) = axar(2) + b1(i)+b2(i)+b3(i)+b4(i)+b5(i)+b6(i)+b7(i)
axar(3) = axar(3) + c1(i)+c2(i)+c3(i)+c4(i)+c5(i)+c6(i)+c7(i)
end do
if (nmpi.gt.1) then
call MPI_Allreduce ( axar, dpo, 3, MPI_double_precision,
$ MPI_sum, MPI_comm_world, err )
axar(1) = dpo(1)
axar(2) = dpo(2)
axar(3) = dpo(3)
endif
del(1) = -axar(1)/ax2(1)
del(2) = -axar(2)/ax2(2)
del(3) = -axar(3)/ax2(3)
!$omp end single
*
*---------------------------------------------------------------------
return
end
*
subroutine dotp_ab (d1,v1,v2,nmpi) 4
implicit none
*
integer nmpi
real*8 v1(*),v2(*),d1
*
#include "gcrk.cdk"
integer i,k,unroll,np,nl,nr,kd,kf,err
real*8 t1
include 'mpif.h'
*
*---------------------------------------------------------------------
unroll = 6
nl = nn / nthreads
nr = mod(nn,nthreads)
*
!$omp do
do i=1,nthreads
kd=(i-1)*nl + 1
kf=kd+nl-1
if (i.eq.nthreads) kf = kf + nr
np = (kf-kd+1)/unroll
a1(i) = 0.0d0
a2(i) = 0.0d0
a3(i) = 0.0d0
a4(i) = 0.0d0
a5(i) = 0.0d0
a6(i) = 0.0d0
a7(i) = 0.0d0
do k=kd,kd+np*unroll-1,unroll
a1(i) = a1(i) + v1(k )*v2(k )
a2(i) = a2(i) + v1(k+1)*v2(k+1)
a3(i) = a3(i) + v1(k+2)*v2(k+2)
a4(i) = a4(i) + v1(k+3)*v2(k+3)
a5(i) = a5(i) + v1(k+4)*v2(k+4)
a6(i) = a6(i) + v1(k+5)*v2(k+5)
end do
do k=kd+np*unroll,(kf-kd+1)
a7(i) = a7(i) + v1(k)*v2(k)
end do
end do
!$omp enddo
!$omp single
d1 = 0.0d0
do i = 1,nthreads
d1 = d1 + a1(i)+a2(i)+a3(i)+a4(i)+a5(i)+a6(i)+a7(i)
end do
if (nmpi.gt.1) then
call MPI_Allreduce ( d1, t1, 1, MPI_double_precision,
$ MPI_sum, MPI_comm_world, err )
d1 = t1
endif
!$omp end single
*
*---------------------------------------------------------------------
return
end