copyright (C) 2001 MSC-RPN COMM %%%MC2%%%
#include "precision.cdk"
real*8 function fdotp2 (nloc, x, incx, y, incy, wrk8, ncpu) 3,1
implicit none
integer nloc, incx, incy, ncpu
FLOAT x(nloc),y(nloc)
real*8 wrk8(ncpu)
c_________________________________________________________________c
c c
c Computes a distributed dot product of two vectors c
c c
c____________________________ INPUT ______________________________c
c c
c x,y - input vectors c
c nloc - length of these vectors c
c incx, incy - increments (usefull only on none SX4 plateforms c
c wrk8 - working space c
c ncpu - number of CPUs for OpenMP tasking c
c c
c____________________________ OUTPUT _____________________________c
c c
c returns the dot product c
c c
#include "partopo.cdk"
c
include 'mpif.h'
c
integer k,ierr
real*8 tsum,dtemp
common /fdotp/ tsum,dtemp
#ifndef NEC
FLOAT DOT
external DOT
#endif
#ifdef NEC
call mdotp
(nloc, x, y, wrk8, ncpu)
dtemp = 0.0d0
do k = 1,ncpu
dtemp = dtemp + wrk8(k)
end do
#else
dtemp = DOT (nloc, x, incx, y, incy)
#endif
if (numproc.gt.1) then
call MPI_Allreduce(dtemp, tsum, 1, MPI_double_precision,
$ MPI_sum, MPI_comm_world, ierr)
dtemp = tsum
endif
fdotp2 = dtemp
*
return
end
*
subroutine mdotp (nloc, x, y, wrk, ncpu) 1
implicit none
*
integer nloc,ncpu
FLOAT x(nloc), y(nloc)
real*8 wrk(ncpu)
*
integer i,k,nl,nr,kd,kf
*
nl = nloc / ncpu
nr = mod(nloc,ncpu)
*
do i=1,ncpu
wrk(i) = 0.0d0
kd = (i-1)*nl + 1
kf = kd+nl-1
if (i.eq.ncpu) kf = kf + nr
do k=kd,kf
wrk(i) = wrk(i) + x(k)*y(k)
end do
end do
*
return
end
*