!------------------------------------------------------------------------------|
!------------------------------------------------------------------------------|
! |
! ||===\\ |
! || \\ |
! || || //==\\ || || //==|| ||/==\\ |
! || || || || || || || || || || |
! || // || || || || || || || |
! ||===// \\==// \\==\\ \\==\\ || |
! |
!------------------------------------------------------------------------------|
!------------------------------------------------------------------------------|
! |
!              BUILD_SYSTEM_WSMP & IFIND_LOC_WSMP    OCT. 2008                 |
! |
!------------------------------------------------------------------------------|
!------------------------------------------------------------------------------|
subroutine build_system_wsmp (n,n_iproc,n_iproc_st,nz_loc,iproc_col,ia,ja, &
ldb,nrhs,avals,b,params,osolve,ndof,mat,vo, &
istep,iter,iter_nl,threadinfo,weightel_glob)
!------------------------------------------------------------------------------|
!(((((((((((((((( Purpose of the routine ))))))))))))))))))))))))))))))))))))))
!------------------------------------------------------------------------------|
! This routine builds the information on the system of equations
! and the elemental matrices that will be sent to the solver.
! It first uses the make_cut routine to build the FE matrices and the rhs
! vector, taking cut cells into account.
! Note that parallelism of the solver also requires parallelism of the
! matrix building phase; in other words, not all elements are built on all
! processors. However, because the problem is divided in columns, that is in
! degrees of freedom, some elements must be computed by more
! than one processor.
! In the current version, build_system_wsmp is called with ndof=3 to form the
! viscous stiffness matrix and with ndof=1 to form the temperature
! stiffness matrix.
!------------------------------------------------------------------------------|
!(((((((((((((((( declaration of the subroutine arguments ))))))))))))))))))))
!------------------------------------------------------------------------------|
use threads
use definitions
implicit none
integer n,nz_loc,n_iproc,n_iproc_st
logical iproc_col(n)
integer ia(n_iproc+1)
integer ja(nz_loc)
integer ldb,nrhs
double precision avals(nz_loc)
double precision b(ldb,nrhs)
type(parameters) params
type(octreesolve) osolve
integer ndof
type (material) mat(0:params%nmat)
type (void) vo
integer istep
integer iter
integer iter_nl
type (thread) threadinfo
double precision weightel_glob(osolve%nleaves)
!------------------------------------------------------------------------------|
!(((((((((((((((( declaration of the subroutine internal variables )))))))))))))
!------------------------------------------------------------------------------|
integer, external :: ifind_loc_wsmp
double precision,dimension(:,:),allocatable::lsf_el
double precision ael(8*ndof,8*ndof),bel(8*ndof),patch1(5,5),patch2(3,3)
double precision,dimension(:),allocatable::penalty_loc,penalty_glob,eviscosity,weightel
double precision r0,s0,t0,rst,amin
integer i,j,k,itodo,idof,i1,i2,iii,jjj,err,jj,i1loc,k1,k2,ii
integer ileaves,icut,iloc,idof1,idof2,nelem_iproc,nelem_min,nelem_max
integer inode(5)
integer levelmax,level
integer iproc,nproc,ierr
double precision rhs(osolve%nnode*ndof)
character*72 :: shift
logical, dimension(:),allocatable :: is_plastic,vbounded,vbounded2
integer, dimension(:), allocatable :: kfix
!-------------------------------------------------------------------------------
!-------------------------------------------------------------------------------
INCLUDE 'mpif.h'
call mpi_comm_size (mpi_comm_world,nproc,ierr)
call mpi_comm_rank (mpi_comm_world,iproc,ierr)
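! patch1 and patch2 are rank-one penalty patterns, each the outer product of a
! constraint vector with itself (read off the data values below):
!   patch2 = d d^T with d = (1,-2,1):     u1 - 2*u2 + u3 = 0, i.e. the middle
!            node of an edge equals the average of the two end nodes
!   patch1 = c c^T with c = (1,1,1,1,-4): u1 + u2 + u3 + u4 - 4*u5 = 0, i.e.
!            the centre node of a face equals the average of the four corners
! scaled by the penalty parameter and added to the stiffness matrix in the
! bad-faces section below, they weakly enforce these constraints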
data patch1 / 1., 1., 1., 1.,-4., &
1., 1., 1., 1.,-4., &
1., 1., 1., 1.,-4., &
1., 1., 1., 1.,-4., &
-4.,-4.,-4.,-4.,16./
data patch2 / 1.,-2., 1., &
-2., 4.,-2., &
1.,-2., 1./
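! select the fixed-dof flags passed to make_cut: the velocity flags
! (osolve%kfix, 3 dofs per node) when building the viscous stiffness matrix,
! or the temperature flags (osolve%kfixt, 1 dof per node) when building the
! temperature stiffness matrix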
if (ndof==3) then
allocate(kfix(osolve%nnode*3))
kfix=osolve%kfix
elseif (ndof==1) then
allocate(kfix(osolve%nnode))
kfix=osolve%kfixt
end if
allocate (is_plastic(osolve%nleaves),stat=threadinfo%err)
call heap (threadinfo,'is_plastic','build_system',size(is_plastic),'bool',+1)
allocate (vbounded(osolve%nleaves),stat=threadinfo%err)
call heap (threadinfo,'vbounded','build_system',size(vbounded),'bool',+1)
allocate (vbounded2(osolve%nleaves),stat=threadinfo%err)
call heap (threadinfo,'vbounded2','build_system',size(vbounded2),'bool',+1)
allocate (eviscosity(osolve%nleaves),stat=threadinfo%err)
call heap (threadinfo,'eviscosity','build_system',size(eviscosity),'dp',+1)
allocate (weightel(osolve%nleaves),stat=threadinfo%err)
call heap (threadinfo,'weightel','build_system',size(weightel),'dp',+1)
shift=' '
is_plastic=.false.
vbounded=.false.
vbounded2=.false.
eviscosity=0.d0
avals=0.d0
b=0.d0
nelem_iproc=0
weightel=0.d0
allocate (lsf_el(params%mpe,osolve%nlsf),stat=err)
if (err.ne.0) call stop_run ('Error alloc lsf_el in build_system$')
amin=1.d6
do ileaves=1,osolve%nleaves
! first test if in the void
if (vo%leaf(ileaves).eq.0) then
! then test if this leaf touches the columns given to the current proc
itodo=0
do k=1,params%mpe
do idof=1,ndof
i1=(vo%ftr(osolve%icon(k,ileaves))-1)*ndof+idof
if (iproc_col(i1)) itodo=1
enddo
enddo
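! an element must be assembled on this rank as soon as any of its ndof*mpe
! global columns is owned here, so elements whose columns straddle a
! partition boundary are built (redundantly) on more than one processor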
! what follows is only performed for a subset of elements
if (itodo.eq.1) then
nelem_iproc=nelem_iproc+1
level=0
levelmax=params%levelcut
do k=1,params%mpe
do i=1,osolve%nlsf
lsf_el(k,i)=osolve%lsf(osolve%icon(k,ileaves),i)
enddo
enddo
r0=-1.d0
s0=-1.d0
t0=-1.d0
rst=2.d0
! make_cut is used to build the elemental matrix taking into
! account the cut cells
call make_cut (level,levelmax,ndof,ael,bel,osolve%icon(1,ileaves), &
osolve%x,osolve%y,osolve%z,kfix,mat, &
osolve%u,osolve%v,osolve%w,osolve%temp, &
osolve%spressure(ileaves),osolve%strain,is_plastic(ileaves),&
osolve%nnode,rhs,lsf_el,osolve%nlsf,r0,s0,t0,rst,icut, &
ileaves,eviscosity(ileaves),vbounded(ileaves),params,threadinfo, &
weightel(ileaves))
! transfer the elemental matrix and rhs into the WSMP matrix structure;
! the storage differs for symmetric and nonsymmetric matrices: for ndof=3
! the system is symmetric and only the entries with i2>=i1 are stored,
! whereas for ndof=1 all entries are stored (see the test below)
do k1=1,params%mpe
do idof1=1,ndof
i1=(vo%ftr(osolve%icon(k1,ileaves))-1)*ndof+idof1
if (iproc_col(i1)) then
iii=(k1-1)*ndof+idof1
i1loc=i1-n_iproc_st+1
do k2=1,params%mpe
do idof2=1,ndof
i2=(vo%ftr(osolve%icon(k2,ileaves))-1)*ndof+idof2
jjj=(k2-1)*ndof+idof2
if (i2.ge.i1.or.ndof.eq.1) then
iloc=ifind_loc_wsmp(i1loc,i2,ja,ia,n_iproc,nz_loc)
avals(iloc)=avals(iloc)+ael(jjj,iii)
if (iii.eq.jjj) amin=min(amin,abs(ael(jjj,iii)))
endif
enddo
enddo
b(i1loc,1)=b(i1loc,1)+bel(iii)
endif
enddo
enddo
endif
endif
enddo
!print*,'check diag',iproc,amin
deallocate(lsf_el)
deallocate(kfix)
call mpi_reduce(nelem_iproc,nelem_min,1,mpi_integer,mpi_min,0,mpi_comm_world,ierr)
call mpi_reduce(nelem_iproc,nelem_max,1,mpi_integer,mpi_max,0,mpi_comm_world,ierr)
if (iproc.eq.0) write (*,'(a,2i12)') shift//'nelem per proc (min/max)',nelem_min,nelem_max
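! the per-leaf arrays were filled only on the rank(s) that assembled each
! leaf, so they are merged across all ranks here: mpi_max recovers the
! positive viscosities over the zero-initialized entries, mpi_lor combines
! the logical flags, and mpi_min merges the element weights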
if (ndof==3) then
osolve%eviscosity=0.d0
call mpi_allreduce (eviscosity,osolve%eviscosity,osolve%nleaves,mpi_double_precision,mpi_max,mpi_comm_world,ierr)
if (iproc.eq.0) then
write (*,'(a,2es13.5)') shift//'viscosity range ', minval(osolve%eviscosity,osolve%eviscosity>0.d0), maxval(osolve%eviscosity)
endif
osolve%is_plastic=.false.
call mpi_allreduce (is_plastic,osolve%is_plastic,osolve%nleaves,mpi_logical,mpi_lor,mpi_comm_world,ierr)
call mpi_allreduce (vbounded,vbounded2,osolve%nleaves,mpi_logical,mpi_lor,mpi_comm_world,ierr)
if (iproc.eq.0) write (*,'(a,i10)') shift//'viscosity capped in', count(vbounded2)
call mpi_allreduce (weightel,weightel_glob,osolve%nleaves,mpi_double_precision,mpi_min,mpi_comm_world,ierr)
end if
call heap (threadinfo,'is_plastic','build_system',size(is_plastic),'bool',-1)
deallocate (is_plastic)
call heap (threadinfo,'vbounded','build_system',size(vbounded),'bool',-1)
deallocate (vbounded)
call heap (threadinfo,'vbounded2','build_system',size(vbounded2),'bool',-1)
deallocate (vbounded2)
call heap (threadinfo,'eviscosity','build_system',size(eviscosity),'dp',-1)
deallocate (eviscosity)
call heap (threadinfo,'weightel','build_system',size(weightel),'dp',-1)
deallocate (weightel)
!=====================
!=====[bad faces]=====
!=====================
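! these appear to be the nonconforming octree faces carrying hanging nodes
! (iface lists four corner nodes, four mid-edge nodes and a centre node);
! the patterns patch1/patch2 defined above, scaled by a penalty parameter,
! are added to the stiffness matrix to tie the hanging nodes to their
! neighbours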
if (osolve%nface.eq.0) return
allocate (penalty_loc(osolve%nface),stat=threadinfo%err)
call heap (threadinfo,'penalty_loc','build_system',size(penalty_loc),'dp',+1)
allocate (penalty_glob(osolve%nface),stat=threadinfo%err)
call heap (threadinfo,'penalty_glob','build_system',size(penalty_glob),'dp',+1)
if (params%penalty.gt.0.d0) then
! first case assumes that a unique, uniform value is used
! for the penalty parameter
penalty_glob=params%penalty
else
! second case scales the penalty parameter according to the magnitude of
! the stiffness matrix entries:
! first go over the values of the stiffness matrix that are going to be
! touched by the linear constraints
! and deduce from them the value of a local penalty parameter (penalty_loc)
! NOTE: the code for this case is currently commented out below, so
! penalty_glob is left unset when params%penalty is not positive
! penalty_loc=0.d0
! do i=1,osolve%nface
! if (vo%face(i).eq.0) then
! ! First the 4 constraints along the edges
! penalty_loc(i)=0.d0
! do j=1,4
! inode(1)=vo%ftr(osolve%iface(j,i))
! inode(2)=vo%ftr(osolve%iface(j+4,i))
! inode(3)=vo%ftr(osolve%iface(1+mod(j,4),i))
! do idof=1,ndof
! do ii=1,3
! i1=(inode(ii)-1)*ndof+idof
! if (wsmp%iproc_col(i1)) then
! do jj=1,3
! i2=(inode(jj)-1)*ndof+idof
! if (i1.ge.i2.or.ndof.eq.1) then
! iloc=ifind_loc_wsmp(i1,i2,ja,ia,vo%nnode*ndof,nz_loc)
! penalty_loc(i)=max(penalty_loc(i),abs(wsmp%avals(iloc)))
! endif
! enddo
! endif
! enddo
! enddo
! enddo
! ! then the other constraints across the four corners and the centre
! inode(1)=vo%ftr(osolve%iface(1,i))
! inode(2)=vo%ftr(osolve%iface(2,i))
! inode(3)=vo%ftr(osolve%iface(3,i))
! inode(4)=vo%ftr(osolve%iface(4,i))
! inode(5)=vo%ftr(osolve%iface(9,i))
! do k=1,ndof
! do ii=1,5
! i1=(inode(ii)-1)*ndof+k
! if (wsmp%iproc_col(i1)) then
! do jj=1,5
! i2=(inode(jj)-1)*ndof+k
! if (i1.ge.i2.or.ndof.eq.1) then
! iloc=ifind_loc_wsmp(i1,i2,ja,ia,vo%nnode*ndof,nz_loc)
! penalty_loc(i)=max(penalty_loc(i),abs(wsmp%avals(iloc)))
! endif
! enddo
! endif
! enddo
! enddo
! endif
! enddo
! applies an additional factor 10^5 to the resulting local penalty parameter
! penalty_loc=penalty_loc*1.d5
! call mpi_allreduce (penalty_loc,penalty_glob,osolve%nface,mpi_double_precision,mpi_max,mpi_comm_world,ierr)
endif
! now, in both cases, apply the resulting constraints by adding to the
! stiffness matrix a pattern (patch1 and patch2) scaled by the penalty parameter
do i=1,osolve%nface
if (vo%face(i).eq.0) then
! First the 4 constraints along the edges
do j=1,4
inode(1)=vo%ftr(osolve%iface(j,i))
inode(2)=vo%ftr(osolve%iface(j+4,i))
inode(3)=vo%ftr(osolve%iface(1+mod(j,4),i))
do k=1,ndof
do ii=1,3
i1=(inode(ii)-1)*ndof+k
i1loc=i1-n_iproc_st+1
if (iproc_col(i1)) then
do jj=1,3
i2=(inode(jj)-1)*ndof+k
if (i2.ge.i1.or.ndof.eq.1) then
iloc=ifind_loc_wsmp(i1loc,i2,ja,ia,n_iproc,nz_loc)
avals(iloc)=avals(iloc)+patch2(ii,jj)*penalty_glob(i)
endif
enddo
endif
enddo
enddo
enddo
! then the other constraints across the four corners and the centre
inode(1)=vo%ftr(osolve%iface(1,i))
inode(2)=vo%ftr(osolve%iface(2,i))
inode(3)=vo%ftr(osolve%iface(3,i))
inode(4)=vo%ftr(osolve%iface(4,i))
inode(5)=vo%ftr(osolve%iface(9,i))
do idof=1,ndof
do ii=1,5
i1=(inode(ii)-1)*ndof+idof
i1loc=i1-n_iproc_st+1
if (iproc_col(i1)) then
do jj=1,5
i2=(inode(jj)-1)*ndof+idof
if (i2.ge.i1.or.ndof.eq.1) then
iloc=ifind_loc_wsmp(i1loc,i2,ja,ia,n_iproc,nz_loc)
avals(iloc)=avals(iloc)+patch1(ii,jj)*penalty_glob(i)
endif
enddo
endif
enddo
enddo
endif
enddo
call heap (threadinfo,'penalty_glob','build_system',size(penalty_glob),'dp',-1) ; deallocate (penalty_glob)
call heap (threadinfo,'penalty_loc','build_system',size(penalty_loc),'dp',-1) ; deallocate (penalty_loc)
end subroutine build_system_wsmp
!-------------------------------------------------------------------------------
!-------------------------------------------------------------------------------
! function used to navigate through the local part of the global matrix
! i1 is the index, local to this processor's block, of the owned row/column
! and i2 is the global index along the other dimension
! ia and ja are the compressed sparse storage arrays needed by WSMP to
! locate the nonzero entries of the local stiffness matrix
! n_iproc is the number of rows/columns owned by this processor
! nz_loc is the total number of local nonzero entries
! there is an internal check that could be removed now that this
! routine and the storage algorithm developed have been fully tested
! but we will leave it in for the time being
integer function ifind_loc_wsmp (i1,i2,ja,ia,n_iproc,nz_loc)
implicit none
integer i1,i2
integer ia(n_iproc+1),ja(nz_loc)
integer n_iproc,nz_loc
integer istart
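! illustration with hypothetical values: for ia = (1,4,6) and
! ja = (2,5,9, 1,7), local row 2 occupies ja(ia(2):ia(3)-1) = ja(4:5);
! searching i2=7 in that row returns location 5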
! scan the nonzero entries of local row i1, which occupy
! ja(ia(i1):ia(i1+1)-1) in the compressed storage
do istart=ia(i1),ia(i1+1)-1
   if (ja(istart).eq.i2) then
      ifind_loc_wsmp=istart
      return
   endif
enddo
! the requested entry is not in the sparsity pattern: report and stop
write (*,*) 'ifind_loc_wsmp: entry (',i1,',',i2,') not found'
STOP 'error in ifind_loc_wsmp'
end function ifind_loc_wsmp
!------------------------------------------------------------------------------|
!------------------------------------------------------------------------------|