!------------------------------------------------------------------------------|
!------------------------------------------------------------------------------|
!                                                                              |
!              ||===\\                                                         | 
!              ||    \\                                                        |
!              ||     ||   //==\\   ||  ||   //==||  ||/==\\                   |
!              ||     ||  ||    ||  ||  ||  ||   ||  ||    ||                  |
!              ||    //   ||    ||  ||  ||  ||   ||  ||                        |
!              ||===//     \\==//    \\==\\  \\==\\  ||                        |
!                                                                              |
!------------------------------------------------------------------------------|
!------------------------------------------------------------------------------|
!                                                                              |
!              BUILD_SYSTEM_WSMP & IFIND_LOC    OCT. 2008                      |
!                                                                              |
!------------------------------------------------------------------------------|
!------------------------------------------------------------------------------|

subroutine build_system_wsmp (n,n_iproc,n_iproc_st,nz_loc,iproc_col,ia,ja,ldb, &
                             nrhs,avals,b,params,osolve,ndof,mat,vo,threadinfo,&
                             weightel_glob)

!------------------------------------------------------------------------------|
!(((((((((((((((( Purpose of the routine  ))))))))))))))))))))))))))))))))))))))
!------------------------------------------------------------------------------|
! This routine is used to build the information on the system of equations
! and the elemental matrices that will be sent to the solver.
! It first uses the make_cut routine to build the fe matrices and the rhs
! vector taking into account cut cells
! Note that parallelism of the solver also reauires parallelism of the 
! matrix building phase. In other words not all elements are built in all
! processors. However because the problem is divided in columns, that is in
! degrees of freedom, it is necessary that some elements are computed by more
! than one processor.
! In the current version, build_system is called (with ndof=3) to form the viscous 
! stiffness matrix and (with ndof=1) to form the temperature stiffness matrix.

!------------------------------------------------------------------------------|
!((((((((((((((((  declaration of the subroutine arguments  ))))))))))))))))))))
!------------------------------------------------------------------------------|

use definitions
!use mpi
use threads

implicit none

include 'mpif.h'

integer n,nz_loc,n_iproc,n_iproc_st
logical iproc_col(n)
integer ia(n_iproc+1)
integer ja(nz_loc)
integer ldb,nrhs
double precision avals(nz_loc)
double precision b(ldb,nrhs)
type(parameters) params
type(octreesolve) osolve
integer ndof 
type (material) mat(0:params%nmat)
type (void) vo
type (thread) threadinfo
double precision weightel_glob(osolve%nleaves)

!------------------------------------------------------------------------------|
!(((((((((((((((( declaration of the subroutine internal variables )))))))))))))
!------------------------------------------------------------------------------|

integer, external :: ifind_loc_wsmp
double precision,dimension(:,:),allocatable::lsf_el
double precision ael(8*ndof,8*ndof),bel(8*ndof),patch1(5,5),patch2(3,3),elem_dz
double precision,dimension(:),allocatable::penalty_loc,penalty_glob,eviscosity,weightel
double precision r0,s0,t0,rst,amin
integer i,j,k,itodo,idof,i1,i2,iii,jjj,err,jj,i1loc,k1,k2,ii
integer ileaves,icut,iloc,idof1,idof2,nelem_iproc,nelem_min,nelem_max
integer inode(5)
integer levelmax,level
integer iproc,nproc,ierr
double precision rhs(osolve%nnode*ndof)
character*72  :: shift
logical, dimension(:),allocatable :: is_plastic,vbounded,vbounded2
integer, dimension(:),allocatable :: kfix,matnum

!-------------------------------------------------------------------------------
!-------------------------------------------------------------------------------

call mpi_comm_size (mpi_comm_world,nproc,ierr)
call mpi_comm_rank (mpi_comm_world,iproc,ierr)

data patch1 / 1., 1., 1., 1.,-4., &
              1., 1., 1., 1.,-4., &
              1., 1., 1., 1.,-4., &
              1., 1., 1., 1.,-4., &
             -4.,-4.,-4.,-4.,16./

data patch2 / 1.,-2., 1., &
             -2., 4.,-2., &
              1.,-2., 1./

if (ndof==3) then
   allocate(kfix(osolve%nnode*3))
   kfix=osolve%kfix
elseif (ndof==1) then
   allocate(kfix(osolve%nnode))
   kfix=osolve%kfixt
end if

allocate (is_plastic(osolve%nleaves),stat=threadinfo%err) 
if (params%debug.gt.1) call heap (threadinfo,'is_plastic','build_system',size(is_plastic),'bool',+1)
allocate (vbounded(osolve%nleaves),stat=threadinfo%err) 
if (params%debug.gt.1) call heap (threadinfo,'vbounded','build_system',size(vbounded),'bool',+1)
allocate (vbounded2(osolve%nleaves),stat=threadinfo%err)
if (params%debug.gt.1) call heap (threadinfo,'vbounded','build_system',size(vbounded2),'bool',+1)
allocate (eviscosity(osolve%nleaves),stat=threadinfo%err)
if (params%debug.gt.1) call heap (threadinfo,'eviscosity','build_system',size(eviscosity),'dp',+1)
allocate (weightel(osolve%nleaves),stat=threadinfo%err)
if (params%debug.gt.1) call heap (threadinfo,'weightel','build_system',size(weightel),'dp',+1)
allocate (matnum(osolve%nleaves),stat=threadinfo%err) 
if (params%debug.gt.1) call heap (threadinfo,'matnum','build_system',size(matnum),'int',+1)

shift=' '
is_plastic=.false.
vbounded=.false.
vbounded2=.false.
eviscosity=0.d0
avals=0.d0
b=0.d0
nelem_iproc=0
weightel=0.d0
matnum=0

allocate (lsf_el(params%mpe,osolve%nlsf),stat=err)
if (err.ne.0) call stop_run ('Error alloc lsf_el in build_system$')

amin=1.d6

do ileaves=1,osolve%nleaves
   ! first test if in the void
   if (vo%leaf(ileaves).eq.0) then
      ! then test if this leaf touches the colums given to the current proc
      itodo=0
      do k=1,params%mpe
         do idof=1,ndof
            i1=(vo%ftr(osolve%icon(k,ileaves))-1)*ndof+idof
            if (iproc_col(i1)) itodo=1
         enddo
      enddo
      ! what follows is only performed for a subset of elements
      if (itodo.eq.1) then
         nelem_iproc=nelem_iproc+1
         level=0
         levelmax=params%levelcut
         do k=1,params%mpe
            do i=1,osolve%nlsf
               lsf_el(k,i)=osolve%lsf(osolve%icon(k,ileaves),i)
            enddo
         enddo
         r0=-1.d0
         s0=-1.d0
         t0=-1.d0
         rst=2.d0
         ! make_cut is used to build the elemental matrix taking into 
         ! account the cut cells

         call make_cut (level,levelmax,ndof,ael,bel,osolve%icon(1,ileaves),    &
                        osolve%x,osolve%y,osolve%z,kfix,mat,osolve%u,osolve%v, &
                        osolve%w,osolve%temp,osolve%spressure(ileaves),        &
                        osolve%strain,is_plastic(ileaves),osolve%nnode,rhs,    &
                        lsf_el,osolve%nlsf,r0,s0,t0,rst,icut,ileaves,          &
                        eviscosity(ileaves),vbounded(ileaves),params,          &
                        threadinfo,weightel(ileaves),matnum(ileaves))
         ! transfer elemental matrix and rhs into wsmpmatrix structure
         ! different for symmetric or nonsymmetric matrices

         do k1=1,params%mpe
            do idof1=1,ndof
               i1=(vo%ftr(osolve%icon(k1,ileaves))-1)*ndof+idof1
               if (iproc_col(i1)) then
                  iii=(k1-1)*ndof+idof1
                  i1loc=i1-n_iproc_st+1
                  do k2=1,params%mpe
                     do idof2=1,ndof
                        i2=(vo%ftr(osolve%icon(k2,ileaves))-1)*ndof+idof2
                        jjj=(k2-1)*ndof+idof2
                        if (i2.ge.i1.or.ndof.eq.1) then
                           iloc=ifind_loc_wsmp(i1loc,i2,ja,ia,n_iproc,nz_loc)
                           avals(iloc)=avals(iloc)+ael(jjj,iii)
                           if (iii.eq.jjj) amin=min(amin,abs(ael(jjj,iii)))
                        endif
                     enddo
                  enddo
                  if (idof1 == 3) then
                    if (kfix((osolve%icon(k1,ileaves)-1)*ndof+idof1) < 1) then
                      iloc=ifind_loc_wsmp(i1loc,i1,ja,ia,n_iproc,nz_loc)
                      elem_dz=(osolve%z(osolve%icon(5,ileaves))-osolve%z(osolve%icon(1,ileaves)))*params%vex
                      if (k1 < 5) then
                        write (*,*) '****************'
                        write (*,*) 'k1: ',k1
                        write (*,*) 'avals before for proc ',iproc,': ',avals
                        avals(iloc)=avals(iloc)-(params%dt*weightel(ileaves))/(2.d0*elem_dz)
                        write (*,*) 'avals after for proc ',iproc,': ',avals
                        write (*,*) '****************'
                      else
                        write (*,*) '****************'
                        write (*,*) 'k1: ',k1
                        write (*,*) 'avals before for proc ',iproc,': ',avals
                        avals(iloc)=avals(iloc)+(params%dt*weightel(ileaves))/(2.d0*elem_dz)
                        write (*,*) 'avals after for proc ',iproc,': ',avals
                        write (*,*) '****************'
                      endif
                    endif
                  endif
                  b(i1loc,1)=b(i1loc,1)+bel(iii)
               endif
            enddo
         enddo

      endif
   endif
enddo

!print*,'check diag',iproc,amin

deallocate(lsf_el)
deallocate(kfix)

call mpi_reduce(nelem_iproc,nelem_min,1,mpi_integer,mpi_min,0,mpi_comm_world,ierr)
call mpi_reduce(nelem_iproc,nelem_max,1,mpi_integer,mpi_max,0,mpi_comm_world,ierr)
if (iproc.eq.0) write (*,'(a,2i12)') shift//'nelem per proc (min/max)',nelem_min,nelem_max

if (ndof==3) then
   osolve%eviscosity=0.d0
   call mpi_allreduce (eviscosity,osolve%eviscosity,osolve%nleaves,mpi_double_precision,mpi_max,mpi_comm_world,ierr)
   if (iproc.eq.0) then
   write (*,'(a,2es13.5)') shift//'viscosity range ', minval(osolve%eviscosity,osolve%eviscosity>0.d0), maxval(osolve%eviscosity)
   endif
   osolve%is_plastic=.false.
   call mpi_allreduce (is_plastic,osolve%is_plastic,osolve%nleaves,mpi_logical,mpi_lor,mpi_comm_world,ierr)
   osolve%matnum=0
   call mpi_allreduce (matnum,osolve%matnum,osolve%nleaves,mpi_integer,mpi_max,mpi_comm_world,ierr)
   call mpi_allreduce (vbounded,vbounded2,osolve%nleaves,mpi_logical,mpi_lor,mpi_comm_world,ierr)
   if (iproc.eq.0) write (*,'(a,i10)') shift//'viscosity capped in', count(vbounded2)
   call mpi_allreduce (weightel,weightel_glob,osolve%nleaves,mpi_double_precision,mpi_min,mpi_comm_world,ierr)
end if

!call mpi_allreduce (weightel,weightel_glob,osolve%nleaves,mpi_double_precision,mpi_min,mpi_comm_world,ierr)

if (params%debug.gt.1) call heap (threadinfo,'is_plastic','build_system',size(is_plastic),'bool',-1)
deallocate (is_plastic)
if (params%debug.gt.1) call heap (threadinfo,'vbounded','build_system',size(vbounded),'bool',-1)
deallocate (vbounded)
if (params%debug.gt.1) call heap (threadinfo,'vbounded2','build_system',size(vbounded2),'bool',-1)
deallocate (vbounded2)
if (params%debug.gt.1) call heap (threadinfo,'eviscosity','build_system',size(eviscosity),'dp',-1)
deallocate (eviscosity)
if (params%debug.gt.1) call heap (threadinfo,'weightel','build_system',size(weightel),'dp',-1)
deallocate (weightel)
if (params%debug.gt.1) call heap (threadinfo,'matnum','build_system',size(matnum),'int',-1)
deallocate (matnum)

!=====================
!=====[bad faces]=====
!=====================

if (osolve%nface.eq.0) return

allocate (penalty_loc(osolve%nface),stat=threadinfo%err)
if (params%debug.gt.1) call heap (threadinfo,'penalty_loc','build_system',size(penalty_loc),'dp',+1)
allocate (penalty_glob(osolve%nface),stat=threadinfo%err)
if (params%debug.gt.1) call heap (threadinfo,'penalty_glob','build_system',size(penalty_glob),'dp',+1)

if (params%penalty.gt.0.d0) then

   ! first case assumes that a unique, uniform value is used
   ! for the penalty parameter

   penalty_glob=params%penalty

else

   ! second case assumes that the penalty parameter is scaled according to
   ! the value of the stiffness matrix

   ! first go around the values of the stiffness matrix that are going to be
   ! touched by the linear constraints
   ! and deduce from it the value of a local penalty parameter (penalty_loc)

!   penalty_loc=0.d0
!   do i=1,osolve%nface
!      if (vo%face(i).eq.0) then
!         ! First the 4 constraints along the edges
!         penalty_loc(i)=0.d0
!         do j=1,4
!            inode(1)=vo%ftr(osolve%iface(j,i))
!            inode(2)=vo%ftr(osolve%iface(j+4,i))
!            inode(3)=vo%ftr(osolve%iface(1+mod(j,4),i))
!            do idof=1,ndof
!               do ii=1,3
!                  i1=(inode(ii)-1)*ndof+idof
!                  if (wsmp%iproc_col(i1)) then
!                     do jj=1,3
!                        i2=(inode(jj)-1)*ndof+idof
!                        if (i1.ge.i2.or.ndof.eq.1) then
!                           iloc=ifind_loc_wsmp(i1,i2,ja,ia,vo%nnode*ndof,nz_loc)
!                           penalty_loc(i)=max(penalty_loc(i),abs(wsmp%avals(iloc)))
!                        endif
!                     enddo
!                  endif
!               enddo
!            enddo
!         enddo
!        ! then the other constraints across the four corners and the centre
!         inode(1)=vo%ftr(osolve%iface(1,i))
!         inode(2)=vo%ftr(osolve%iface(2,i))
!         inode(3)=vo%ftr(osolve%iface(3,i))
!         inode(4)=vo%ftr(osolve%iface(4,i))
!         inode(5)=vo%ftr(osolve%iface(9,i))
!         do k=1,ndof
!            do ii=1,5
!               i1=(inode(ii)-1)*ndof+k
!               if (wsmp%iproc_col(i1)) then
!                  do jj=1,5
!                     i2=(inode(jj)-1)*ndof+k
!                     if (i1.ge.i2.or.ndof.eq.1) then
!                        iloc=ifind_loc_wsmp(i1,i2,ja,ia,vo%nnode*ndof,nz_loc)
!                        penalty_loc(i)=max(penalty_loc(i),abs(wsmp%avals(iloc)))
!                     endif
!                  enddo
!               endif
!            enddo
!         enddo
!      endif
!   enddo

   ! applies an additional factor 10^5 to the resulting local penalty parameter

!   penalty_loc=penalty_loc*1.d5
!   call mpi_allreduce (penalty_loc,penalty_glob,osolve%nface,mpi_double_precision,mpi_max,mpi_comm_world,ierr)

endif

! now, in both cases, applies the resulting constraints by multiplying the
! stiffness matrix by a pattern (patch1 et patch2) scaled by the penalty parameter

do i=1,osolve%nface
   if (vo%face(i).eq.0) then
      ! First the 4 constraints along the edges
      do j=1,4
         inode(1)=vo%ftr(osolve%iface(j,i))
         inode(2)=vo%ftr(osolve%iface(j+4,i))
         inode(3)=vo%ftr(osolve%iface(1+mod(j,4),i))
         do k=1,ndof
            do ii=1,3
               i1=(inode(ii)-1)*ndof+k
               i1loc=i1-n_iproc_st+1
               if (iproc_col(i1)) then
                  do jj=1,3
                     i2=(inode(jj)-1)*ndof+k
                     if (i2.ge.i1.or.ndof.eq.1) then
                        iloc=ifind_loc_wsmp(i1loc,i2,ja,ia,n_iproc,nz_loc)
                        avals(iloc)=avals(iloc)+patch2(ii,jj)*penalty_glob(i)
                     endif
                  enddo
               endif
            enddo
         enddo
      enddo
      ! then the other constraints across the four corners and the centre
      inode(1)=vo%ftr(osolve%iface(1,i))
      inode(2)=vo%ftr(osolve%iface(2,i))
      inode(3)=vo%ftr(osolve%iface(3,i))
      inode(4)=vo%ftr(osolve%iface(4,i))
      inode(5)=vo%ftr(osolve%iface(9,i))
      do idof=1,ndof
         do ii=1,5
            i1=(inode(ii)-1)*ndof+idof
            i1loc=i1-n_iproc_st+1
            if (iproc_col(i1)) then
               do jj=1,5
                  i2=(inode(jj)-1)*ndof+idof
                  if (i2.ge.i1.or.ndof.eq.1) then
                     iloc=ifind_loc_wsmp(i1loc,i2,ja,ia,n_iproc,nz_loc)
                     avals(iloc)=avals(iloc)+patch1(ii,jj)*penalty_glob(i)
                  endif
               enddo
            endif
         enddo
      enddo
   endif
enddo


if (params%debug.gt.1) call heap (threadinfo,'penalty_glob','build_system',size(penalty_glob),'dp',-1)
deallocate (penalty_glob)
if (params%debug.gt.1) call heap (threadinfo,'penalty_loc','build_system',size(penalty_loc),'dp',-1)
deallocate (penalty_loc)

end subroutine build_system_wsmp

!-------------------------------------------------------------------------------
!-------------------------------------------------------------------------------

! function used to navigate through the global matrix
! i and j are the global indices in the global matrix
! irn and jcn are needed by MUMPS to
! locate the nonzero numbers of the global stiffness matrix
! n is the total number of dofs
! nz is the total number of nonzero numbers in the global matrix
! there is an internal check that could be removed now that this
! routine and the storage algorithm developed have been fully tested
! but we will leave it in for the time being

integer function ifind_loc_wsmp (i1,i2,ja,ia,n_iproc,nz_loc)
implicit none

integer i1,i2
integer ia(n_iproc+1),ja(nz_loc)
integer n_iproc,nz_loc

integer istart,k

istart=ia(i1)

1 continue

if (ja(istart).eq.i2) then
   ifind_loc_wsmp=istart
   return
endif

istart=istart+1

if (istart.le.nz_loc) goto 1

!print*,'istart=',istart
!print*,'nz',nz
!print*,'i',i
!print*,'idg(i)',idg(i)
!print*,'j',j
!print*,(ja(k),k=idg(i),idg(i+1)-1)

STOP 'error in ifind_loc_wsmp'

end function ifind_loc_wsmp

!------------------------------------------------------------------------------|
!------------------------------------------------------------------------------|