!==============================================================================!
!                                                                              |
!              ||===\\                                                         | 
!              ||    \\                                                        |
!              ||     ||   //==\\   ||  ||   //==||  ||/==\\                   |
!              ||     ||  ||    ||  ||  ||  ||   ||  ||    ||                  |
!              ||    //   ||    ||  ||  ||  ||   ||  ||                        |
!              ||===//     \\==//    \\==\\  \\==\\  ||                        |
!                                                                              |
!==============================================================================!
!==============================================================================!
!                                                                              |
!              WSMP_SETUP1      Oct. 2008                                      |
!              WSMP_SETUP2      Oct. 2008                                      |
!                                                                              |
!==============================================================================!
!==============================================================================!

subroutine wsmp_setup1 (n,n_iproc_st,n_iproc_end,n_iproc,nz_loc,ldb,ndof,vo,osolve,tpl,params,threadinfo,istep,iter)
use definitions
use threads
implicit none

integer n,n_iproc_st,n_iproc_end,n_iproc,nz_loc,ldb
integer ndof
type (void) vo
type (topology) :: tpl(n)
type (octreesolve) osolve
type (parameters) params
type (thread) threadinfo
integer istep,iter

logical, dimension(:), allocatable :: iproc_col
integer icol(100000),kk,nn,n0,jproc,idg_locp,nz
integer ie,k1,k2,j,i2,i1,idof1,idof2,k,err,i3,k3,i
integer iproc,nproc,ierr
character*72  :: shift
integer, dimension(:), allocatable :: irn,jcn

INCLUDE 'mpif.h'

call mpi_comm_size (mpi_comm_world,nproc,ierr)
call mpi_comm_rank (mpi_comm_world,iproc,ierr)

shift=' '

!==============================================================================!
!==============================================================================!
! n is the size of the system/matrix
! nz is the total number of non-zero terms in the global matrix
! n_iproc is the number of columns on given thread iproc
! n_iproc_st is the first column on given thread iproc, between 1 and n
! n_iproc_end is the last column on given thread iproc, between 1 and n
! nz_loc is the total number of non-zero terms on given thread iproc
! ia is an integer array of size n_iproc+1 (see wsmp manual)
! ja is an interer array of size nz_loc
! avals is a double precision array of size nz_loc containing the values
! b is a double precision array of size n_iproc containing the rhs

!==============================================================================!
!=====[allocate memory for topology]===========================================!
!==============================================================================!

tpl%nheight=0

do i=1,n
   tpl(i)%icol(:)=0
enddo

!call heap (threadinfo,'tpl(:)%icol(:)','wsmp_setup',wsmp%n*tpl%nheightmax,'int',+1)
 
!==============================================================================!
!=====[find_connectivity_dimension]============================================!
!==============================================================================!
! the list of non-zero terms is computed for every column
! i1 is a column index
! i2 is a row index
! for symmetric systems, we only build and store the lower triangular part 
! of the matrix, i.e. i2.ge.i1

do ie=1,osolve%nleaves
   if (vo%leaf(ie).eq.0) then
      do k1=1,params%mpe
         do idof1=1,ndof
            i1=(vo%ftr(osolve%icon(k1,ie))-1)*ndof+idof1
            do k2=1,params%mpe
               do idof2=1,ndof
                  i2=(vo%ftr(osolve%icon(k2,ie))-1)*ndof+idof2
                  if (i1.gt.i2.and.ndof.gt.1) goto 2
                  do j=1,tpl(i1)%nheight
                     if (i2.eq.tpl(i1)%icol(j)) goto 2
                  enddo
                  if (tpl(i1)%nheight.eq.tpl(i1)%nheightmax) then
                     do k=1,tpl(i1)%nheight
                        icol(k)=tpl(i1)%icol(k)
                     enddo
                     if (tpl(i1)%nheight.gt.0) deallocate (tpl(i1)%icol)
                     tpl(i1)%nheightmax=tpl(i1)%nheightmax+ndof
                     allocate (tpl(i1)%icol(tpl(i1)%nheightmax),stat=err)
                     tpl(i1)%icol(:)=0
                     if (err.ne.0) call stop_run ('Error alloc tpl(i1)%icol$')
                     do k=1,tpl(i1)%nheight
                        tpl(i1)%icol(k)=icol(k)
                     enddo
                  endif
                  tpl(i1)%nheight=tpl(i1)%nheight+1
                  tpl(i1)%icol(tpl(i1)%nheight)=i2
2                 continue
               enddo
            enddo
         enddo
      enddo
   endif
enddo

!print *,iproc,'nface=',osolve%nface

do ie=1,osolve%nface
   if (vo%face(ie).eq.0) then
      do k1=1,9
         do idof1=1,ndof
            i1=(vo%ftr(osolve%iface(k1,ie))-1)*ndof+idof1
            do k2=1,9
               do idof2=1,ndof
                  i2=(vo%ftr(osolve%iface(k2,ie))-1)*ndof+idof2
                  if (i1.gt.i2.and.ndof.gt.1) goto 3             !!!!!!NEW
                  do j=1,tpl(i1)%nheight
                     if (i2.eq.tpl(i1)%icol(j)) goto 3
                  enddo
                  if (tpl(i1)%nheight.eq.tpl(i1)%nheightmax) then
                     do k=1,tpl(i1)%nheight
                        icol(k)=tpl(i1)%icol(k)
                     enddo
                     if (tpl(i1)%nheight.gt.0) deallocate (tpl(i1)%icol)
                     tpl(i1)%nheightmax=tpl(i1)%nheightmax+ndof
                     allocate (tpl(i1)%icol(tpl(i1)%nheightmax),stat=err)
                     if (err.ne.0) call stop_run ('Error alloc tpl(i1)%icol$')
                     do k=1,tpl(i1)%nheight
                        tpl(i1)%icol(k)=icol(k)
                     enddo
                  endif
                  tpl(i1)%nheight=tpl(i1)%nheight+1
                  tpl(i1)%icol(tpl(i1)%nheight)=i2
3                 continue
               enddo
            enddo
         enddo
      enddo
   endif
enddo

!print *,iproc,'min/max tpl%nheight',minval(tpl(:)%nheight),maxval(tpl(:)%nheight)
!print *,iproc,'avrg tpl%nheight',sum(tpl(:)%nheight)/dble(wsmp%n)

!==============================================================================!
!=====[computing total number of non-zero terms]===============================!
!==============================================================================!
! the total number of non-zero terms is the sum of the height of each column

nz=sum(tpl(1:vo%nnode*ndof)%nheight)

if (iproc.eq.0) write(*,'(a,i9)') shift//'n =',n
if (iproc.eq.0) write(*,'(a,i9)') shift//'nz=',nz

!==============================================================================!
!=====[sorting icol's]=========================================================!
!==============================================================================!
! wsmp requires the indices of the row contained in each column to be sorted

do i=1,n
   call iqsort_s (tpl(i)%icol,tpl(i)%nheight)
end do

!==============================================================================!
!=====[find_processors]========================================================!
!==============================================================================!

nn  =(n-1)/nproc+1
n_iproc_st     = nn*iproc+1
n_iproc_end    = min(n_iproc_st+nn-1,n)
n_iproc        = n_iproc_end - n_iproc_st +1

ldb  = n_iproc

allocate(iproc_col(n))
iproc_col=.false.
iproc_col(n_iproc_st:n_iproc_end)=.true.
nz_loc=sum(tpl(:)%nheight,iproc_col)
deallocate(iproc_col) 

!==============================================================================!
!=====[outputs visual reprenstation of matrix]=================================!
!==============================================================================!

if (params%visualise_matrix) then
   allocate(irn(nz))      ; call heap (threadinfo,'irn','wsmp_setup',size(irn),'int',+1)
   allocate(jcn(nz))      ; call heap (threadinfo,'jcn','wsmp_setup',size(jcn),'int',+1)
   irn=0
   jcn=0
   ! the coordinates (irn,jcn) of the non zero terms in the global matrix are 
   ! computed, as well as the indices of the diagonal terms in the list of dof's.
   kk=0
   do i=1,n
      do j=1,tpl(i)%nheight
         kk=kk+1
         if (kk.gt.nz) call stop_run ('error in find_connectivity$')
         irn(kk)=tpl(i)%icol(j)
         jcn(kk)=i
      enddo
   enddo
!   call visualise_matrix (nz,irn,jcn,n,istep,iter,ndof)
   call heap (threadinfo,'irn','wsmp_setup',size(irn),'int',-1)    ; deallocate (irn)
   call heap (threadinfo,'jcn','wsmp_setup',size(jcn),'int',-1)    ; deallocate (jcn)
end if

end subroutine wsmp_setup1




!==============================================================================!
!==============================================================================!
!
!              WSMP_SETUP2      Oct. 2008                                      |
!
!==============================================================================!
!==============================================================================!

subroutine wsmp_setup2 (n,n_iproc,n_iproc_st,n_iproc_end,nz_loc,iproc_col,     &
                        ia,ja,ndof,vo,osolve,tpl,params,threadinfo,istep,iter)

use definitions
use threads
implicit none

integer n,nz_loc,n_iproc,n_iproc_st,n_iproc_end
logical iproc_col(n)
integer ia(n_iproc+1)
integer ja(nz_loc)
integer ndof
type (void) vo
type (topology) :: tpl(n)
type (octreesolve) osolve
type (parameters) params
type (thread) threadinfo
integer istep,iter

integer iloc,iatemp,nheightp,i,k
integer iproc,nproc,ierr

INCLUDE 'mpif.h'

call mpi_comm_size (mpi_comm_world,nproc,ierr)
call mpi_comm_rank (mpi_comm_world,iproc,ierr)

iproc_col=.false.
iproc_col(n_iproc_st:n_iproc_end)=.true.

!==============================================================================!
!=====[find_connectivity_local]================================================!
!==============================================================================!

iloc=1

do i=1,n
   if (iproc_col(i)) then
      do k=1,tpl(i)%nheight
         ja(iloc+k-1)=tpl(i)%icol(k)
      enddo
      iloc=iloc+tpl(i)%nheight
   endif
enddo

!==============================================================================!
!=====[build ia,ja]============================================================!
!==============================================================================!

iatemp=0
k=0
do i=n_iproc_st,n_iproc_end
   k=k+1
   if (iatemp==0) then
      ia(k)=1
   else
      ia(k)=iatemp+nheightp
   end if
   iatemp=ia(k)
   nheightp=tpl(i)%nheight
end do 
k=k+1
ia(k)=iatemp+nheightp

end subroutine wsmp_setup2

!==============================================================================!
!==============================================================================!