Skip to content
Snippets Groups Projects
solve_with_pwssmp.f90 20.6 KiB
Newer Older
!==============================================================================!
!                                                                              |
!              ||===\\                                                         | 
!              ||    \\                                                        |
!              ||     ||   //==\\   ||  ||   //==||  ||/==\\                   |
!              ||     ||  ||    ||  ||  ||  ||   ||  ||    ||                  |
!              ||    //   ||    ||  ||  ||  ||   ||  ||                        |
!              ||===//     \\==//    \\==\\  \\==\\  ||                        |
!                                                                              |
!==============================================================================!
!==============================================================================!
!                                                                              |
!              SOLVE_WITH_WSMP    Oct. 2008                                    |
!                                                                              |
!==============================================================================!
!==============================================================================!

Dave Whipp's avatar
Dave Whipp committed
subroutine solve_with_pwssmp (n,nz_loc,n_iproc,n_iproc_st,n_iproc_end,ia,ja,   &
                             ldb,nrhs,avals,b,params,osolve,ov,vo,threadinfo,  &
                             ndof,istep,iter,iter_nl,weightel)

use definitions
use threads

implicit none

!==============================================================================!
!==============================================================================!

integer n,nz_loc,n_iproc,n_iproc_st,n_iproc_end
integer ia(n_iproc+1)
integer ja(nz_loc)
integer ldb,nrhs
double precision avals(nz_loc)
double precision b(ldb,nrhs)
type (parameters) params
type (octreesolve) osolve
type (octreev) ov
type (void) vo
type (thread) threadinfo
integer ndof
integer istep,iter,iter_nl
double precision weightel(osolve%nleaves)

!==============================================================================!
!==============================================================================!

integer iproc,nproc,ierr
character*3   :: ciproc
integer i,j,k,iip,ist,iend,iwidth,tag,wsmpdpmem
double precision time1,time2
double precision time_st,time_end
double precision, dimension(:), allocatable :: solution
integer,dimension(MPI_STATUS_SIZE)::statut 
character*72  :: shift

integer iparm(64)
double precision dparm(64)
integer, dimension(:), allocatable :: perm,invp 
double precision aux, diag
integer mrp,naux

integer badnode,badproc,cnt,idof,rbadnode,badindex
integer,dimension(:),allocatable :: bad_elems

!==============================================================================!
!==============================================================================!

call mpi_comm_size (mpi_comm_world,nproc,ierr)
call mpi_comm_rank (mpi_comm_world,iproc,ierr)

allocate(perm(n),stat=threadinfo%err)
if (params%debug.gt.1) call heap (threadinfo,'perm','solve_with',size(perm),'int',+1)
allocate(invp(n),stat=threadinfo%err)
if (params%debug.gt.1) call heap (threadinfo,'invp','solve_with',size(invp),'int',+1)
allocate (solution(vo%nnode*ndof),stat=threadinfo%err)
if (params%debug.gt.1) call heap (threadinfo,'solution','solve_with',size(solution),'dp',+1)

invp=0
perm=0
mrp=0
naux = 0

shift=' '

call int_to_char(ciproc,3,iproc)

!open(unit=770+iproc,file='ia3_'//ciproc//'.dat',status='replace')
!do k=1,n_iproc+1
!   write(770+iproc,*) ia(k)
!end do
!close(770+iproc)
!open(unit=770+iproc,file='ja3_'//ciproc//'.dat',status='replace')
!do k=1,nz_loc
!   write(770+iproc,*) ja(k),avals(k) 
!end do
!close(770+iproc)
!open(unit=770+iproc,file='b3_'//ciproc//'.dat',status='replace')
!do k=1,n_iproc
!   write(770+iproc,*) b(k,1)
!end do
!close(770+iproc)

!open(unit=770+iproc,file='matreconstr3_'//ciproc//'.dat',status='replace')
!do k=1,n_iproc
!   do j=ia(k),ia(k+1)-1
!      write(770+iproc,*) k+n_iproc_st-1,ja(j),avals(j)
!   end do
!end do
!close(770+iproc)

time_st=mpi_wtime()

CALL WSETMAXTHRDS(1)

!CALL PWSMP_INITIALIZE()

if (iproc.eq.0) then
   write(*,'(a,f15.10,a)') shift//'================================='
end if

!==============================================================================!
!=====[WSMP: initialisation]===================================================!
!==============================================================================!

time1=mpi_wtime()

iparm(1)=0
iparm(2)=0
iparm(3)=0

! Enable scaling of the WSMP matrices for LL^T factorization
if (params%wsmp_scaling) then
  iparm(10)=1
endif

! Skip ordering
iparm(16)=-1

Dave Whipp's avatar
Dave Whipp committed
call pwssmp(n_iproc,ia,ja,avals,diag,perm,invp,b,ldb,nrhs,aux,naux,mrp,iparm,dparm)

time2=mpi_wtime()

if (iproc.eq.0) then
   if (iparm(64).ne.0) then
      print *,'WSMP-> initialisation pb',iparm(64)
      call stop_run ('WSMP pb$')
   else
      write(*,'(a,f15.7,a)') shift//'wsmp: initialisation time:   ',time2-time1,' s'
      write(*,'(a,e15.7)') shift//'wsmp: signularity threshold (dparm(10)): ',dparm(10)
!==============================================================================!
!=====[WSMP: ordering]=========================================================!
!==============================================================================!

time1=mpi_wtime()

iparm(2)=1
iparm(3)=1

Dave Whipp's avatar
Dave Whipp committed
call pwssmp(n_iproc,ia,ja,avals,diag,perm,invp,b,ldb,nrhs,aux,naux,mrp,iparm,dparm)

time2=mpi_wtime()

if (iproc.eq.0) then
   if (iparm(64).ne.0) then
      print *,'WSMP-> ordering pb',iparm(64)
      stop 'WSMP pb'
   else
      write(*,'(a,f15.7,a)') shift//'wsmp: ordering time:         ',time2-time1,' s'
   end if
end if

!==============================================================================!
!=====[WSMP: symbolic factorisation]===========================================!
!==============================================================================!

time1=mpi_wtime()

iparm(2)=2
iparm(3)=2

Dave Whipp's avatar
Dave Whipp committed
call pwssmp(n_iproc,ia,ja,avals,diag,perm,invp,b,ldb,nrhs,aux,naux,mrp,iparm,dparm)

time2=mpi_wtime()

if (iproc.eq.0) then
   if (iparm(64).ne.0) then
      print *,'WSMP-> symbolic factorisation pb',iparm(64)
      stop 
   else
      write(*,'(a,f15.7,a)') shift//'wsmp: symbolic fact. time:   ',time2-time1,' s'
      write(*,'(a,e15.7,a,e15.7)') shift//'wsmp: range of avals: ',minval(avals),' - ',maxval(avals)
   end if
end if

wsmpdpmem=iparm(23)
if (params%debug.gt.1) call heap (threadinfo,'wsmpdpmem','solve_with',wsmpdpmem,'dp',+1)
!==============================================================================!
!=====[Cholesky Factorization]=================================================!
!==============================================================================!

time1=mpi_wtime()

iparm(2) = 3
iparm(3) = 3

Dave Whipp's avatar
Dave Whipp committed
call pwssmp(n_iproc,ia,ja,avals,diag,perm,invp,b,ldb,nrhs,aux,naux,mrp,iparm,dparm)
write(*,'(a,e15.7)') shift//'wsmp: avals min on main diagonal (dparm(5)): ',dparm(5)
write(*,'(a,e15.7)') shift//'wsmp: avals max on main diagonal (dparm(4)): ',dparm(4)

if (iproc.eq.0) then
      write (*,*) 'Cholesky factorization problem at location ',iparm(64),' in WSMP matrix (iparm(64))'
   elseif (iparm(64).lt.0) then
      call write_streepjes(6,1)
      write (*,*) 'Cholesky factorization problem ',iparm(64),' in WSMP'
      call write_streepjes(6,1)
   endif
endif

if (iparm(64).gt.0) then
   if (iparm(64).ge.n_iproc_st .and. iparm(64).le.n_iproc_end) then
      badproc=iproc
      write (*,'(a36,i15,a1)') 'WSMP failed on MPI process ',badproc,'.'



   ! List values +/- 50 rows/columns around bad value
   if (iproc == 0) write(*,'(a,i15)') '+/- 50 values surrounding bad index ',iparm(64)
   do i=iparm(64)-50,iparm(64)+50
     if (i.ge.n_iproc_st .and. i.le.n_iproc_end) badproc=iproc
     if (iproc.eq.badproc) then      
       write (*,'(a,i15,a,e15.7,a,i15)') 'avals at row/col ',i,': ',avals(ia(i-n_iproc_st+1)),' from process ',badproc
     endif
   enddo
   if (iparm(64).ge.n_iproc_st .and. iparm(64).le.n_iproc_end) badproc=iproc



   
        write (*,'(a36,i15,a1)') 'Debug output from process ',badproc,':'
        write (*,'(a36,e15.7)') 'Bad main diagonal value (avals): ',avals(ia(iparm(64)-n_iproc_st+1))


        !write (*,'(a36,e15.7)') 'Permuted bad main diagonal value (avals): ',avals(ia(iparm(64)-n_iproc_st+1))
        !write (*,'(a36,e15.7)') 'Invp bad main diagonal value (avals): ',avals(ia(iparm(64)-n_iproc_st+1))


        ! Do inverse permutation to find original location of bad value in octree


        !badindex=invp(iparm(64))
        badindex=iparm(64)



        idof=mod(badindex,ndof)
        write (*,'(a36,i15)') 'Node in DOUAR octree with bad value: ',badnode
        allocate (bad_elems(8))
        cnt=0
        do i=1,osolve%nleaves
          do j=1,params%mpe
            if (osolve%icon(j,i).eq.badnode) then
              cnt=cnt+1
              bad_elems(cnt)=i
            endif
          enddo
        enddo
        call write_streepjes(6,1)
        do i=1,cnt
          write (*,'(a,i15,a)') '-- Output for element ',bad_elems(i),' --'
          write (*,'(a32,e15.7)') 'Pressure: ',osolve%pressure(bad_elems(i))
          write (*,'(a32,e15.7)') 'Smoothed pressure: ',osolve%spressure(bad_elems(i))
          write (*,'(a32,e15.7)') 'Effective viscosity: ',osolve%eviscosity(bad_elems(i))
          write (*,'(a32,e15.7)') 'Effective strain rate: ',osolve%e2d(bad_elems(i))
          elem_dz=(osolve%z(osolve%icon(5,bad_elems(i)))-osolve%z(osolve%icon(1,bad_elems(i))))*params%vex
          write (*,'(a32,e15.7)') 'Surface damping contribution: ',params%damp_factor*(params%dt*(-weightel(bad_elems(i))))/(2.d0*elem_dz)
          write (*,'(a)') '-- Nodal output --'
          write (*,'(a16,8i15)') 'Node number: ',osolve%icon(1,bad_elems(i)),  &
                osolve%icon(2,bad_elems(i)),osolve%icon(3,bad_elems(i)),       &
                osolve%icon(4,bad_elems(i)),osolve%icon(5,bad_elems(i)),       &
                osolve%icon(6,bad_elems(i)),osolve%icon(7,bad_elems(i)),       &
                osolve%icon(8,bad_elems(i))
                osolve%x(osolve%icon(1,bad_elems(i))),                         &
                osolve%x(osolve%icon(2,bad_elems(i))),                         &
                osolve%x(osolve%icon(3,bad_elems(i))),                         &
                osolve%x(osolve%icon(4,bad_elems(i))),                         &
                osolve%x(osolve%icon(5,bad_elems(i))),                         &
                osolve%x(osolve%icon(6,bad_elems(i))),                         &
                osolve%x(osolve%icon(7,bad_elems(i))),                         &
                osolve%x(osolve%icon(8,bad_elems(i)))
                osolve%y(osolve%icon(1,bad_elems(i))),                         &
                osolve%y(osolve%icon(2,bad_elems(i))),                         &
                osolve%y(osolve%icon(3,bad_elems(i))),                         &
                osolve%y(osolve%icon(4,bad_elems(i))),                         &
                osolve%y(osolve%icon(5,bad_elems(i))),                         &
                osolve%y(osolve%icon(6,bad_elems(i))),                         &
                osolve%y(osolve%icon(7,bad_elems(i))),                         &
                osolve%y(osolve%icon(8,bad_elems(i)))
                osolve%z(osolve%icon(1,bad_elems(i))),                         &
                osolve%z(osolve%icon(2,bad_elems(i))),                         &
                osolve%z(osolve%icon(3,bad_elems(i))),                         &
                osolve%z(osolve%icon(4,bad_elems(i))),                         &
                osolve%z(osolve%icon(5,bad_elems(i))),                         &
                osolve%z(osolve%icon(6,bad_elems(i))),                         &
                osolve%z(osolve%icon(7,bad_elems(i))),                         &
                osolve%z(osolve%icon(8,bad_elems(i)))
                osolve%u(osolve%icon(1,bad_elems(i))),                         &
                osolve%u(osolve%icon(2,bad_elems(i))),                         &
                osolve%u(osolve%icon(3,bad_elems(i))),                         &
                osolve%u(osolve%icon(4,bad_elems(i))),                         &
                osolve%u(osolve%icon(5,bad_elems(i))),                         &
                osolve%u(osolve%icon(6,bad_elems(i))),                         &
                osolve%u(osolve%icon(7,bad_elems(i))),                         &
                osolve%u(osolve%icon(8,bad_elems(i)))
                osolve%v(osolve%icon(1,bad_elems(i))),                         &
                osolve%v(osolve%icon(2,bad_elems(i))),                         &
                osolve%v(osolve%icon(3,bad_elems(i))),                         &
                osolve%v(osolve%icon(4,bad_elems(i))),                         &
                osolve%v(osolve%icon(5,bad_elems(i))),                         &
                osolve%v(osolve%icon(6,bad_elems(i))),                         &
                osolve%v(osolve%icon(7,bad_elems(i))),                         &
                osolve%v(osolve%icon(8,bad_elems(i)))
                osolve%w(osolve%icon(1,bad_elems(i))),                         &
                osolve%w(osolve%icon(2,bad_elems(i))),                         &
                osolve%w(osolve%icon(3,bad_elems(i))),                         &
                osolve%w(osolve%icon(4,bad_elems(i))),                         &
                osolve%w(osolve%icon(5,bad_elems(i))),                         &
                osolve%w(osolve%icon(6,bad_elems(i))),                         &
                osolve%w(osolve%icon(7,bad_elems(i))),                         &
                osolve%w(osolve%icon(8,bad_elems(i)))
      deallocate(bad_elems)
   endif
endif

if (iproc.eq.0) then
   if (iparm(64).ne.0) then
      !call mpi_abort(mpi_comm_world,ierr)
      !call stop_run ('Cholesky factorization problem in WSMP$')
      stop
   else
      write(*,'(a,f15.7,a)') shift//'wsmp: Cholesky fact. time:   ',time2-time1,' s'
      write(*,'(a,f15.7,a)') shift//'wsmp: Factorisation          ',(dparm(23) * 1.d-6) / (time2-time1),' Mflops'
   end if
end if


!==============================================================================!
!=====[Back substitution]======================================================!
!==============================================================================!

time1=mpi_wtime()

iparm(2) = 4
iparm(3) = 4

Dave Whipp's avatar
Dave Whipp committed
call pwssmp(n_iproc,ia,ja,avals,diag,perm,invp,b,ldb,nrhs,aux,naux,mrp,iparm,dparm)

time2=mpi_wtime()

if (iproc.eq.0) then
   if (iparm(64) .ne. 0) then
      print *,'WSMP-> back substitution pb',iparm(64)
      return
   else
      write(*,'(a,f15.7,a)') shift//'wsmp: back substitution time:',time2-time1,' s'
   end if
end if

if (params%debug.gt.1) call heap (threadinfo,'perm','solve_with',size(perm),'int',-1)
deallocate(perm)
if (params%debug.gt.1) call heap (threadinfo,'invp','solve_with',size(invp),'int',-1)
deallocate(invp)
if (params%debug.gt.1) call heap (threadinfo,'wsmpdpmem','solve_with',wsmpdpmem,'dp',-1)
!==============================================================================!
!=====[transfer b's in solution vector]========================================!
!==============================================================================!

time1=mpi_wtime()

tag=111
solution=1.d31

if (iproc.eq.0) solution(1:n_iproc)=b(:,1)

do iip=1,nproc-1
!  call mpi_sendrecv(n_iproc_st,1,mpi_integer,0,tag,ist,1,mpi_integer,iip,tag,&
!       mpi_comm_world,statut,ierr)
!  call mpi_sendrecv(n_iproc_end,1,mpi_integer,0,tag,iend,1,mpi_integer,iip,tag,&
!       mpi_comm_world,statut,ierr)
!  call mpi_sendrecv(n_iproc,1,mpi_integer,0,tag,iwidth,1,mpi_integer,iip,tag,&
!       mpi_comm_world,statut,ierr)
!  call mpi_sendrecv(b(:,1),n_iproc,mpi_double_precision,0,tag,&
!       solution(ist:iend),iwidth,mpi_double_precision,iip,tag,mpi_comm_world,&
!       statut,ierr)
   if (iproc==iip) then
      call mpi_ssend (n_iproc_st,1,mpi_integer,0,tag,mpi_comm_world,ierr)  
      call mpi_ssend (n_iproc_end,1,mpi_integer,0,tag,mpi_comm_world,ierr)
      call mpi_ssend (n_iproc,1,mpi_integer,0,tag,mpi_comm_world,ierr)
      call mpi_ssend (b(:,1),n_iproc,mpi_double_precision,0,tag,mpi_comm_world,ierr)
   elseif (iproc==0) then
      call mpi_recv (ist,1,mpi_integer,iip,tag,mpi_comm_world,statut,ierr)
      call mpi_recv (iend,1,mpi_integer,iip,tag,mpi_comm_world,statut,ierr)
      call mpi_recv (iwidth,1,mpi_integer,iip,tag,mpi_comm_world,statut,ierr)
      call mpi_recv (solution(ist:iend),iwidth,mpi_double_precision,iip,tag,mpi_comm_world,statut,ierr)
   end if
!   if (iproc==iip) then
!      call mpi_send (n_iproc_st,1,mpi_integer,0,tag,mpi_comm_world,ierr)  
!      call mpi_send (n_iproc_end,1,mpi_integer,0,tag,mpi_comm_world,ierr)
!      call mpi_send (n_iproc,1,mpi_integer,0,tag,mpi_comm_world,ierr)
!      call mpi_send (b(:,1),n_iproc,mpi_double_precision,0,tag,mpi_comm_world,ierr)
!   elseif (iproc==0) then
!      call mpi_recv (ist,1,mpi_integer,iip,tag,mpi_comm_world,statut,ierr)
!      call mpi_recv (iend,1,mpi_integer,iip,tag,mpi_comm_world,statut,ierr)
!      call mpi_recv (iwidth,1,mpi_integer,iip,tag,mpi_comm_world,statut,ierr)
!      call mpi_recv (solution(ist:iend),iwidth,mpi_double_precision,iip,tag,mpi_comm_world,statut,ierr)
!   end if
end do
call mpi_bcast (solution,n,mpi_double_precision,0,mpi_comm_world,ierr)

time2=mpi_wtime()

if (iproc.eq.0) then
   write(*,'(a,f15.7,a)') shift//'wsmp: from b to rhs vector:  ',time2-time1,' s'
end if

!write(*,*) iproc,'solution',minval(solution),maxval(solution)

time_end=mpi_wtime()

if (iproc.eq.0) then
   write(*,'(a,f15.7,a)') shift//'---------------------------------'
   write(*,'(a,f15.7,a)') shift//'wsmp: total solve time:      ',time_end-time_st,' s' 
end if

if (iproc.eq.0) then
   write(*,'(a,f15.7,a)') shift//'================================='
end if


!==============================================================================!
!=====[transfer the solution onto ov for next step or iteration]===============!
!==============================================================================!

ov%unode(1:osolve%nnode)=0.d0
ov%vnode(1:osolve%nnode)=0.d0
ov%wnode(1:osolve%nnode)=0.d0
do i=1,vo%nnode
   j=vo%rtf(i)
   ov%unode(j)=solution((i-1)*ndof+1)
   ov%vnode(j)=solution((i-1)*ndof+2)
   ov%wnode(j)=solution((i-1)*ndof+3)
enddo

if (iproc.eq.0) then
   if (params%debug>=1) then
   write(*,'(a,2e13.5)') shift//'u :',minval(ov%unode),maxval(ov%unode)
   write(*,'(a,2e13.5)') shift//'v :',minval(ov%vnode),maxval(ov%vnode)
   write(*,'(a,2e13.5)') shift//'w :',minval(ov%wnode),maxval(ov%wnode)
   write(*,'(a,f15.7,a)') shift//'================================='
   end if
   write (8,*) 'raw solution',minval(solution),maxval(solution)
   write (8,'(a,f30.20,a,f30.20)') 'min ov%unode ',minval(ov%unode),'maxval ov%unode',maxval(ov%unode)
   write (8,'(a,f30.20,a,f30.20)') 'min ov%vnode ',minval(ov%vnode),'maxval ov%vnode',maxval(ov%vnode)
   write (8,'(a,f30.20,a,f30.20)') 'min ov%wnode ',minval(ov%wnode),'maxval ov%wnode',maxval(ov%wnode)
end if

if (params%debug.gt.1) call heap (threadinfo,'solution','solve_with',size(solution),'dp',-1)
deallocate (solution)

call pwsmp_clear()

!==============================================================================!
!=====[DoRuRe: output octree and solver statistics]============================!
!==============================================================================!

call DoRuRe_octree_stats (osolve,params,istep,iter,iter_nl)
!call DoRuRe_solver_stats (params%doDoRuRe,istep,iter,iter_nl,id%n,id%nz,id%rinfog(3),id%infog(3),time1,time2,time3,osolve%octree(2))

end subroutine solve_with_pwssmp

!==============================================================================!
Dave Whipp's avatar
Dave Whipp committed
!==============================================================================!