Newer
Older
!==============================================================================!
! |
! ||===\\ |
! || \\ |
! || || //==\\ || || //==|| ||/==\\ |
! || || || || || || || || || || |
! || // || || || || || || || |
! ||===// \\==// \\==\\ \\==\\ || |
! |
!==============================================================================!
!==============================================================================!
! |
! SOLVE_WITH_WSMP Oct. 2008 |
! |
!==============================================================================!
!==============================================================================!
subroutine solve_with_pwssmp (n,nz_loc,n_iproc,n_iproc_st,n_iproc_end,ia,ja, &
ldb,nrhs,avals,b,params,osolve,ov,vo,threadinfo, &
ndof,istep,iter,iter_nl,weightel)
!use mpi
include 'mpif.h'
!==============================================================================!
!==============================================================================!
integer n,nz_loc,n_iproc,n_iproc_st,n_iproc_end
integer ia(n_iproc+1)
integer ja(nz_loc)
integer ldb,nrhs
double precision avals(nz_loc)
double precision b(ldb,nrhs)
type (parameters) params
type (octreesolve) osolve
type (octreev) ov
type (void) vo
type (thread) threadinfo
integer ndof
integer istep,iter,iter_nl
double precision weightel(osolve%nleaves)
!==============================================================================!
!==============================================================================!
integer iproc,nproc,ierr
character*3 :: ciproc
integer i,j,k,iip,ist,iend,iwidth,tag,wsmpdpmem
double precision time1,time2
double precision time_st,time_end
double precision, dimension(:), allocatable :: solution
integer,dimension(MPI_STATUS_SIZE)::statut
character*72 :: shift
integer iparm(64)
double precision dparm(64)
integer, dimension(:), allocatable :: perm,invp
double precision aux, diag
integer mrp,naux
double precision elem_dz
Dave Whipp
committed
integer badnode,badproc,cnt,idof,rbadnode,badindex
integer,dimension(:),allocatable :: bad_elems
!==============================================================================!
!==============================================================================!
call mpi_comm_size (mpi_comm_world,nproc,ierr)
call mpi_comm_rank (mpi_comm_world,iproc,ierr)
allocate(perm(n),stat=threadinfo%err)
if (params%debug.gt.1) call heap (threadinfo,'perm','solve_with',size(perm),'int',+1)
allocate(invp(n),stat=threadinfo%err)
if (params%debug.gt.1) call heap (threadinfo,'invp','solve_with',size(invp),'int',+1)
allocate (solution(vo%nnode*ndof),stat=threadinfo%err)
if (params%debug.gt.1) call heap (threadinfo,'solution','solve_with',size(solution),'dp',+1)
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
invp=0
perm=0
mrp=0
naux = 0
shift=' '
call int_to_char(ciproc,3,iproc)
!open(unit=770+iproc,file='ia3_'//ciproc//'.dat',status='replace')
!do k=1,n_iproc+1
! write(770+iproc,*) ia(k)
!end do
!close(770+iproc)
!open(unit=770+iproc,file='ja3_'//ciproc//'.dat',status='replace')
!do k=1,nz_loc
! write(770+iproc,*) ja(k),avals(k)
!end do
!close(770+iproc)
!open(unit=770+iproc,file='b3_'//ciproc//'.dat',status='replace')
!do k=1,n_iproc
! write(770+iproc,*) b(k,1)
!end do
!close(770+iproc)
!open(unit=770+iproc,file='matreconstr3_'//ciproc//'.dat',status='replace')
!do k=1,n_iproc
! do j=ia(k),ia(k+1)-1
! write(770+iproc,*) k+n_iproc_st-1,ja(j),avals(j)
! end do
!end do
!close(770+iproc)
time_st=mpi_wtime()
CALL WSETMAXTHRDS(1)
!CALL PWSMP_INITIALIZE()
if (iproc.eq.0) then
write(*,'(a,f15.10,a)') shift//'================================='
end if
!==============================================================================!
!=====[WSMP: initialisation]===================================================!
!==============================================================================!
time1=mpi_wtime()
iparm(1)=0
iparm(2)=0
iparm(3)=0
Dave Whipp
committed
! Enable scaling of the WSMP matrices for LL^T factorization
if (params%wsmp_scaling) then
iparm(10)=1
endif
! Skip ordering
iparm(16)=-1
call pwssmp(n_iproc,ia,ja,avals,diag,perm,invp,b,ldb,nrhs,aux,naux,mrp,iparm,dparm)
time2=mpi_wtime()
if (iproc.eq.0) then
if (iparm(64).ne.0) then
print *,'WSMP-> initialisation pb',iparm(64)
else
write(*,'(a,f15.7,a)') shift//'wsmp: initialisation time: ',time2-time1,' s'
Dave Whipp
committed
write(*,'(a,e15.7)') shift//'wsmp: signularity threshold (dparm(10)): ',dparm(10)
!==============================================================================!
!=====[WSMP: ordering]=========================================================!
!==============================================================================!
time1=mpi_wtime()
iparm(2)=1
iparm(3)=1
call pwssmp(n_iproc,ia,ja,avals,diag,perm,invp,b,ldb,nrhs,aux,naux,mrp,iparm,dparm)
time2=mpi_wtime()
if (iproc.eq.0) then
if (iparm(64).ne.0) then
print *,'WSMP-> ordering pb',iparm(64)
stop 'WSMP pb'
else
write(*,'(a,f15.7,a)') shift//'wsmp: ordering time: ',time2-time1,' s'
end if
end if
!==============================================================================!
!=====[WSMP: symbolic factorisation]===========================================!
!==============================================================================!
time1=mpi_wtime()
iparm(2)=2
iparm(3)=2
call pwssmp(n_iproc,ia,ja,avals,diag,perm,invp,b,ldb,nrhs,aux,naux,mrp,iparm,dparm)
time2=mpi_wtime()
if (iproc.eq.0) then
if (iparm(64).ne.0) then
print *,'WSMP-> symbolic factorisation pb',iparm(64)
stop
else
write(*,'(a,f15.7,a)') shift//'wsmp: symbolic fact. time: ',time2-time1,' s'
Dave Whipp
committed
write(*,'(a,e15.7,a,e15.7)') shift//'wsmp: range of avals: ',minval(avals),' - ',maxval(avals)
end if
end if
wsmpdpmem=iparm(23)
if (params%debug.gt.1) call heap (threadinfo,'wsmpdpmem','solve_with',wsmpdpmem,'dp',+1)
!==============================================================================!
!=====[Cholesky Factorization]=================================================!
!==============================================================================!
time1=mpi_wtime()
iparm(2) = 3
iparm(3) = 3
call pwssmp(n_iproc,ia,ja,avals,diag,perm,invp,b,ldb,nrhs,aux,naux,mrp,iparm,dparm)
Dave Whipp
committed
write(*,'(a,e15.7)') shift//'wsmp: avals min on main diagonal (dparm(5)): ',dparm(5)
write(*,'(a,e15.7)') shift//'wsmp: avals max on main diagonal (dparm(4)): ',dparm(4)
if (iparm(64) .gt. 0) then
call write_streepjes(6,1)
Dave Whipp
committed
write (*,*) 'Cholesky factorization problem at location ',iparm(64),' in WSMP matrix (iparm(64))'
call write_streepjes(6,1)
elseif (iparm(64).lt.0) then
call write_streepjes(6,1)
write (*,*) 'Cholesky factorization problem ',iparm(64),' in WSMP'
call write_streepjes(6,1)
endif
endif
if (iparm(64).gt.0) then
if (iparm(64).ge.n_iproc_st .and. iparm(64).le.n_iproc_end) then
badproc=iproc
Dave Whipp
committed
write (*,'(a36,i15,a1)') 'WSMP failed on MPI process ',badproc,'.'
endif
Dave Whipp
committed
! List values +/- 50 rows/columns around bad value
if (iproc == 0) write(*,'(a,i15)') '+/- 50 values surrounding bad index ',iparm(64)
do i=iparm(64)-50,iparm(64)+50
if (i.ge.n_iproc_st .and. i.le.n_iproc_end) badproc=iproc
if (iproc.eq.badproc) then
write (*,'(a,i15,a,e15.7,a,i15)') 'avals at row/col ',i,': ',avals(ia(i-n_iproc_st+1)),' from process ',badproc
endif
enddo
if (iparm(64).ge.n_iproc_st .and. iparm(64).le.n_iproc_end) badproc=iproc
if (iproc.eq.badproc) then
if (params%debug.gt.0) then
Dave Whipp
committed
write (*,'(a36,i15,a1)') 'Debug output from process ',badproc,':'
write (*,'(a36,e15.7)') 'Bad main diagonal value (avals): ',avals(ia(iparm(64)-n_iproc_st+1))
!write (*,'(a36,e15.7)') 'Permuted bad main diagonal value (avals): ',avals(ia(iparm(64)-n_iproc_st+1))
!write (*,'(a36,e15.7)') 'Invp bad main diagonal value (avals): ',avals(ia(iparm(64)-n_iproc_st+1))
! Do inverse permutation to find original location of bad value in octree
!badindex=invp(iparm(64))
badindex=iparm(64)
idof=mod(badindex,ndof)
if (idof.eq.0) idof=3
Dave Whipp
committed
rbadnode = ((badindex-idof)/ndof)+1
badnode = vo%rtf(rbadnode)
Dave Whipp
committed
write (*,'(a36,i15)') 'Node in DOUAR octree with bad value: ',badnode
allocate (bad_elems(8))
cnt=0
do i=1,osolve%nleaves
do j=1,params%mpe
if (osolve%icon(j,i).eq.badnode) then
cnt=cnt+1
bad_elems(cnt)=i
endif
enddo
enddo
call write_streepjes(6,1)
do i=1,cnt
write (*,*) ''
Dave Whipp
committed
write (*,'(a,i15,a)') '-- Output for element ',bad_elems(i),' --'
write (*,'(a32,e15.7)') 'Pressure: ',osolve%pressure(bad_elems(i))
write (*,'(a32,e15.7)') 'Smoothed pressure: ',osolve%spressure(bad_elems(i))
write (*,'(a32,e15.7)') 'Effective viscosity: ',osolve%eviscosity(bad_elems(i))
write (*,'(a32,e15.7)') 'Effective strain rate: ',osolve%e2d(bad_elems(i))
elem_dz=(osolve%z(osolve%icon(5,bad_elems(i)))-osolve%z(osolve%icon(1,bad_elems(i))))*params%vex
Dave Whipp
committed
write (*,'(a32,e15.7)') 'Surface damping contribution: ',params%damp_factor*(params%dt*(-weightel(bad_elems(i))))/(2.d0*elem_dz)
write (*,'(a)') '-- Nodal output --'
Dave Whipp
committed
write (*,'(a16,8i15)') 'Node number: ',osolve%icon(1,bad_elems(i)), &
osolve%icon(2,bad_elems(i)),osolve%icon(3,bad_elems(i)), &
osolve%icon(4,bad_elems(i)),osolve%icon(5,bad_elems(i)), &
osolve%icon(6,bad_elems(i)),osolve%icon(7,bad_elems(i)), &
osolve%icon(8,bad_elems(i))
Dave Whipp
committed
write (*,'(a16,8e15.7)') 'x-coordinate: ', &
osolve%x(osolve%icon(1,bad_elems(i))), &
osolve%x(osolve%icon(2,bad_elems(i))), &
osolve%x(osolve%icon(3,bad_elems(i))), &
osolve%x(osolve%icon(4,bad_elems(i))), &
osolve%x(osolve%icon(5,bad_elems(i))), &
osolve%x(osolve%icon(6,bad_elems(i))), &
osolve%x(osolve%icon(7,bad_elems(i))), &
osolve%x(osolve%icon(8,bad_elems(i)))
Dave Whipp
committed
write (*,'(a16,8e15.7)') 'y-coordinate: ', &
osolve%y(osolve%icon(1,bad_elems(i))), &
osolve%y(osolve%icon(2,bad_elems(i))), &
osolve%y(osolve%icon(3,bad_elems(i))), &
osolve%y(osolve%icon(4,bad_elems(i))), &
osolve%y(osolve%icon(5,bad_elems(i))), &
osolve%y(osolve%icon(6,bad_elems(i))), &
osolve%y(osolve%icon(7,bad_elems(i))), &
osolve%y(osolve%icon(8,bad_elems(i)))
Dave Whipp
committed
write (*,'(a16,8e15.7)') 'z-coordinate: ', &
osolve%z(osolve%icon(1,bad_elems(i))), &
osolve%z(osolve%icon(2,bad_elems(i))), &
osolve%z(osolve%icon(3,bad_elems(i))), &
osolve%z(osolve%icon(4,bad_elems(i))), &
osolve%z(osolve%icon(5,bad_elems(i))), &
osolve%z(osolve%icon(6,bad_elems(i))), &
osolve%z(osolve%icon(7,bad_elems(i))), &
osolve%z(osolve%icon(8,bad_elems(i)))
Dave Whipp
committed
write (*,'(a16,8e15.7)') 'u-velocity: ', &
osolve%u(osolve%icon(1,bad_elems(i))), &
osolve%u(osolve%icon(2,bad_elems(i))), &
osolve%u(osolve%icon(3,bad_elems(i))), &
osolve%u(osolve%icon(4,bad_elems(i))), &
osolve%u(osolve%icon(5,bad_elems(i))), &
osolve%u(osolve%icon(6,bad_elems(i))), &
osolve%u(osolve%icon(7,bad_elems(i))), &
osolve%u(osolve%icon(8,bad_elems(i)))
Dave Whipp
committed
write (*,'(a16,8e15.7)') 'v-velocity: ', &
osolve%v(osolve%icon(1,bad_elems(i))), &
osolve%v(osolve%icon(2,bad_elems(i))), &
osolve%v(osolve%icon(3,bad_elems(i))), &
osolve%v(osolve%icon(4,bad_elems(i))), &
osolve%v(osolve%icon(5,bad_elems(i))), &
osolve%v(osolve%icon(6,bad_elems(i))), &
osolve%v(osolve%icon(7,bad_elems(i))), &
osolve%v(osolve%icon(8,bad_elems(i)))
Dave Whipp
committed
write (*,'(a16,8e15.7)') 'w-velocity: ', &
osolve%w(osolve%icon(1,bad_elems(i))), &
osolve%w(osolve%icon(2,bad_elems(i))), &
osolve%w(osolve%icon(3,bad_elems(i))), &
osolve%w(osolve%icon(4,bad_elems(i))), &
osolve%w(osolve%icon(5,bad_elems(i))), &
osolve%w(osolve%icon(6,bad_elems(i))), &
osolve%w(osolve%icon(7,bad_elems(i))), &
osolve%w(osolve%icon(8,bad_elems(i)))
enddo
endif
call write_streepjes(6,1)
deallocate(bad_elems)
endif
endif
if (iproc.eq.0) then
if (iparm(64).ne.0) then
!call mpi_abort(mpi_comm_world,ierr)
!call stop_run ('Cholesky factorization problem in WSMP$')
stop
else
write(*,'(a,f15.7,a)') shift//'wsmp: Cholesky fact. time: ',time2-time1,' s'
write(*,'(a,f15.7,a)') shift//'wsmp: Factorisation ',(dparm(23) * 1.d-6) / (time2-time1),' Mflops'
end if
end if
!==============================================================================!
!=====[Back substitution]======================================================!
!==============================================================================!
time1=mpi_wtime()
iparm(2) = 4
iparm(3) = 4
call pwssmp(n_iproc,ia,ja,avals,diag,perm,invp,b,ldb,nrhs,aux,naux,mrp,iparm,dparm)
time2=mpi_wtime()
if (iproc.eq.0) then
if (iparm(64) .ne. 0) then
print *,'WSMP-> back substitution pb',iparm(64)
return
else
write(*,'(a,f15.7,a)') shift//'wsmp: back substitution time:',time2-time1,' s'
end if
end if
if (params%debug.gt.1) call heap (threadinfo,'perm','solve_with',size(perm),'int',-1)
deallocate(perm)
if (params%debug.gt.1) call heap (threadinfo,'invp','solve_with',size(invp),'int',-1)
deallocate(invp)
if (params%debug.gt.1) call heap (threadinfo,'wsmpdpmem','solve_with',wsmpdpmem,'dp',-1)
!==============================================================================!
!=====[transfer b's in solution vector]========================================!
!==============================================================================!
time1=mpi_wtime()
tag=111
solution=1.d31
if (iproc.eq.0) solution(1:n_iproc)=b(:,1)
do iip=1,nproc-1
Dave Whipp
committed
! call mpi_sendrecv(n_iproc_st,1,mpi_integer,0,tag,ist,1,mpi_integer,iip,tag,&
! mpi_comm_world,statut,ierr)
! call mpi_sendrecv(n_iproc_end,1,mpi_integer,0,tag,iend,1,mpi_integer,iip,tag,&
! mpi_comm_world,statut,ierr)
! call mpi_sendrecv(n_iproc,1,mpi_integer,0,tag,iwidth,1,mpi_integer,iip,tag,&
! mpi_comm_world,statut,ierr)
! call mpi_sendrecv(b(:,1),n_iproc,mpi_double_precision,0,tag,&
! solution(ist:iend),iwidth,mpi_double_precision,iip,tag,mpi_comm_world,&
! statut,ierr)
if (iproc==iip) then
call mpi_ssend (n_iproc_st,1,mpi_integer,0,tag,mpi_comm_world,ierr)
call mpi_ssend (n_iproc_end,1,mpi_integer,0,tag,mpi_comm_world,ierr)
call mpi_ssend (n_iproc,1,mpi_integer,0,tag,mpi_comm_world,ierr)
call mpi_ssend (b(:,1),n_iproc,mpi_double_precision,0,tag,mpi_comm_world,ierr)
elseif (iproc==0) then
call mpi_recv (ist,1,mpi_integer,iip,tag,mpi_comm_world,statut,ierr)
call mpi_recv (iend,1,mpi_integer,iip,tag,mpi_comm_world,statut,ierr)
call mpi_recv (iwidth,1,mpi_integer,iip,tag,mpi_comm_world,statut,ierr)
call mpi_recv (solution(ist:iend),iwidth,mpi_double_precision,iip,tag,mpi_comm_world,statut,ierr)
end if
! if (iproc==iip) then
! call mpi_send (n_iproc_st,1,mpi_integer,0,tag,mpi_comm_world,ierr)
! call mpi_send (n_iproc_end,1,mpi_integer,0,tag,mpi_comm_world,ierr)
! call mpi_send (n_iproc,1,mpi_integer,0,tag,mpi_comm_world,ierr)
! call mpi_send (b(:,1),n_iproc,mpi_double_precision,0,tag,mpi_comm_world,ierr)
! elseif (iproc==0) then
! call mpi_recv (ist,1,mpi_integer,iip,tag,mpi_comm_world,statut,ierr)
! call mpi_recv (iend,1,mpi_integer,iip,tag,mpi_comm_world,statut,ierr)
! call mpi_recv (iwidth,1,mpi_integer,iip,tag,mpi_comm_world,statut,ierr)
! call mpi_recv (solution(ist:iend),iwidth,mpi_double_precision,iip,tag,mpi_comm_world,statut,ierr)
! end if
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
end do
call mpi_bcast (solution,n,mpi_double_precision,0,mpi_comm_world,ierr)
time2=mpi_wtime()
if (iproc.eq.0) then
write(*,'(a,f15.7,a)') shift//'wsmp: from b to rhs vector: ',time2-time1,' s'
end if
!write(*,*) iproc,'solution',minval(solution),maxval(solution)
time_end=mpi_wtime()
if (iproc.eq.0) then
write(*,'(a,f15.7,a)') shift//'---------------------------------'
write(*,'(a,f15.7,a)') shift//'wsmp: total solve time: ',time_end-time_st,' s'
end if
if (iproc.eq.0) then
write(*,'(a,f15.7,a)') shift//'================================='
end if
!==============================================================================!
!=====[transfer the solution onto ov for next step or iteration]===============!
!==============================================================================!
ov%unode(1:osolve%nnode)=0.d0
ov%vnode(1:osolve%nnode)=0.d0
ov%wnode(1:osolve%nnode)=0.d0
do i=1,vo%nnode
j=vo%rtf(i)
ov%unode(j)=solution((i-1)*ndof+1)
ov%vnode(j)=solution((i-1)*ndof+2)
ov%wnode(j)=solution((i-1)*ndof+3)
enddo
if (iproc.eq.0) then
if (params%debug>=1) then
write(*,'(a,2e13.5)') shift//'u :',minval(ov%unode),maxval(ov%unode)
write(*,'(a,2e13.5)') shift//'v :',minval(ov%vnode),maxval(ov%vnode)
write(*,'(a,2e13.5)') shift//'w :',minval(ov%wnode),maxval(ov%wnode)
write(*,'(a,f15.7,a)') shift//'================================='
end if
write (8,*) 'raw solution',minval(solution),maxval(solution)
write (8,'(a,f30.20,a,f30.20)') 'min ov%unode ',minval(ov%unode),'maxval ov%unode',maxval(ov%unode)
write (8,'(a,f30.20,a,f30.20)') 'min ov%vnode ',minval(ov%vnode),'maxval ov%vnode',maxval(ov%vnode)
write (8,'(a,f30.20,a,f30.20)') 'min ov%wnode ',minval(ov%wnode),'maxval ov%wnode',maxval(ov%wnode)
end if
if (params%debug.gt.1) call heap (threadinfo,'solution','solve_with',size(solution),'dp',-1)
deallocate (solution)
!==============================================================================!
!=====[DoRuRe: output octree and solver statistics]============================!
!==============================================================================!
call DoRuRe_octree_stats (osolve,params,istep,iter,iter_nl)
!call DoRuRe_solver_stats (params%doDoRuRe,istep,iter,iter_nl,id%n,id%nz,id%rinfog(3),id%infog(3),time1,time2,time3,osolve%octree(2))
end subroutine solve_with_pwssmp
!==============================================================================!
!==============================================================================!