Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
!==============================================================================!
! |
! ||===\\ |
! || \\ |
! || || //==\\ || || //==|| ||/==\\ |
! || || || || || || || || || || |
! || // || || || || || || || |
! ||===// \\==// \\==\\ \\==\\ || |
! |
!==============================================================================!
!==============================================================================!
! |
! SOLVE_WITH_WSMP Oct. 2008 |
! |
!==============================================================================!
!==============================================================================!
subroutine solve_with_pwssmp (n,nz_loc,n_iproc,n_iproc_st,n_iproc_end,ia,ja, &
ldb,nrhs,avals,b,params,osolve,ov,vo,threadinfo,ndof,istep,iter,iter_nl)
use definitions
use threads
implicit none
INCLUDE 'mpif.h'
!==============================================================================!
!==============================================================================!
integer n,nz_loc,n_iproc,n_iproc_st,n_iproc_end
integer ia(n_iproc+1)
integer ja(nz_loc)
integer ldb,nrhs
double precision avals(nz_loc)
double precision b(ldb,nrhs)
type (parameters) params
type (octreesolve) osolve
type (octreev) ov
type (void) vo
type (thread) threadinfo
integer ndof
integer istep,iter,iter_nl
!==============================================================================!
!==============================================================================!
integer iproc,nproc,ierr
character*3 :: ciproc
integer i,j,k,iip,ist,iend,iwidth,tag,wsmpdpmem
double precision time1,time2
double precision time_st,time_end
double precision, dimension(:), allocatable :: solution
integer,dimension(MPI_STATUS_SIZE)::statut
character*72 :: shift
integer iparm(64)
double precision dparm(64)
integer, dimension(:), allocatable :: perm,invp
double precision aux, diag
integer mrp,naux
!==============================================================================!
!==============================================================================!
call mpi_comm_size (mpi_comm_world,nproc,ierr)
call mpi_comm_rank (mpi_comm_world,iproc,ierr)
allocate(perm(n),stat=threadinfo%err) ; call heap (threadinfo,'perm','solve_with',size(perm),'int',+1)
allocate(invp(n),stat=threadinfo%err) ; call heap (threadinfo,'invp','solve_with',size(invp),'int',+1)
allocate (solution(vo%nnode*ndof),stat=threadinfo%err) ; call heap (threadinfo,'solution','solve_with',size(solution),'dp',+1)
invp=0
perm=0
mrp=0
naux = 0
shift=' '
call int_to_char(ciproc,3,iproc)
!open(unit=770+iproc,file='ia3_'//ciproc//'.dat',status='replace')
!do k=1,n_iproc+1
! write(770+iproc,*) ia(k)
!end do
!close(770+iproc)
!open(unit=770+iproc,file='ja3_'//ciproc//'.dat',status='replace')
!do k=1,nz_loc
! write(770+iproc,*) ja(k),avals(k)
!end do
!close(770+iproc)
!open(unit=770+iproc,file='b3_'//ciproc//'.dat',status='replace')
!do k=1,n_iproc
! write(770+iproc,*) b(k,1)
!end do
!close(770+iproc)
!open(unit=770+iproc,file='matreconstr3_'//ciproc//'.dat',status='replace')
!do k=1,n_iproc
! do j=ia(k),ia(k+1)-1
! write(770+iproc,*) k+n_iproc_st-1,ja(j),avals(j)
! end do
!end do
!close(770+iproc)
time_st=mpi_wtime()
CALL WSETMAXTHRDS(1)
!CALL PWSMP_INITIALIZE()
if (iproc.eq.0) then
write(*,'(a,f15.10,a)') shift//'================================='
end if
!==============================================================================!
!=====[WSMP: initialisation]===================================================!
!==============================================================================!
time1=mpi_wtime()
iparm(1)=0
iparm(2)=0
iparm(3)=0
!iparm(20)=2
call pwssmp (n_iproc, ia, ja, avals, diag, perm, invp, b, &
ldb, nrhs, aux, naux, mrp, iparm, dparm)
time2=mpi_wtime()
if (iproc.eq.0) then
if (iparm(64).ne.0) then
print *,'WSMP-> initialisation pb',iparm(64)
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
else
write(*,'(a,f15.7,a)') shift//'wsmp: initialisation time: ',time2-time1,' s'
end if
end if
!==============================================================================!
!=====[WSMP: ordering]=========================================================!
!==============================================================================!
time1=mpi_wtime()
iparm(2)=1
iparm(3)=1
call pwssmp (n_iproc, ia, ja, avals, diag, perm, invp, b, &
ldb, nrhs, aux, naux, mrp, iparm, dparm)
time2=mpi_wtime()
if (iproc.eq.0) then
if (iparm(64).ne.0) then
print *,'WSMP-> ordering pb',iparm(64)
stop 'WSMP pb'
else
write(*,'(a,f15.7,a)') shift//'wsmp: ordering time: ',time2-time1,' s'
end if
end if
!==============================================================================!
!=====[WSMP: symbolic factorisation]===========================================!
!==============================================================================!
time1=mpi_wtime()
iparm(2)=2
iparm(3)=2
call pwssmp (n_iproc, ia, ja, avals, diag, perm, invp, b, &
ldb, nrhs, aux, naux, mrp, iparm, dparm)
time2=mpi_wtime()
if (iproc.eq.0) then
if (iparm(64).ne.0) then
print *,'WSMP-> symbolic factorisation pb',iparm(64)
stop
else
write(*,'(a,f15.7,a)') shift//'wsmp: symbolic fact. time: ',time2-time1,' s'
end if
end if
wsmpdpmem=iparm(23)
call heap (threadinfo,'wsmpdpmem','solve_with',wsmpdpmem,'dp',+1)
!==============================================================================!
!=====[Cholesky Factorization]=================================================!
!==============================================================================!
time1=mpi_wtime()
iparm(2) = 3
iparm(3) = 3
call pwssmp (n_iproc, ia, ja, avals, diag, perm, invp, b, &
ldb, nrhs, aux, naux, mrp, iparm, dparm)
time2=mpi_wtime()
if (iproc.eq.0) then
if (iparm(64) .ne. 0) then
print *,'WSMP-> Cholesky factorisation pb',iparm(64)
stop
else
write(*,'(a,f15.7,a)') shift//'wsmp: Cholesky fact. time: ',time2-time1,' s'
write(*,'(a,f15.7,a)') shift//'wsmp: Factorisation ',(dparm(23) * 1.d-6) / (time2-time1),' Mflops'
end if
end if
!==============================================================================!
!=====[Back substitution]======================================================!
!==============================================================================!
time1=mpi_wtime()
iparm(2) = 4
iparm(3) = 4
call pwssmp (n_iproc, ia, ja, avals, diag, perm, invp, b, &
ldb, nrhs, aux, naux, mrp, iparm, dparm)
time2=mpi_wtime()
if (iproc.eq.0) then
if (iparm(64) .ne. 0) then
print *,'WSMP-> back substitution pb',iparm(64)
return
else
write(*,'(a,f15.7,a)') shift//'wsmp: back substitution time:',time2-time1,' s'
end if
end if
call heap (threadinfo,'perm','solve_with',size(perm),'int',-1) ; deallocate(perm)
call heap (threadinfo,'invp','solve_with',size(invp),'int',-1) ; deallocate(invp)
call heap (threadinfo,'wsmpdpmem','solve_with',wsmpdpmem,'dp',-1)
!==============================================================================!
!=====[transfer b's in solution vector]========================================!
!==============================================================================!
time1=mpi_wtime()
tag=111
solution=1.d31
if (iproc.eq.0) solution(1:n_iproc)=b(:,1)
do iip=1,nproc-1
call mpi_sendrecv(n_iproc_st,1,mpi_integer,0,tag,ist,1,mpi_integer,iip,tag,&
mpi_comm_world,statut,ierr)
call mpi_sendrecv(n_iproc_end,1,mpi_integer,0,tag,iend,1,mpi_integer,iip,tag,&
mpi_comm_world,statut,ierr)
call mpi_sendrecv(n_iproc,1,mpi_integer,0,tag,iwidth,1,mpi_integer,iip,tag,&
mpi_comm_world,statut,ierr)
call mpi_sendrecv(b(:,1),n_iproc,mpi_double_precision,0,tag,&
solution(ist:iend),iwidth,mpi_double_precision,iip,tag,mpi_comm_world,&
statut,ierr)
! if (iproc==iip) then
! call mpi_send (n_iproc_st,1,mpi_integer,0,tag,mpi_comm_world,ierr)
! call mpi_send (n_iproc_end,1,mpi_integer,0,tag,mpi_comm_world,ierr)
! call mpi_send (n_iproc,1,mpi_integer,0,tag,mpi_comm_world,ierr)
! call mpi_send (b(:,1),n_iproc,mpi_double_precision,0,tag,mpi_comm_world,ierr)
! elseif (iproc==0) then
! call mpi_recv (ist,1,mpi_integer,iip,tag,mpi_comm_world,statut,ierr)
! call mpi_recv (iend,1,mpi_integer,iip,tag,mpi_comm_world,statut,ierr)
! call mpi_recv (iwidth,1,mpi_integer,iip,tag,mpi_comm_world,statut,ierr)
! call mpi_recv (solution(ist:iend),iwidth,mpi_double_precision,iip,tag,mpi_comm_world,statut,ierr)
! end if
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
end do
call mpi_bcast (solution,n,mpi_double_precision,0,mpi_comm_world,ierr)
time2=mpi_wtime()
if (iproc.eq.0) then
write(*,'(a,f15.7,a)') shift//'wsmp: from b to rhs vector: ',time2-time1,' s'
end if
!write(*,*) iproc,'solution',minval(solution),maxval(solution)
time_end=mpi_wtime()
if (iproc.eq.0) then
write(*,'(a,f15.7,a)') shift//'---------------------------------'
write(*,'(a,f15.7,a)') shift//'wsmp: total solve time: ',time_end-time_st,' s'
end if
if (iproc.eq.0) then
write(*,'(a,f15.7,a)') shift//'================================='
end if
!==============================================================================!
!=====[transfer the solution onto ov for next step or iteration]===============!
!==============================================================================!
ov%unode(1:osolve%nnode)=0.d0
ov%vnode(1:osolve%nnode)=0.d0
ov%wnode(1:osolve%nnode)=0.d0
do i=1,vo%nnode
j=vo%rtf(i)
ov%unode(j)=solution((i-1)*ndof+1)
ov%vnode(j)=solution((i-1)*ndof+2)
ov%wnode(j)=solution((i-1)*ndof+3)
enddo
if (iproc.eq.0) then
if (params%debug>=1) then
write(*,'(a,2e13.5)') shift//'u :',minval(ov%unode),maxval(ov%unode)
write(*,'(a,2e13.5)') shift//'v :',minval(ov%vnode),maxval(ov%vnode)
write(*,'(a,2e13.5)') shift//'w :',minval(ov%wnode),maxval(ov%wnode)
write(*,'(a,f15.7,a)') shift//'================================='
end if
write (8,*) 'raw solution',minval(solution),maxval(solution)
write (8,'(a,f30.20,a,f30.20)') 'min ov%unode ',minval(ov%unode),'maxval ov%unode',maxval(ov%unode)
write (8,'(a,f30.20,a,f30.20)') 'min ov%vnode ',minval(ov%vnode),'maxval ov%vnode',maxval(ov%vnode)
write (8,'(a,f30.20,a,f30.20)') 'min ov%wnode ',minval(ov%wnode),'maxval ov%wnode',maxval(ov%wnode)
end if
call heap (threadinfo,'solution','solve_with',size(solution),'dp',-1) ; deallocate (solution)
call pwsmp_clear()
!==============================================================================!
!=====[DoRuRe: output octree and solver statistics]============================!
!==============================================================================!
call DoRuRe_octree_stats (osolve,params,istep,iter,iter_nl)
!call DoRuRe_solver_stats (params%doDoRuRe,istep,iter,iter_nl,id%n,id%nz,id%rinfog(3),id%infog(3),time1,time2,time3,osolve%octree(2))
end subroutine solve_with_pwssmp
!==============================================================================!
!==============================================================================!