FrontISTR 5.2.0
Large-scale structural analysis program with finit element method
Loading...
Searching...
No Matches
hecmw_local_matrix.f90
Go to the documentation of this file.
1!-------------------------------------------------------------------------------
2! Copyright (c) 2019 FrontISTR Commons
3! This software is released under the MIT License, see LICENSE.txt
4!-------------------------------------------------------------------------------
5
7 use hecmw_util
9
10 private
11 public :: hecmwst_local_matrix
12 public :: hecmw_localmat_write
14 public :: hecmw_localmat_free
15 public :: hecmw_localmat_mulvec
16 public :: hecmw_trimatmul_ttkt
20 public :: hecmw_localmat_add
26
28 integer :: nr, nc, nnz, ndof
29 integer(kind=kint), pointer :: index(:)
30 integer(kind=kint), pointer :: item(:)
31 real(kind=kreal), pointer :: a(:)
33
34 integer(kind=kint), parameter :: cncol_item = 2
35 integer(kind=kint), parameter :: clid = 1
36 integer(kind=kint), parameter :: crank = 2
37 integer(kind=kint), parameter :: cgid = 3
38
39 integer(kind=kint), parameter :: debug = 0
40 integer(kind=kint), parameter :: timer = 0
41
42contains
43
44 subroutine hecmw_localmat_write(Tmat,iunit)
45 implicit none
46 type (hecmwst_local_matrix), intent(in) :: tmat
47 integer(kind=kint), intent(in) :: iunit
48 integer(kind=kint) :: nr, nc, nnz, ndof, ndof2, i, js, je, j, jj
49 nr=tmat%nr
50 nc=tmat%nc
51 nnz=tmat%nnz
52 ndof=tmat%ndof
53 ndof2=ndof*ndof
54 write(iunit,*) 'nr, nc, nnz, ndof', nr, nc, nnz, ndof
55 write(iunit,*) 'i, j, A'
56 do i=1,nr
57 js=tmat%index(i-1)+1
58 je=tmat%index(i)
59 do j=js,je
60 jj=tmat%item(j)
61 if (ndof==1) then
62 write(iunit,*) i, jj, tmat%A(j)
63 else
64 write(iunit,*) i, jj
65 write(iunit,*) tmat%A((j-1)*ndof2+1:j*ndof2)
66 endif
67 enddo
68 enddo
69 end subroutine hecmw_localmat_write
70
71 subroutine hecmw_localmat_write_ij(Tmat,iunit)
72 implicit none
73 type (hecmwst_local_matrix), intent(in) :: tmat
74 integer(kind=kint), intent(in) :: iunit
75 integer(kind=kint) :: nr, nc, nnz, ndof, i, js, je, j, jj
76 nr=tmat%nr
77 nc=tmat%nc
78 nnz=tmat%nnz
79 ndof=tmat%ndof
80 write(iunit,*) 'nr, nc, nnz, ndof', nr, nc, nnz, ndof
81 write(iunit,*) 'i, j'
82 do i=1,nr
83 js=tmat%index(i-1)+1
84 je=tmat%index(i)
85 do j=js,je
86 jj=tmat%item(j)
87 write(iunit,*) i, jj
88 enddo
89 enddo
90 end subroutine hecmw_localmat_write_ij
91
92 subroutine hecmw_localmat_blocking(Tmat, ndof, BTmat)
93 implicit none
94 type (hecmwst_local_matrix), intent(in) :: tmat
95 integer, intent(in) :: ndof
96 type (hecmwst_local_matrix), intent(out) :: btmat
97 integer, allocatable :: iw(:)
98 integer :: ndof2, i, icnt, idof, idx, ls, le, l, j, jb, k, lb0, jdof, ks, ke
99 ndof2=ndof*ndof
100
101 if (mod(tmat%nr, ndof) /= 0 .or. mod(tmat%nc, ndof) /= 0) then
102 write(0,*) tmat%nr, tmat%nc, ndof
103 stop 'ERROR: blocking_Tmat failed'
104 endif
105 btmat%nr=tmat%nr/ndof
106 btmat%nc=tmat%nc/ndof
107 btmat%ndof=ndof
108
109 allocate(iw(btmat%nc))
110 allocate(btmat%index(0:btmat%nr))
111
112 btmat%index(0)=0
113 do i=1,btmat%nr
114 icnt=0
115 do idof=1,ndof
116 idx=(i-1)*ndof+idof
117 ls=tmat%index(idx-1)+1
118 le=tmat%index(idx)
119 lcol: do l=ls,le
120 j=tmat%item(l)
121 jb=(j-1)/ndof+1
122 do k=1,icnt
123 if (iw(k)==jb) cycle lcol
124 enddo
125 icnt=icnt+1
126 iw(icnt)=jb
127 enddo lcol
128 enddo
129 btmat%index(i)=btmat%index(i-1)+icnt
130 enddo
131
132 btmat%nnz=btmat%index(btmat%nr)
133 allocate(btmat%item(btmat%nnz))
134 allocate(btmat%A(btmat%nnz*ndof2))
135 btmat%A=0.d0
136
137 do i=1,btmat%nr
138 icnt=0
139 do idof=1,ndof
140 idx=(i-1)*ndof+idof
141 ls=tmat%index(idx-1)+1
142 le=tmat%index(idx)
143 lcol2: do l=ls,le
144 j=tmat%item(l)
145 jb=(j-1)/ndof+1
146 do k=1,icnt
147 if (iw(k)==jb) cycle lcol2
148 enddo
149 icnt=icnt+1
150 iw(icnt)=jb
151 enddo lcol2
152 enddo
153 ! if (icnt /= BTmat%index(i)-BTmat%index(i-1)) stop 'ERROR: blocking Tmat'
154 ! ! call qsort(iw, 1, icnt)
155 lb0=btmat%index(i-1)
156 do k=1,icnt
157 btmat%item(lb0+k)=iw(k)
158 enddo
159 do idof=1,ndof
160 idx=(i-1)*ndof+idof
161 ls=tmat%index(idx-1)+1
162 le=tmat%index(idx)
163 lcol3: do l=ls,le
164 j=tmat%item(l)
165 jb=(j-1)/ndof+1
166 jdof=mod((j-1), ndof)+1
167 ks=btmat%index(i-1)+1
168 ke=btmat%index(i)
169 do k=ks,ke
170 if (btmat%item(k)==jb) then
171 btmat%A((k-1)*ndof2+(idof-1)*ndof+jdof)=tmat%A(l)
172 cycle lcol3
173 endif
174 enddo
175 stop 'ERROR: something wrong in blocking Tmat'
176 enddo lcol3
177 enddo
178 enddo
179 end subroutine hecmw_localmat_blocking
180
181 subroutine hecmw_localmat_free(Tmat)
182 implicit none
183 type (hecmwst_local_matrix), intent(inout) :: tmat
184 deallocate(tmat%index)
185 if (associated(tmat%item)) deallocate(tmat%item)
186 if (associated(tmat%A)) deallocate(tmat%A)
187 tmat%nr=0
188 tmat%nc=0
189 tmat%nnz=0
190 tmat%ndof=0
191 end subroutine hecmw_localmat_free
192
193 subroutine hecmw_trimatmul_ttkt(hecMESH, BTtmat, hecMAT, BTmat, &
194 iwS, num_lagrange, hecTKT)
196 implicit none
197 type (hecmwst_local_mesh), intent(inout) :: hecmesh
198 type (hecmwst_local_matrix), intent(inout) :: bttmat, btmat
199 type (hecmwst_matrix), intent(in) :: hecmat
200 integer(kind=kint), intent(in) :: iws(:)
201 integer(kind=kint), intent(in) :: num_lagrange
202 type (hecmwst_matrix), intent(inout) :: hectkt
203 if (hecmesh%n_neighbor_pe == 0) then
204 call hecmw_trimatmul_ttkt_serial(hecmesh, bttmat, hecmat, btmat, &
205 iws, num_lagrange, hectkt)
206 else
207 call hecmw_trimatmul_ttkt_parallel(hecmesh, bttmat, hecmat, btmat, &
208 iws, num_lagrange, hectkt)
209 endif
210 end subroutine hecmw_trimatmul_ttkt
211
212 subroutine hecmw_trimatmul_ttkt_serial(hecMESH, BTtmat, hecMAT, BTmat, &
213 iwS, num_lagrange, hecTKT)
215 implicit none
216 type (hecmwst_local_mesh), intent(in) :: hecmesh
217 type (hecmwst_local_matrix), intent(in) :: bttmat, btmat
218 type (hecmwst_matrix), intent(in) :: hecmat
219 integer(kind=kint), intent(in) :: iws(:)
220 integer(kind=kint), intent(in) :: num_lagrange
221 type (hecmwst_matrix), intent(inout) :: hectkt
222 type (hecmwst_local_matrix) :: bttkt
223 real(kind=kreal) :: num
224
225 ! perform three matrices multiplication for elimination
226 call trimatmul_ttkt(bttmat, hecmat, btmat, bttkt)
227 !write(700+hecmw_comm_get_rank(),*) 'DEBUG: BTtKT(MPC)'
228 !call hecmw_localmat_write(BTtKT, 700+hecmw_comm_get_rank())
229
230 ! place small numbers where the DOF is eliminated
231 !num = hecmw_mat_diag_max(hecMAT, hecMESH) * 1.0d-10
232 num = 1.d0
233 call place_num_on_diag(bttkt, iws, num_lagrange, num)
234 !write(700+hecmw_comm_get_rank(),*) 'DEBUG: BTtKT(MPC)'
235 !call hecmw_localmat_write(BTtKT, 700+hecmw_comm_get_rank())
236
237 ! make_new HECMW matrix
238 call make_new_hecmat(hecmat, bttkt, hectkt)
239 call hecmw_localmat_free(bttkt)
240 end subroutine hecmw_trimatmul_ttkt_serial
241
242 subroutine hecmw_trimatmul_ttkt_parallel(hecMESH, BTtmat, hecMAT, BTmat, &
243 iwS, num_lagrange, hecTKT)
245 implicit none
246 type (hecmwst_local_mesh), intent(inout) :: hecmesh
247 type (hecmwst_local_matrix), intent(inout) :: bttmat, btmat
248 type (hecmwst_matrix), intent(in) :: hecmat
249 integer(kind=kint), intent(in) :: iws(:)
250 integer(kind=kint), intent(in) :: num_lagrange
251 type (hecmwst_matrix), intent(inout) :: hectkt
252 type (hecmwst_local_matrix) :: bkmat, bttkmat, bttktmat
253 real(kind=kreal) :: num
254 real(kind=kreal) :: t0, t1
255
256 ! perform three matrices multiplication for elimination
257 t0 = hecmw_wtime()
258 call hecmw_localmat_init_with_hecmat(bkmat, hecmat)
259 if (debug >= 3) then
260 write(700+hecmw_comm_get_rank(),*) 'BKmat (hecMAT)'
261 if (debug == 3) then
262 call hecmw_localmat_write_ij(bkmat, 700+hecmw_comm_get_rank())
263 else
265 endif
266 endif
267 t1 = hecmw_wtime()
268 if (timer >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (1) : ",t1-t0
269
270 t0 = hecmw_wtime()
271 call hecmw_localmat_multmat(bttmat, bkmat, hecmesh, bttkmat)
272 if (debug >= 2) write(0,*) ' DEBUG2: multiply Tt and K done'
273 if (debug >= 3) then
274 write(700+hecmw_comm_get_rank(),*) 'BTtKmat'
275 if (debug == 3) then
276 call hecmw_localmat_write_ij(bttkmat, 700+hecmw_comm_get_rank())
277 else
278 call hecmw_localmat_write(bttkmat, 700+hecmw_comm_get_rank())
279 endif
280 endif
281 call hecmw_localmat_free(bkmat)
282 t1 = hecmw_wtime()
283 if (timer >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (2) : ",t1-t0
284
285 t0 = hecmw_wtime()
286 call hecmw_localmat_multmat(bttkmat, btmat, hecmesh, bttktmat)
287 if (debug >= 2) write(0,*) ' DEBUG2: multiply TtK and T done'
288 if (debug >= 3) then
289 write(700+hecmw_comm_get_rank(),*) 'BTtKTmat'
290 if (debug == 3) then
291 call hecmw_localmat_write_ij(bttktmat, 700+hecmw_comm_get_rank())
292 else
293 call hecmw_localmat_write(bttktmat, 700+hecmw_comm_get_rank())
294 endif
295 endif
296 call hecmw_localmat_free(bttkmat)
297 t1 = hecmw_wtime()
298 if (timer >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (3) : ",t1-t0
299
300 t0 = hecmw_wtime()
301 ! place small numbers where the DOF is eliminated
302 !num = hecmw_mat_diag_max(hecMAT, hecMESH) * 1.0d-10
303 num = 1.d0
304 call place_num_on_diag(bttktmat, iws, num_lagrange, num)
305 if (debug >= 3) then
306 write(700+hecmw_comm_get_rank(),*) 'num_lagrange =', num_lagrange
307 write(700+hecmw_comm_get_rank(),*) 'iwS(1:num_lagrange)'
308 write(700+hecmw_comm_get_rank(),*) iws(1:num_lagrange)
309 write(700+hecmw_comm_get_rank(),*) 'BTtKTmat (place 1.0 on slave diag)'
310 if (debug == 3) then
311 call hecmw_localmat_write_ij(bttktmat, 700+hecmw_comm_get_rank())
312 else
313 call hecmw_localmat_write(bttktmat, 700+hecmw_comm_get_rank())
314 endif
315 endif
316 t1 = hecmw_wtime()
317 if (timer >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (4) : ",t1-t0
318
319 t0 = hecmw_wtime()
320 ! make_new HECMW matrix
321 call make_new_hecmat(hecmat, bttktmat, hectkt)
322 call hecmw_localmat_free(bttktmat)
323 t1 = hecmw_wtime()
324 if (timer >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (5) : ",t1-t0
325 end subroutine hecmw_trimatmul_ttkt_parallel
326
327 subroutine trimatmul_ttkt(BTtmat, hecMAT, BTmat, BTtKT)
328 implicit none
329 type (hecmwst_local_matrix), intent(in) :: bttmat, btmat
330 type (hecmwst_matrix), intent(in) :: hecmat
331 type (hecmwst_local_matrix), intent(out) :: bttkt
332 integer :: nr, nc, ndof, ndof2, i, icnt, js, je, j, jj, ks, ke, k, kk
333 integer :: ls, le, l, ll, m, ms, me, mm
334 integer, allocatable :: iw(:)
335 real(kind=kreal), pointer :: ttp(:), kp(:), tp(:), ttktp(:)
336 ! real(kind=kreal) :: tsym_s, tsym_e, tnum_s, tnum_e
337
338 nr=bttmat%nr
339 nc=btmat%nc
340 ndof=bttmat%ndof
341 ndof2=ndof*ndof
342
343 bttkt%nr=nr
344 bttkt%nc=nc
345 bttkt%ndof=ndof
346 allocate(bttkt%index(0:nr))
347
348 ! tsym_s = hecmw_wtime()
349
350 !$omp parallel default(none), &
351 !$omp& private(iw,i,icnt,js,je,j,jj,ks,ke,k,kk,ls,le,l,ll,m), &
352 !$omp& shared(nr,nc,BTtmat,hecMAT,BTmat,BTtKT)
353 allocate(iw(nc))
354 !$omp do
355 do i=1,nr
356 icnt=0
357 js=bttmat%index(i-1)+1
358 je=bttmat%index(i)
359 do j=js,je
360 jj=bttmat%item(j)
361 ! lower
362 ks=hecmat%indexL(jj-1)+1
363 ke=hecmat%indexL(jj)
364 do k=ks,ke
365 kk=hecmat%itemL(k)
366 ls=btmat%index(kk-1)+1
367 le=btmat%index(kk)
368 ll1: do l=ls,le
369 ll=btmat%item(l)
370 do m=1,icnt
371 if (iw(m)==ll) cycle ll1
372 enddo
373 icnt=icnt+1
374 iw(icnt)=ll
375 !if (i==1) write(0,*) 'l', icnt, jj, kk, ll
376 enddo ll1
377 enddo
378 ! diagonal
379 ls=btmat%index(jj-1)+1
380 le=btmat%index(jj)
381 ll2: do l=ls,le
382 ll=btmat%item(l)
383 do m=1,icnt
384 if (iw(m)==ll) cycle ll2
385 enddo
386 icnt=icnt+1
387 iw(icnt)=ll
388 !if (i==1) write(0,*) 'd', icnt, jj, kk, ll
389 enddo ll2
390 ! upper
391 ks=hecmat%indexU(jj-1)+1
392 ke=hecmat%indexU(jj)
393 do k=ks,ke
394 kk=hecmat%itemU(k)
395 ls=btmat%index(kk-1)+1
396 le=btmat%index(kk)
397 ll3: do l=ls,le
398 ll=btmat%item(l)
399 do m=1,icnt
400 if (iw(m)==ll) cycle ll3
401 enddo
402 icnt=icnt+1
403 iw(icnt)=ll
404 !if (i==1) write(0,*) 'u', icnt, jj, kk, ll
405 enddo ll3
406 enddo
407 enddo
408 if (icnt == 0) icnt=1
409 !if (i==1) write(0,*) iw(1:icnt)
410 bttkt%index(i)=icnt
411 enddo
412 !$omp end do
413 deallocate(iw)
414 !$omp end parallel
415
416 ! tsym_e = hecmw_wtime()
417 ! write(0,*) 'tsym:',tsym_e-tsym_s
418
419 bttkt%index(0)=0
420 do i=1,nr
421 bttkt%index(i)=bttkt%index(i-1)+bttkt%index(i)
422 enddo
423 !write(0,*) BTtKT%index(1:n)-BTtKT%index(0:n-1)
424
425 bttkt%nnz=bttkt%index(nr)
426 allocate(bttkt%item(bttkt%nnz))
427 allocate(bttkt%A(bttkt%nnz*ndof2))
428 bttkt%item=0
429 bttkt%A=0.d0
430
431 ! tnum_s = hecmw_wtime()
432
433 !$omp parallel default(none), &
434 !$omp& private(i,icnt,js,je,j,jj,ks,ke,k,kk,ls,le,l,ll,m, &
435 !$omp& ms,me,mm,Ttp,Kp,Tp,TtKTp), &
436 !$omp& shared(nr,nc,BTtmat,hecMAT,BTmat,BTtKT,ndof,ndof2)
437 !$omp do
438 do i=1,nr
439 icnt=0
440 ms=bttkt%index(i-1)+1
441 !me=BTtKT%index(i)
442 js=bttmat%index(i-1)+1
443 je=bttmat%index(i)
444 do j=js,je
445 jj=bttmat%item(j)
446 ttp=>bttmat%A((j-1)*ndof2+1:j*ndof2)
447 ! lower
448 ks=hecmat%indexL(jj-1)+1
449 ke=hecmat%indexL(jj)
450 do k=ks,ke
451 kk=hecmat%itemL(k)
452 kp=>hecmat%AL((k-1)*ndof2+1:k*ndof2)
453 ls=btmat%index(kk-1)+1
454 le=btmat%index(kk)
455 do l=ls,le
456 ll=btmat%item(l)
457 tp=>btmat%A((l-1)*ndof2+1:l*ndof2)
458 me=ms-1+icnt
459 mm=-1
460 do m=ms,me
461 if (bttkt%item(m)==ll) mm=m
462 enddo
463 if (mm<0) then
464 icnt=icnt+1
465 mm=me+1
466 bttkt%item(mm)=ll
467 !if (i==1) write(0,*) 'l', mm, jj, kk, ll
468 endif
469 ttktp=>bttkt%A((mm-1)*ndof2+1:mm*ndof2)
470 call blk_trimatmul_add(ndof, ttp, kp, tp, ttktp)
471 enddo
472 enddo
473 ! diagonal
474 kp=>hecmat%D((jj-1)*ndof2+1:jj*ndof2)
475 ls=btmat%index(jj-1)+1
476 le=btmat%index(jj)
477 do l=ls,le
478 ll=btmat%item(l)
479 tp=>btmat%A((l-1)*ndof2+1:l*ndof2)
480 me=ms-1+icnt
481 mm=-1
482 do m=ms,me
483 if (bttkt%item(m)==ll) mm=m
484 enddo
485 if (mm<0) then
486 icnt=icnt+1
487 mm=me+1
488 bttkt%item(mm)=ll
489 !if (i==1) write(0,*) 'd', mm, jj, kk, ll
490 endif
491 ttktp=>bttkt%A((mm-1)*ndof2+1:mm*ndof2)
492 call blk_trimatmul_add(ndof, ttp, kp, tp, ttktp)
493 enddo
494 ! upper
495 ks=hecmat%indexU(jj-1)+1
496 ke=hecmat%indexU(jj)
497 do k=ks,ke
498 kk=hecmat%itemU(k)
499 kp=>hecmat%AU((k-1)*ndof2+1:k*ndof2)
500 ls=btmat%index(kk-1)+1
501 le=btmat%index(kk)
502 do l=ls,le
503 ll=btmat%item(l)
504 tp=>btmat%A((l-1)*ndof2+1:l*ndof2)
505 me=ms-1+icnt
506 mm=-1
507 do m=ms,me
508 if (bttkt%item(m)==ll) mm=m
509 enddo
510 if (mm<0) then
511 icnt=icnt+1
512 mm=me+1
513 bttkt%item(mm)=ll
514 !if (i==1) write(0,*) 'u', mm, jj, kk, ll
515 endif
516 ttktp=>bttkt%A((mm-1)*ndof2+1:mm*ndof2)
517 call blk_trimatmul_add(ndof, ttp, kp, tp, ttktp)
518 enddo
519 enddo
520 enddo
521 if (icnt == 0) then
522 icnt=1
523 bttkt%item(ms)=i
524 endif
525 ! error check!
526 !write(0,*) BTtKT%item(ms:ms-1+icnt)
527 !write(0,*) BTtKT%index(i)-BTtKT%index(i-1), icnt
528 if (ms-1+icnt /= bttkt%index(i)) stop 'ERROR: trimatmul'
529 enddo
530 !$omp end do
531 !$omp end parallel
532
533 ! tnum_e = hecmw_wtime()
534 ! write(0,*) 'tnum:',tnum_e-tnum_s
535 end subroutine trimatmul_ttkt
536
537 subroutine blk_trimatmul_add(ndof, A, B, C, ABC)
538 implicit none
539 integer, intent(in) :: ndof
540 real(kind=kreal), intent(in) :: a(:), b(:), c(:)
541 real(kind=kreal), intent(inout) :: abc(:)
542 real(kind=kreal), allocatable :: ab(:)
543 integer :: ndof2, i, j, k, i0, j0, ij, ik, jk
544
545 ndof2=ndof*ndof
546 allocate(ab(ndof2))
547 ab=0.d0
548
549 do i=1,ndof
550 i0=(i-1)*ndof
551 do j=1,ndof
552 ij=i0+j
553 j0=(j-1)*ndof
554 do k=1,ndof
555 ik=i0+k
556 jk=j0+k
557 ab(ik)=ab(ik)+a(ij)*b(jk)
558 enddo
559 enddo
560 enddo
561
562 do i=1,ndof
563 i0=(i-1)*ndof
564 do j=1,ndof
565 ij=i0+j
566 j0=(j-1)*ndof
567 do k=1,ndof
568 ik=i0+k
569 jk=j0+k
570 abc(ik)=abc(ik)+ab(ij)*c(jk)
571 enddo
572 enddo
573 enddo
574
575 deallocate(ab)
576 end subroutine blk_trimatmul_add
577
578 subroutine place_num_on_diag(BTtKT, iwS, num_lagrange, num)
579 implicit none
580 type (hecmwst_local_matrix), intent(inout) :: bttkt
581 integer(kind=kint), intent(in) :: iws(:)
582 integer(kind=kint), intent(in) :: num_lagrange
583 real(kind=kreal), intent(in) :: num
584 integer(kind=kint) :: ndof, ndof2, ilag, i, idof, js, je, j, jj
585 integer(kind=kint) :: nmissing, k, ks, ke
586 integer(kind=kint), allocatable :: missing(:), cnt(:)
587 integer(kind=kint), pointer :: index(:), item(:)
588 real(kind=kreal), pointer :: a(:)
589
590 ndof=bttkt%ndof
591 ndof2=ndof*ndof
592
593 ! check if there are places
594 allocate(missing(num_lagrange))
595 nmissing = 0
596 outer1: do ilag=1,num_lagrange
597 i=(iws(ilag)-1)/ndof+1
598 idof=mod(iws(ilag)-1, ndof)+1
599 js=bttkt%index(i-1)+1
600 je=bttkt%index(i)
601 do j=js,je
602 jj=bttkt%item(j)
603 if (jj==i) cycle outer1 ! found place
604 enddo
605 ! not found
606 do k=1,nmissing
607 if (missing(k) == i) cycle outer1 ! already marked as missing
608 enddo
609 nmissing = nmissing + 1
610 missing(nmissing) = i
611 enddo outer1
612
613 ! if not, reallocate
614 if (nmissing > 0) then
615 allocate(cnt(bttkt%nr))
616 allocate(index(0:bttkt%nr))
617 do i=1,bttkt%nr
618 cnt(i) = bttkt%index(i) - bttkt%index(i-1)
619 enddo
620 do i=1,nmissing
621 cnt(missing(i)) = cnt(missing(i)) + 1
622 enddo
623 call make_index(bttkt%nr, cnt, index)
624 allocate(item(bttkt%nnz + nmissing))
625 allocate(a(ndof2 * (bttkt%nnz + nmissing)))
626 do i=1,bttkt%nr
627 ks=index(i-1)+1
628 js=bttkt%index(i-1)+1
629 je=bttkt%index(i)
630 item(ks:ks+(je-js))=bttkt%item(js:je)
631 a(ndof2*(ks-1)+1:ndof2*(ks+(je-js)))=bttkt%A(ndof2*(js-1)+1:ndof2*je)
632 enddo
633 do i=1,nmissing
634 ke=index(missing(i))
635 item(ke)=missing(i)
636 a(ndof2*(ke-1)+1:ndof2*ke)=0.d0
637 enddo
638 deallocate(bttkt%index)
639 deallocate(bttkt%item)
640 deallocate(bttkt%A)
641 bttkt%index => index
642 bttkt%item => item
643 bttkt%A => a
644 bttkt%nnz = index(bttkt%nr)
645 deallocate(cnt)
646 endif
647 deallocate(missing)
648
649 ! place num
650 outer: do ilag=1,num_lagrange
651 i=(iws(ilag)-1)/ndof+1
652 idof=mod(iws(ilag)-1, ndof)+1
653 js=bttkt%index(i-1)+1
654 je=bttkt%index(i)
655 do j=js,je
656 jj=bttkt%item(j)
657 if (jj==i) then
658 !write(0,*) ilag, i, idof
659 bttkt%A((j-1)*ndof2+(idof-1)*ndof+idof)=num
660 cycle outer
661 endif
662 enddo
663 enddo outer
664 end subroutine place_num_on_diag
665
666 subroutine replace_hecmat(hecMAT, BTtKT)
667 implicit none
668 type (hecmwst_matrix), intent(inout) :: hecmat
669 type (hecmwst_local_matrix), intent(in) :: bttkt
670 integer :: nr, nc, ndof, ndof2, i, nl, nu, js, je, j, jj
671 integer :: ksl, ksu, k
672
673 nr=bttkt%nr
674 nc=bttkt%nc
675 ndof=hecmat%NDOF
676 ndof2=ndof*ndof
677
678 ! free old hecMAT
679 if (associated(hecmat%AL)) deallocate(hecmat%AL)
680 if (associated(hecmat%AU)) deallocate(hecmat%AU)
681 if (associated(hecmat%itemL)) deallocate(hecmat%itemL)
682 if (associated(hecmat%itemU)) deallocate(hecmat%itemU)
683 hecmat%indexL=0
684 hecmat%indexU=0
685
686 ! count NPL, NPU
687 !$omp parallel default(none),private(i,nl,nu,js,je,j,jj), &
688 !$omp& shared(nr,BTtKT,hecMAT)
689 !$omp do
690 do i=1,nr
691 nl=0
692 nu=0
693 js=bttkt%index(i-1)+1
694 je=bttkt%index(i)
695 do j=js,je
696 jj=bttkt%item(j)
697 if (jj < i) then
698 nl=nl+1
699 elseif (i < jj) then
700 nu=nu+1
701 else
702 ! diagonal
703 endif
704 enddo
705 hecmat%indexL(i)=nl
706 hecmat%indexU(i)=nu
707 enddo
708 !$omp end do
709 !$omp end parallel
710
711 hecmat%indexL(0)=0
712 hecmat%indexU(0)=0
713 do i=1,nc
714 hecmat%indexL(i)=hecmat%indexL(i-1)+hecmat%indexL(i)
715 hecmat%indexU(i)=hecmat%indexU(i-1)+hecmat%indexU(i)
716 enddo
717 hecmat%NPL=hecmat%indexL(nc)
718 hecmat%NPU=hecmat%indexU(nc)
719
720 ! allocate new hecMAT
721 allocate(hecmat%itemL(hecmat%NPL), hecmat%itemU(hecmat%NPU))
722 allocate(hecmat%AL(hecmat%NPL*ndof2), hecmat%AU(hecmat%NPU*ndof2))
723 hecmat%itemL=0
724 hecmat%itemU=0
725 hecmat%D=0.d0
726 hecmat%AL=0.d0
727 hecmat%AU=0.d0
728
729 ! copy from BTtKT to hecMAT
730 !$omp parallel default(none),private(i,nl,nu,js,je,ksl,ksu,j,jj,k), &
731 !$omp& shared(nr,BTtKT,hecMAT,ndof2)
732 !$omp do
733 do i=1,nr
734 nl=0
735 nu=0
736 js=bttkt%index(i-1)+1
737 je=bttkt%index(i)
738 ksl=hecmat%indexL(i-1)+1
739 ksu=hecmat%indexU(i-1)+1
740 do j=js,je
741 jj=bttkt%item(j)
742 if (jj < i) then
743 k=ksl+nl
744 hecmat%itemL(k)=jj
745 hecmat%AL((k-1)*ndof2+1:k*ndof2)=bttkt%A((j-1)*ndof2+1:j*ndof2)
746 nl=nl+1
747 elseif (i < jj) then
748 k=ksu+nu
749 hecmat%itemU(k)=jj
750 hecmat%AU((k-1)*ndof2+1:k*ndof2)=bttkt%A((j-1)*ndof2+1:j*ndof2)
751 nu=nu+1
752 else
753 hecmat%D((i-1)*ndof2+1:i*ndof2)=bttkt%A((j-1)*ndof2+1:j*ndof2)
754 endif
755 enddo
756 ! if (ksl+nl /= hecMAT%indexL(i)+1) stop 'ERROR: indexL'
757 ! if (ksu+nu /= hecMAT%indexU(i)+1) stop 'ERROR: indexU'
758 enddo
759 !$omp end do
760 !$omp end parallel
761
762 ! do i=1,hecMAT%NPL
763 ! if (hecMAT%itemL(i) <= 0) stop 'ERROR: negative itemL'
764 ! if (hecMAT%itemL(i) > nc) stop 'ERROR: too big itemL'
765 ! enddo
766 ! do i=1,hecMAT%NPU
767 ! if (hecMAT%itemU(i) <= 0) stop 'ERROR: negative itemU'
768 ! if (hecMAT%itemU(i) > nc) stop 'ERROR: too big itemU'
769 ! enddo
770 end subroutine replace_hecmat
771
772 subroutine make_new_hecmat(hecMAT, BTtKT, hecTKT)
773 implicit none
774 type(hecmwst_matrix), intent(in) :: hecmat
775 type(hecmwst_local_matrix), intent(in) :: bttkt
776 type(hecmwst_matrix), intent(inout) :: hectkt
777 integer(kind=kint) :: nr, nc, ndof, ndof2
778
779 nr=bttkt%nr
780 nc=bttkt%nc
781 ndof=bttkt%ndof
782 ndof2=ndof*ndof
783
784 !write(0,*) 'DEBUG: nr, nc =',nr,nc
785
786 ! if (nr /= nc) then
787 ! stop 'ERROR: nr /= nc'
788 ! endif
789 hectkt%N =hecmat%N
790 hectkt%NP=nc
791 hectkt%NDOF=ndof
792
793 if (associated(hectkt%D)) deallocate(hectkt%D)
794 allocate(hectkt%D(nc*ndof2))
795
796 if (associated(hectkt%indexL)) deallocate(hectkt%indexL)
797 if (associated(hectkt%indexU)) deallocate(hectkt%indexU)
798 allocate(hectkt%indexL(0:nc))
799 allocate(hectkt%indexU(0:nc))
800
801 hectkt%Iarray=hecmat%Iarray
802 hectkt%Rarray=hecmat%Rarray
803
804 call replace_hecmat(hectkt, bttkt)
805 end subroutine make_new_hecmat
806
807 subroutine hecmw_localmat_mulvec(BTmat, V, TV)
808 implicit none
809 type (hecmwst_local_matrix), intent(in) :: btmat
810 real(kind=kreal), intent(in), target :: v(:)
811 real(kind=kreal), intent(out), target :: tv(:)
812 real(kind=kreal), pointer :: tvp(:), tp(:), vp(:)
813 integer :: nr, ndof, ndof2, i, js, je, j, jj, k, kl0, l
814 !!$ real(kind=kreal) :: vnorm
815
816 nr=btmat%nr
817 ndof=btmat%ndof
818 ndof2=ndof*ndof
819
820 tv=0.d0
821
822 !!$ vnorm=0.d0
823 !!$ do i=1,nr*ndof
824 !!$ vnorm=vnorm+V(i)**2
825 !!$ enddo
826 !!$ write(0,*) 'vnorm:', sqrt(vnorm)
827
828 !$omp parallel default(none),private(i,TVp,js,je,j,jj,Tp,Vp,k,kl0,l), &
829 !$omp& shared(nr,TV,ndof,BTmat,ndof2,V)
830 !$omp do
831 do i=1,nr
832 tvp=>tv((i-1)*ndof+1:i*ndof)
833 js=btmat%index(i-1)+1
834 je=btmat%index(i)
835 do j=js,je
836 jj=btmat%item(j)
837 tp=>btmat%A((j-1)*ndof2+1:j*ndof2)
838 vp=>v((jj-1)*ndof+1:jj*ndof)
839 do k=1,ndof
840 kl0=(k-1)*ndof
841 do l=1,ndof
842 tvp(k)=tvp(k)+tp(kl0+l)*vp(l)
843 enddo
844 enddo
845 enddo
846 enddo
847 !$omp end do
848 !$omp end parallel
849 end subroutine hecmw_localmat_mulvec
850
851 subroutine hecmw_trimatmul_ttkt_mpc(hecMESH, hecMAT, hecTKT)
852 implicit none
853 type (hecmwst_local_mesh), intent(inout) :: hecmesh
854 type (hecmwst_matrix), intent(in) :: hecmat
855 type (hecmwst_matrix), intent(inout) :: hectkt
856 type (hecmwst_local_matrix) :: btmat, bttmat
857 integer(kind=kint), allocatable :: iws(:)
858 integer(kind=kint) :: ndof, n_mpc, i_mpc
859 integer(kind=kint) :: i, j, k, kk, ilag
860 real(kind=kreal) :: t0, t1
861 t0 = hecmw_wtime()
862 ndof=hecmat%NDOF
863 n_mpc=0
864 outer: do i=1,hecmesh%mpc%n_mpc
865 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
866 if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer
867 enddo
868 n_mpc=n_mpc+1
869 enddo outer
870 allocate(iws(n_mpc))
871 i_mpc=0
872 outer2: do i=1,hecmesh%mpc%n_mpc
873 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
874 if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer2
875 enddo
876 i_mpc=i_mpc+1
877 k=hecmesh%mpc%mpc_index(i-1)+1
878 kk=ndof*(hecmesh%mpc%mpc_item(k)-1)+hecmesh%mpc%mpc_dof(k)
879 iws(i_mpc)=kk
880 enddo outer2
881 if (debug >= 2) then
882 write(700+hecmw_comm_get_rank(),*) 'DEBUG: n_mpc, slaves',n_mpc,iws(1:n_mpc)
883 endif
884 t1 = hecmw_wtime()
885 if (timer >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (1) : ",t1-t0
886 t0 = hecmw_wtime()
887 call make_btmat_mpc(hecmesh, ndof, btmat)
888 if (debug >= 3) then
889 write(700+hecmw_comm_get_rank(),*) 'DEBUG: BTmat(MPC)'
890 if (debug == 3) then
891 call hecmw_localmat_write_ij(btmat,700+hecmw_comm_get_rank())
892 else
894 endif
895 endif
896 t1 = hecmw_wtime()
897 if (timer >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (2) : ",t1-t0
898 t0 = hecmw_wtime()
899 ! call make_BTtmat_mpc(hecMESH, ndof, BTtmat)
900 call hecmw_localmat_transpose(btmat, bttmat)
901 ! if (hecmw_localmat_equal(BTtmat, BTtmat2) == 0) then
902 ! write(0,*) 'ERROR: BTtmat2 is incorrect!!!'
903 ! else
904 ! write(0,*) 'DEBUG: BTtmat2 is correct'
905 ! endif
906 if (debug >= 3) then
907 write(700+hecmw_comm_get_rank(),*) 'DEBUG: BTtmat(MPC)'
908 if (debug == 3) then
909 call hecmw_localmat_write_ij(bttmat,700+hecmw_comm_get_rank())
910 else
912 endif
913 endif
914 t1 = hecmw_wtime()
915 if (timer >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (3) : ",t1-t0
916 t0 = hecmw_wtime()
917 call hecmw_trimatmul_ttkt(hecmesh, bttmat, hecmat, btmat, iws, n_mpc, hectkt)
918 t1 = hecmw_wtime()
919 if (timer >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (4) : ",t1-t0
920 t0 = hecmw_wtime()
921
922 if (associated(hectkt%B)) deallocate(hectkt%B)
923 if (associated(hectkt%X)) deallocate(hectkt%X)
924 allocate(hectkt%B(ndof*hectkt%NP))
925 allocate(hectkt%X(ndof*hectkt%NP))
926 do i=1, size(hecmat%B)
927 hectkt%B(i) = hecmat%B(i)
928 enddo
929 do i=1, size(hecmat%X)
930 hectkt%X(i) = hecmat%X(i)
931 enddo
932 do ilag=1,n_mpc
933 hectkt%X(iws(ilag)) = 0.d0
934 enddo
935
936 call hecmw_localmat_free(btmat)
937 call hecmw_localmat_free(bttmat)
938 ! call hecmw_localmat_free(BTtmat2)
939 deallocate(iws)
940 t1 = hecmw_wtime()
941 if (timer >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (5) : ",t1-t0
942 end subroutine hecmw_trimatmul_ttkt_mpc
943
944 subroutine make_btmat_mpc(hecMESH, ndof, BTmat)
945 implicit none
946 type (hecmwst_local_mesh), intent(in) :: hecmesh
947 integer(kind=kint), intent(in) :: ndof
948 type (hecmwst_local_matrix), intent(out) :: btmat
949 type (hecmwst_local_matrix) :: tmat
950 integer(kind=kint) :: n_mpc
951 integer(kind=kint) :: i,j,k,js,jj,kk
952 n_mpc=0
953 outer: do i=1,hecmesh%mpc%n_mpc
954 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
955 if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer
956 enddo
957 n_mpc=n_mpc+1
958 enddo outer
959 tmat%nr=hecmesh%n_node*ndof
960 tmat%nc=tmat%nr
961 tmat%ndof=1
962 allocate(tmat%index(0:tmat%nr))
963 ! count nonzero in each row
964 tmat%index(1:tmat%nr)=1
965 outer2: do i=1,hecmesh%mpc%n_mpc
966 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
967 if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer2
968 enddo
969 k=hecmesh%mpc%mpc_index(i-1)+1
970 kk=ndof*(hecmesh%mpc%mpc_item(k)-1)+hecmesh%mpc%mpc_dof(k)
971 tmat%index(kk)=hecmesh%mpc%mpc_index(i)-hecmesh%mpc%mpc_index(i-1)-1
972 enddo outer2
973 ! index
974 tmat%index(0)=0
975 do i=1,tmat%nr
976 tmat%index(i)=tmat%index(i-1)+tmat%index(i)
977 enddo
978 tmat%nnz=tmat%index(tmat%nr)
979 allocate(tmat%item(tmat%nnz), tmat%A(tmat%nnz))
980 ! diag
981 do i=1,tmat%nr
982 js=tmat%index(i-1)+1
983 tmat%item(js)=i
984 tmat%A(js)=1.d0
985 enddo
986 ! others
987 outer3: do i=1,hecmesh%mpc%n_mpc
988 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
989 if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer3
990 enddo
991 k=hecmesh%mpc%mpc_index(i-1)+1
992 kk=ndof*(hecmesh%mpc%mpc_item(k)-1)+hecmesh%mpc%mpc_dof(k)
993 js=tmat%index(kk-1)+1
994 do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
995 jj = ndof * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
996 tmat%item(js)=jj
997 tmat%A(js)=-hecmesh%mpc%mpc_val(j)
998 js=js+1
999 enddo
1000 enddo outer3
1001 !write(700+hecmw_comm_get_rank(),*) 'DEBUG: Tmat(MPC)'
1002 !call hecmw_localmat_write(Tmat,700+hecmw_comm_get_rank())
1003 call hecmw_localmat_blocking(tmat, ndof, btmat)
1004 call hecmw_localmat_free(tmat)
1005 end subroutine make_btmat_mpc
1006
1007 subroutine make_bttmat_mpc(hecMESH, ndof, BTtmat)
1008 implicit none
1009 type (hecmwst_local_mesh), intent(in) :: hecmesh
1010 integer(kind=kint), intent(in) :: ndof
1011 type (hecmwst_local_matrix), intent(out) :: bttmat
1012 type (hecmwst_local_matrix) :: ttmat
1013 integer(kind=kint) :: n_mpc
1014 integer(kind=kint) :: i,j,k,js,je,jj,kk
1015 integer(kind=kint), allocatable :: iw(:)
1016 n_mpc=0
1017 outer: do i=1,hecmesh%mpc%n_mpc
1018 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
1019 if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer
1020 enddo
1021 n_mpc=n_mpc+1
1022 enddo outer
1023 ttmat%nr=hecmesh%n_node*ndof
1024 ttmat%nc=ttmat%nr
1025 ttmat%ndof=1
1026 allocate(ttmat%index(0:ttmat%nr))
1027 ! count nonzero in each row
1028 ttmat%index(1:ttmat%nr)=1
1029 outer2: do i=1,hecmesh%mpc%n_mpc
1030 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
1031 if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer2
1032 enddo
1033 k=hecmesh%mpc%mpc_index(i-1)+1
1034 kk=ndof*(hecmesh%mpc%mpc_item(k)-1)+hecmesh%mpc%mpc_dof(k)
1035 ttmat%index(kk)=0
1036 do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
1037 jj = ndof * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
1038 ttmat%index(jj)=ttmat%index(jj)+1
1039 enddo
1040 enddo outer2
1041 ! index
1042 ttmat%index(0)=0
1043 do i=1,ttmat%nr
1044 ttmat%index(i)=ttmat%index(i-1)+ttmat%index(i)
1045 enddo
1046 ttmat%nnz=ttmat%index(ttmat%nr)
1047 allocate(ttmat%item(ttmat%nnz), ttmat%A(ttmat%nnz))
1048 ! diag
1049 do i=1,ttmat%nr
1050 js=ttmat%index(i-1)+1
1051 je=ttmat%index(i)
1052 if (js <= je) then
1053 ttmat%item(js)=i
1054 ttmat%A(js)=1.d0
1055 endif
1056 enddo
1057 ! others
1058 allocate(iw(ttmat%nr))
1059 iw(:)=1
1060 outer3: do i=1,hecmesh%mpc%n_mpc
1061 do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
1062 if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer3
1063 enddo
1064 k=hecmesh%mpc%mpc_index(i-1)+1
1065 kk=ndof*(hecmesh%mpc%mpc_item(k)-1)+hecmesh%mpc%mpc_dof(k)
1066 do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
1067 jj = ndof * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
1068 js=ttmat%index(jj-1)+1+iw(jj)
1069 ttmat%item(js)=kk
1070 ttmat%A(js)=-hecmesh%mpc%mpc_val(j)
1071 iw(jj)=iw(jj)+1
1072 enddo
1073 enddo outer3
1074 deallocate(iw)
1075 !write(700+hecmw_comm_get_rank(),*) 'DEBUG: Ttmat(MPC)'
1076 !call hecmw_localmat_write(Ttmat,700+hecmw_comm_get_rank())
1077 call hecmw_localmat_blocking(ttmat, ndof, bttmat)
1078 call hecmw_localmat_free(ttmat)
1079 end subroutine make_bttmat_mpc
1080
1081 subroutine hecmw_localmat_transpose(Tmat, Ttmat)
1082 implicit none
1083 type (hecmwst_local_matrix), intent(in) :: tmat
1084 type (hecmwst_local_matrix), intent(out) :: ttmat
1085 integer(kind=kint), allocatable :: iw(:)
1086 integer(kind=kint) :: i, j, jj, ndof, ndof2, k, idof, jdof
1087 allocate(iw(tmat%nc))
1088 iw = 0
1089 do i = 1, tmat%nr
1090 do j = tmat%index(i-1)+1, tmat%index(i)
1091 jj = tmat%item(j)
1092 iw(jj) = iw(jj) + 1
1093 enddo
1094 enddo
1095 ttmat%nr = tmat%nc
1096 ttmat%nc = tmat%nr
1097 ttmat%nnz = tmat%nnz
1098 ttmat%ndof = tmat%ndof
1099 ndof = tmat%ndof
1100 ndof2 = ndof * ndof
1101 allocate(ttmat%index(0:ttmat%nr))
1102 allocate(ttmat%item(ttmat%nnz))
1103 allocate(ttmat%A(ttmat%nnz*ndof2))
1104 ttmat%index(0) = 0
1105 do i = 1, ttmat%nr
1106 ttmat%index(i) = ttmat%index(i-1) + iw(i)
1107 iw(i) = ttmat%index(i-1) + 1
1108 enddo
1109 do i = 1, tmat%nr
1110 do j = tmat%index(i-1)+1, tmat%index(i)
1111 jj = tmat%item(j)
1112 k = iw(jj)
1113 ttmat%item( k ) = i
1114 do idof = 1, ndof
1115 do jdof = 1, ndof
1116 ttmat%A((k-1)*ndof2+(idof-1)*ndof+jdof) = &
1117 tmat%A((j-1)*ndof2+(jdof-1)*ndof+idof)
1118 enddo
1119 enddo
1120 iw(jj) = k + 1
1121 enddo
1122 enddo
1123 end subroutine hecmw_localmat_transpose
1124
1125 function hecmw_localmat_equal(Tmat1, Tmat2)
1126 implicit none
1127 type (hecmwst_local_matrix), intent(in) :: tmat1, tmat2
1128 integer(kind=kint) :: hecmw_localmat_equal
1129 integer(kind=kint) :: i, j, k0, k, ndof, ndof2
1130 hecmw_localmat_equal = 0
1131 if (tmat1%nr /= tmat2%nr) return
1132 if (tmat1%nc /= tmat2%nc) return
1133 if (tmat1%nnz /= tmat2%nnz) return
1134 if (tmat1%ndof /= tmat2%ndof) return
1135 ndof = tmat1%ndof
1136 ndof2 = ndof * ndof
1137 do i = 1, tmat1%nr
1138 if (tmat1%index(i) /= tmat2%index(i)) return
1139 do j = tmat1%index(i-1)+1, tmat1%index(i)
1140 if (tmat1%item(j) /= tmat2%item(j)) return
1141 k0 = (j-1)*ndof2
1142 do k = 1, ndof2
1143 if (tmat1%A(k0+k) /= tmat2%A(k0+k)) return
1144 enddo
1145 enddo
1146 enddo
1147 hecmw_localmat_equal = 1
1148 end function hecmw_localmat_equal
1149
1150!!!
1151!!! Subroutines for parallel contact analysis with iterative linear solver
1152!!!
1153
1154 subroutine hecmw_localmat_assemble(BTmat, hecMESH, hecMESHnew)
1155 implicit none
1156 type (hecmwst_local_matrix), intent(inout) :: btmat
1157 type (hecmwst_local_mesh), intent(in) :: hecmesh
1158 type (hecmwst_local_mesh), intent(inout) :: hecmeshnew
1159 integer(kind=kint) :: nn_int, np, ndof, ndof2, nr_ext, nnz_ext
1160 integer(kind=kint), allocatable :: exp_rows_index(:), exp_cols_index(:)
1161 integer(kind=kint), allocatable :: exp_rows_item(:,:), exp_cols_item(:,:)
1162 type (hecmwst_local_matrix), allocatable :: bt_ext(:)
1163 type (hecmwst_local_matrix) :: bt_int
1164 type (hecmwst_local_matrix) :: btnew
1165 ! some checks
1166 if (debug >= 1) write(0,*) 'DEBUG: nr,nc,nnz,ndof',btmat%nr,btmat%nc,btmat%nnz,btmat%ndof
1167 if (btmat%nr /= hecmesh%n_node) stop 'ERROR: invalid size in hecmw_localmat_assemble'
1168 !
1169 nn_int = hecmesh%nn_internal
1170 np = hecmesh%n_node
1171 ndof = btmat%ndof
1172 ndof2 = ndof*ndof
1173 !
1174 nr_ext = np - nn_int
1175 nnz_ext = btmat%index(np) - btmat%index(nn_int)
1176 !
1177 call prepare_bt_ext(btmat, hecmesh, exp_rows_index, exp_rows_item, bt_ext)
1178 if (debug >= 1) write(0,*) 'DEBUG: prepare_BT_ext done'
1179 !
1180 call prepare_column_info(hecmesh, bt_ext, exp_cols_index, exp_cols_item)
1181 if (debug >= 1) write(0,*) 'DEBUG: prepare_column info done'
1182 !
1183 call send_bt_ext_and_recv_bt_int(hecmesh, exp_rows_index, exp_rows_item, bt_ext, &
1184 exp_cols_index, exp_cols_item, bt_int, hecmeshnew)
1185 if (debug >= 1) write(0,*) 'DEBUG: send BT_ext and recv BT_int done'
1186 !
1187 !write(0,*) 'BTmat%ndof,BT_int%ndof',BTmat%ndof,BT_int%ndof
1188 call hecmw_localmat_add(btmat, bt_int, btnew)
1189 if (debug >= 1) write(0,*) 'DEBUG: localmat_add done'
1190 !
1191 call hecmw_localmat_free(btmat)
1192 call hecmw_localmat_free(bt_int)
1193 !
1194 btmat%nr = btnew%nr
1195 btmat%nc = btnew%nc
1196 btmat%nnz = btnew%nnz
1197 btmat%ndof = btnew%ndof
1198 btmat%index => btnew%index
1199 btmat%item => btnew%item
1200 btmat%A => btnew%A
1201 !
1202 ! hecMESH%n_node = hecMESHnew%n_node
1203 ! hecMESH%n_neighbor_pe = hecMESHnew%n_neighbor_pe
1204 ! deallocate(hecMESH%neighbor_pe)
1205 ! deallocate(hecMESH%import_index)
1206 ! deallocate(hecMESH%export_index)
1207 ! deallocate(hecMESH%import_item)
1208 ! deallocate(hecMESH%export_item)
1209 ! deallocate(hecMESH%node_ID)
1210 ! deallocate(hecMESH%global_node_ID)
1211 ! hecMESH%neighbor_pe => hecMESHnew%neighbor_pe
1212 ! hecMESH%import_index => hecMESHnew%import_index
1213 ! hecMESH%export_index => hecMESHnew%export_index
1214 ! hecMESH%import_item => hecMESHnew%import_item
1215 ! hecMESH%export_item => hecMESHnew%export_item
1216 ! hecMESH%node_ID => hecMESHnew%node_ID
1217 ! hecMESH%global_node_ID => hecMESHnew%global_node_ID
1218 !
1219 if (debug >= 1) write(0,*) 'DEBUG: update BTmat and hecMESH done'
1220 end subroutine hecmw_localmat_assemble
1221
1222 subroutine prepare_bt_ext(BTmat, hecMESH, exp_rows_index, exp_rows_item, BT_ext)
1223 implicit none
1224 type (hecmwst_local_matrix), intent(in) :: btmat
1225 type (hecmwst_local_mesh), intent(in) :: hecmesh
1226 integer(kind=kint), allocatable, intent(out) :: exp_rows_index(:)
1227 integer(kind=kint), allocatable, intent(out) :: exp_rows_item(:,:)
1228 type (hecmwst_local_matrix), allocatable, intent(out) :: bt_ext(:)
1229 integer(kind=kint), allocatable :: incl_nz(:), exp_cols_per_row(:), exp_rows_per_rank(:)
1230 integer(kind=kint) :: nn_int
1231 nn_int = hecmesh%nn_internal
1232 !
1233 call check_external_nz_blocks(btmat, nn_int, incl_nz)
1234 !
1235 call count_ext_rows_with_nz(btmat, nn_int, incl_nz, exp_cols_per_row)
1236 !
1237 call count_exp_rows_per_rank(hecmesh, exp_cols_per_row, exp_rows_per_rank)
1238 !
1239 allocate(exp_rows_index(0:hecmesh%n_neighbor_pe))
1240 call make_index(hecmesh%n_neighbor_pe, exp_rows_per_rank, exp_rows_index)
1241 !write(0,*) 'exp_rows_index',exp_rows_index(:)
1242 !
1243 deallocate(exp_rows_per_rank)
1244 !
1245 call make_exp_rows_item(hecmesh, exp_cols_per_row, exp_rows_index, exp_rows_item)
1246 !
1247 deallocate(exp_cols_per_row)
1248 !
1249 allocate(bt_ext(hecmesh%n_neighbor_pe))
1250 call extract_bt_ext(hecmesh, btmat, incl_nz, exp_rows_index, exp_rows_item, bt_ext)
1251 !
1252 deallocate(incl_nz)
1253 end subroutine prepare_bt_ext
1254
1255 subroutine check_external_nz_blocks(BTmat, nn_internal, incl_nz)
1256 implicit none
1257 type (hecmwst_local_matrix), intent(in) :: btmat
1258 integer(kind=kint), intent(in) :: nn_internal
1259 integer(kind=kint), allocatable, intent(out) :: incl_nz(:)
1260 integer(kind=kint) :: ndof2, i0, nnz_ext, i, k, nnz_blk
1261 if (nn_internal > btmat%nr) stop 'ERROR: invalid nn_internal'
1262 ndof2 = btmat%ndof ** 2
1263 i0 = btmat%index(nn_internal)
1264 nnz_ext = btmat%index(btmat%nr) - i0
1265 allocate(incl_nz(nnz_ext))
1266 nnz_blk = 0
1267 do i = 1, nnz_ext
1268 incl_nz(i) = 0
1269 do k = 1, ndof2
1270 if (btmat%A(ndof2*(i0+i-1)+k) /= 0.0d0) then
1271 incl_nz(i) = 1
1272 nnz_blk = nnz_blk + 1
1273 exit
1274 endif
1275 enddo
1276 enddo
1277 if (debug >= 1) write(0,*) 'DEBUG: nnz_blk',nnz_blk
1278 end subroutine check_external_nz_blocks
1279
1280 subroutine count_ext_rows_with_nz(BTmat, nn_internal, incl_nz, exp_cols_per_row)
1281 implicit none
1282 type (hecmwst_local_matrix), intent(in) :: btmat
1283 integer(kind=kint), intent(in) :: nn_internal
1284 integer(kind=kint), intent(in) :: incl_nz(:)
1285 integer(kind=kint), allocatable, intent(out) :: exp_cols_per_row(:)
1286 integer(kind=kint) :: nr_ext, nnz_int, i, irow, js, je, j, jcol
1287 nr_ext = btmat%nr - nn_internal
1288 nnz_int = btmat%index(nn_internal)
1289 allocate(exp_cols_per_row(nr_ext))
1290 exp_cols_per_row(:) = 0
1291 do i = 1, nr_ext
1292 irow = nn_internal+i
1293 js = btmat%index(irow-1)+1
1294 je = btmat%index(irow)
1295 do j = js, je
1296 jcol = btmat%item(j)
1297 if (incl_nz(j-nnz_int) == 1) exp_cols_per_row(i) = exp_cols_per_row(i) + 1
1298 enddo
1299 enddo
1300 !write(0,*) 'exp_cols_per_row',exp_cols_per_row(:)
1301 end subroutine count_ext_rows_with_nz
1302
1303 subroutine count_exp_rows_per_rank(hecMESH, exp_cols_per_row, exp_rows_per_rank)
1304 implicit none
1305 type (hecmwst_local_mesh), intent(in) :: hecmesh
1306 integer(kind=kint), intent(in) :: exp_cols_per_row(:)
1307 integer(kind=kint), allocatable, intent(out) :: exp_rows_per_rank(:)
1308 integer(kind=kint) :: nn_int, np, nr_ext, i, irow, exp_rank, idom
1309 allocate(exp_rows_per_rank(hecmesh%n_neighbor_pe))
1310 exp_rows_per_rank(1:hecmesh%n_neighbor_pe) = 0
1311 nn_int = hecmesh%nn_internal
1312 np = hecmesh%n_node
1313 nr_ext = np - nn_int
1314 do i = 1, nr_ext
1315 if (exp_cols_per_row(i) > 0) then
1316 irow = nn_int + i
1317 exp_rank = hecmesh%node_ID(2*irow)
1318 call rank_to_idom(hecmesh, exp_rank, idom)
1319 exp_rows_per_rank(idom) = exp_rows_per_rank(idom) + 1
1320 endif
1321 enddo
1322 !write(0,*) 'exp_rows_per_rank',exp_rows_per_rank(:)
1323 end subroutine count_exp_rows_per_rank
1324
1325 subroutine rank_to_idom(hecMESH, rank, idom)
1326 implicit none
1327 type (hecmwst_local_mesh), intent(in) :: hecmesh
1328 integer(kind=kint), intent(in) :: rank
1329 integer(kind=kint), intent(out) :: idom
1330 integer(kind=kint) :: i
1331 do i = 1, hecmesh%n_neighbor_pe
1332 if (hecmesh%neighbor_pe(i) == rank) then
1333 idom = i
1334 return
1335 endif
1336 enddo
1337 stop 'ERROR: exp_rank not found in neighbor_pe'
1338 end subroutine rank_to_idom
1339
1340 subroutine make_index(len, cnt, index)
1341 implicit none
1342 integer(kind=kint), intent(in) :: len
1343 integer(kind=kint), intent(in) :: cnt(len)
1344 integer(kind=kint), intent(out) :: index(0:)
1345 integer(kind=kint) :: i
1346 ! write(0,*) 'make_index: len',len
1347 index(0) = 0
1348 do i = 1,len
1349 index(i) = index(i-1) + cnt(i)
1350 enddo
1351 end subroutine make_index
1352
1353 subroutine make_exp_rows_item(hecMESH, exp_cols_per_row, exp_rows_index, exp_rows_item)
1354 implicit none
1355 type (hecmwst_local_mesh), intent(in) :: hecmesh
1356 integer(kind=kint), intent(in) :: exp_cols_per_row(:)
1357 integer(kind=kint), allocatable, intent(in) :: exp_rows_index(:)
1358 integer(kind=kint), allocatable, intent(out) :: exp_rows_item(:,:)
1359 integer(kind=kint), allocatable :: cnt(:)
1360 integer(kind=kint) :: nn_int, np, nr_ext, i, irow, exp_rank, idom, idx
1361 allocate(exp_rows_item(2,exp_rows_index(hecmesh%n_neighbor_pe)))
1362 allocate(cnt(hecmesh%n_neighbor_pe))
1363 cnt(:) = 0
1364 nn_int = hecmesh%nn_internal
1365 np = hecmesh%n_node
1366 nr_ext = np - nn_int
1367 do i = 1, nr_ext
1368 if (exp_cols_per_row(i) > 0) then
1369 irow = nn_int + i
1370 exp_rank = hecmesh%node_ID(2*irow)
1371 call rank_to_idom(hecmesh, exp_rank, idom)
1372 cnt(idom) = cnt(idom) + 1
1373 idx = exp_rows_index(idom-1) + cnt(idom)
1374 exp_rows_item(1,idx) = irow
1375 exp_rows_item(2,idx) = exp_cols_per_row(i)
1376 endif
1377 enddo
1378 !write(0,*) 'cnt',cnt(:)
1379 do idom = 1, hecmesh%n_neighbor_pe
1380 if (cnt(idom) /= exp_rows_index(idom)-exp_rows_index(idom-1)) stop 'ERROR: make exp_rows_item'
1381 enddo
1382 !write(0,*) 'exp_rows_item(1,:)',exp_rows_item(1,:)
1383 !write(0,*) 'exp_rows_item(2,:)',exp_rows_item(2,:)
1384 end subroutine make_exp_rows_item
1385
1386 subroutine extract_bt_ext(hecMESH, BTmat, incl_nz, exp_rows_index, exp_rows_item, BT_ext)
1387 implicit none
1388 type (hecmwst_local_mesh), intent(in) :: hecmesh
1389 type (hecmwst_local_matrix), intent(in) :: btmat
1390 integer(kind=kint), intent(in) :: incl_nz(:)
1391 integer(kind=kint), allocatable, intent(in) :: exp_rows_index(:)
1392 integer(kind=kint), intent(in) :: exp_rows_item(:,:)
1393 type (hecmwst_local_matrix), allocatable, intent(out) :: bt_ext(:)
1394 integer(kind=kint) :: ndof, ndof2, nn_int, nnz_int, idom, j, idx, ncol, cnt, jrow, ks, ke, k, kcol
1395 allocate(bt_ext(hecmesh%n_neighbor_pe))
1396 ndof = btmat%ndof
1397 ndof2 = ndof * ndof
1398 nn_int = hecmesh%nn_internal
1399 nnz_int = btmat%index(nn_int)
1400 do idom = 1, hecmesh%n_neighbor_pe
1401 bt_ext(idom)%nr = exp_rows_index(idom) - exp_rows_index(idom-1)
1402 bt_ext(idom)%nc = btmat%nc
1403 bt_ext(idom)%nnz = 0
1404 bt_ext(idom)%ndof = ndof
1405 allocate(bt_ext(idom)%index(0:bt_ext(idom)%nr))
1406 bt_ext(idom)%index(0) = 0
1407 do j = 1, bt_ext(idom)%nr
1408 idx = exp_rows_index(idom-1) + j
1409 ncol = exp_rows_item(2,idx)
1410 bt_ext(idom)%index(j) = bt_ext(idom)%index(j-1) + ncol
1411 enddo
1412 bt_ext(idom)%nnz = bt_ext(idom)%index(bt_ext(idom)%nr)
1413 if (debug >= 1) write(0,*) 'DEBUG: idom,nr,nc,nnz,ndof', &
1414 idom,bt_ext(idom)%nr,bt_ext(idom)%nc,bt_ext(idom)%nnz,bt_ext(idom)%ndof
1415 allocate(bt_ext(idom)%item(bt_ext(idom)%nnz))
1416 allocate(bt_ext(idom)%A(bt_ext(idom)%nnz * ndof2))
1417 cnt = 0
1418 do j = 1, bt_ext(idom)%nr
1419 idx = exp_rows_index(idom-1) + j
1420 jrow = exp_rows_item(1,idx)
1421 if (jrow < 1 .or. btmat%nr < jrow) stop 'ERROR: extract BT_ext: jrow'
1422 ks = btmat%index(jrow-1)+1
1423 ke = btmat%index(jrow)
1424 do k = ks, ke
1425 kcol = btmat%item(k)
1426 if (incl_nz(k-nnz_int) == 0) cycle
1427 cnt = cnt + 1
1428 bt_ext(idom)%item(cnt) = kcol
1429 bt_ext(idom)%A(ndof2*(cnt-1)+1:ndof2*cnt) = btmat%A(ndof2*(k-1)+1:ndof2*k)
1430 enddo
1431 if (cnt /= bt_ext(idom)%index(j)) stop 'ERROR: extract BT_ext'
1432 enddo
1433 ! if (DEBUG >= 3) write(100+hecMESH%my_rank,*) 'BT_ext(',idom,')'
1434 ! call hecmw_localmat_write(BT_ext(idom), 100+hecMESH%my_rank)
1435 enddo
1436 end subroutine extract_bt_ext
1437
1438 subroutine prepare_column_info(hecMESH, BT_ext, exp_cols_index, exp_cols_item)
1439 implicit none
1440 type (hecmwst_local_mesh), intent(in) :: hecmesh
1441 type (hecmwst_local_matrix), intent(in) :: bt_ext(:)
1442 integer(kind=kint), allocatable, intent(out) :: exp_cols_index(:)
1443 integer(kind=kint), allocatable, intent(out) :: exp_cols_item(:,:)
1444 !
1445 call make_exp_cols_index(hecmesh%n_neighbor_pe, bt_ext, exp_cols_index)
1446 if (debug >= 2) write(0,*) ' DEBUG2: make exp_cols_index done'
1447 if (debug >= 3) write(0,*) ' DEBUG3: exp_cols_index', exp_cols_index(0:hecmesh%n_neighbor_pe)
1448 !
1449 ! (col ID, rank, global ID)
1450 !
1451 call make_exp_cols_item(hecmesh, bt_ext, exp_cols_index, exp_cols_item)
1452 if (debug >= 2) write(0,*) ' DEBUG2: make exp_cols_item done'
1453 ! if (DEBUG >= 3) write(0,*) ' DEBUG3: exp_cols_item', exp_cols_item(1:cNCOL_ITEM,1:exp_cols_index(hecMESH%n_neighbor_pe))
1454 end subroutine prepare_column_info
1455
1456 subroutine make_exp_cols_index(nnb, BT_ext, exp_cols_index)
1457 implicit none
1458 integer(kind=kint), intent(in) :: nnb
1459 type (hecmwst_local_matrix), intent(in) :: bt_ext(:)
1460 integer(kind=kint), allocatable, intent(out) :: exp_cols_index(:)
1461 integer(kind=kint) :: idom
1462 allocate(exp_cols_index(0:nnb))
1463 exp_cols_index(0) = 0
1464 do idom = 1, nnb
1465 exp_cols_index(idom) = exp_cols_index(idom-1) + bt_ext(idom)%nnz
1466 enddo
1467 end subroutine make_exp_cols_index
1468
1469 subroutine make_exp_cols_item(hecMESH, BT_ext, exp_cols_index, exp_cols_item)
1470 implicit none
1471 type (hecmwst_local_mesh), intent(in) :: hecmesh
1472 type (hecmwst_local_matrix), intent(in) :: bt_ext(:)
1473 integer(kind=kint), allocatable, intent(in) :: exp_cols_index(:)
1474 integer(kind=kint), allocatable, intent(out) :: exp_cols_item(:,:)
1475 integer(kind=kint) :: cnt, idom, j, jcol
1476 allocate(exp_cols_item(cncol_item,exp_cols_index(hecmesh%n_neighbor_pe)))
1477 cnt = 0
1478 do idom = 1, hecmesh%n_neighbor_pe
1479 do j = 1, bt_ext(idom)%nnz
1480 cnt = cnt + 1
1481 jcol = bt_ext(idom)%item(j)
1482 ! if (DEBUG >= 3) write(0,*) ' DEBUG3: idom,j,cnt,jcol,nn_internal,n_node',&
1483 ! idom,j,cnt,jcol,hecMESH%nn_internal,hecMESH%n_node
1484 ! if (DEBUG >= 3) write(0,*) ' DEBUG3: size of exp_cols_item',size(exp_cols_item)
1485 ! if (DEBUG >= 3) write(0,*) ' DEBUG3: size of node_ID',size(hecMESH%node_ID)
1486 ! if (DEBUG >= 3) write(0,*) ' DEBUG3: size of global_node_ID',size(hecMESH%global_node_ID)
1487 exp_cols_item(clid,cnt) = hecmesh%node_ID(2*jcol-1)
1488 exp_cols_item(crank,cnt) = hecmesh%node_ID(2*jcol)
1489 if (cncol_item >= 3) exp_cols_item(cgid,cnt) = hecmesh%global_node_ID(jcol)
1490 ! if (DEBUG >= 3) write(0,*) ' DEBUG3: lid,rank(,gid)',exp_cols_item(1:cNCOL_ITEM,cnt)
1491 enddo
1492 if (cnt /= exp_cols_index(idom)) stop 'ERROR: make exp_cols_item'
1493 enddo
1494 end subroutine make_exp_cols_item
1495
1496 subroutine send_bt_ext_and_recv_bt_int(hecMESH, exp_rows_index, exp_rows_item, BT_ext, &
1497 exp_cols_index, exp_cols_item, BT_int, hecMESHnew)
1498 implicit none
1499 type (hecmwst_local_mesh), intent(in) :: hecmesh
1500 integer(kind=kint), allocatable, intent(inout) :: exp_rows_index(:), exp_cols_index(:)
1501 integer(kind=kint), allocatable, intent(inout) :: exp_rows_item(:,:), exp_cols_item(:,:)
1502 type (hecmwst_local_matrix), allocatable, intent(inout) :: bt_ext(:)
1503 type (hecmwst_local_matrix), intent(out) :: bt_int
1504 type (hecmwst_local_mesh), intent(inout) :: hecmeshnew
1505 integer(kind=kint), allocatable :: imp_rows_index(:), imp_cols_index(:)
1506 integer(kind=kint), allocatable :: imp_rows_item(:,:), imp_cols_item(:,:)
1507 real(kind=kreal), allocatable :: imp_vals_item(:)
1508 integer(kind=kint), allocatable :: map(:), add_nodes(:,:)
1509 integer(kind=kint) :: ndof, ndof2, idom, n_add_node, i0
1510 if (hecmesh%n_neighbor_pe == 0) return
1511 ndof = bt_ext(1)%ndof
1512 ndof2 = ndof*ndof
1513 !
1514 call convert_rowid_to_remote_localid(hecmesh, exp_rows_index(hecmesh%n_neighbor_pe), exp_rows_item)
1515 if (debug >= 2) write(0,*) ' DEBUG2: convert rowID to remote localID done'
1516 !
1517 call send_recv_bt_ext_nr_nnz(hecmesh, bt_ext, imp_rows_index, imp_cols_index)
1518 if (debug >= 2) write(0,*) ' DEBUG2: send recv BT_ext nr and nnz done'
1519 !
1520 call send_recv_bt_ext_contents(hecmesh, bt_ext, &
1521 exp_rows_index, exp_cols_index, exp_rows_item, exp_cols_item, &
1522 imp_rows_index, imp_cols_index, &
1523 imp_rows_item, imp_cols_item, imp_vals_item)
1524 if (debug >= 2) write(0,*) ' DEBUG2: send recv BT_ext contents done'
1525 !
1526 do idom = 1, hecmesh%n_neighbor_pe
1527 call hecmw_localmat_free(bt_ext(idom))
1528 enddo
1529 deallocate(bt_ext)
1530 !
1531 call allocate_bt_int(hecmesh, ndof, imp_rows_index, imp_rows_item, bt_int)
1532 if (debug >= 2) write(0,*) ' DEBUG2: allocate BT_int done'
1533 !
1534 ! call copy_mesh(hecMESH, hecMESHnew)
1535 ! if (DEBUG >= 2) write(0,*) ' DEBUG2: copy mesh done'
1536 !
1537 call map_imported_cols(hecmeshnew, imp_cols_index(hecmesh%n_neighbor_pe), &
1538 imp_cols_item, n_add_node, add_nodes, map, i0)
1539 if (debug >= 2) write(0,*) ' DEBUG2: map imported cols done'
1540 !
1541 call update_comm_table(hecmeshnew, n_add_node, add_nodes, i0)
1542 if (debug >= 2) write(0,*) ' DEBUG2: update comm_table done'
1543 !
1544 bt_int%nc = hecmeshnew%n_node
1545 !
1546 call copy_vals_to_bt_int(hecmesh%n_neighbor_pe, imp_rows_index, imp_cols_index, &
1547 imp_rows_item, map, ndof2, imp_vals_item, bt_int)
1548 if (debug >= 2) write(0,*) ' DEBUG2: copy vals to BT_int done'
1549 !
1550 deallocate(imp_rows_index)
1551 deallocate(imp_cols_index)
1552 deallocate(imp_rows_item)
1553 deallocate(imp_cols_item)
1554 deallocate(imp_vals_item)
1555 deallocate(map)
1556 !
1557 call sort_and_uniq_rows(bt_int)
1558 if (debug >= 2) write(0,*) ' DEBUG2: sort and uniq rows of BT_int done'
1559 end subroutine send_bt_ext_and_recv_bt_int
1560
1561 subroutine convert_rowid_to_remote_localid(hecMESH, len, exp_rows_item)
1562 implicit none
1563 type (hecmwst_local_mesh), intent(in) :: hecmesh
1564 integer(kind=kint), intent(in) :: len
1565 integer(kind=kint), intent(out) :: exp_rows_item(:,:)
1566 integer(kind=kint) :: i
1567 do i = 1, len
1568 exp_rows_item(1,i) = hecmesh%node_ID(2 * exp_rows_item(1,i) - 1)
1569 enddo
1570 end subroutine convert_rowid_to_remote_localid
1571
1572 subroutine send_recv_bt_ext_nr_nnz(hecMESH, BT_ext, imp_rows_index, imp_cols_index)
1573 use m_hecmw_comm_f
1574 implicit none
1575 type (hecmwst_local_mesh), intent(in) :: hecmesh
1576 type (hecmwst_local_matrix), intent(in) :: bt_ext(:)
1577 integer(kind=kint), allocatable, intent(out) :: imp_rows_index(:), imp_cols_index(:)
1578 integer(kind=kint) :: nnb, idom, irank, tag, recvbuf(2)
1579 integer(kind=kint), allocatable :: sendbuf(:,:)
1580 integer(kind=kint), allocatable :: requests(:)
1581 integer(kind=kint), allocatable :: statuses(:,:)
1582 nnb = hecmesh%n_neighbor_pe
1583 allocate(imp_rows_index(0:nnb))
1584 allocate(imp_cols_index(0:nnb))
1585 allocate(requests(nnb))
1586 allocate(statuses(hecmw_status_size, nnb))
1587 allocate(sendbuf(2,nnb))
1588 do idom = 1, nnb
1589 irank = hecmesh%neighbor_pe(idom)
1590 ! nr = exp_rows_per_rank(idom)
1591 sendbuf(1,idom) = bt_ext(idom)%nr
1592 sendbuf(2,idom) = bt_ext(idom)%nnz
1593 tag=2001
1594 call hecmw_isend_int(sendbuf(1,idom), 2, irank, tag, hecmesh%MPI_COMM, &
1595 requests(idom))
1596 enddo
1597 imp_rows_index(0) = 0
1598 imp_cols_index(0) = 0
1599 do idom = 1, nnb
1600 irank = hecmesh%neighbor_pe(idom)
1601 tag = 2001
1602 call hecmw_recv_int(recvbuf, 2, irank, tag, &
1603 hecmesh%MPI_COMM, statuses(:,1))
1604 imp_rows_index(idom) = imp_rows_index(idom-1) + recvbuf(1)
1605 imp_cols_index(idom) = imp_cols_index(idom-1) + recvbuf(2)
1606 enddo
1607 call hecmw_waitall(nnb, requests, statuses)
1608 deallocate(requests)
1609 deallocate(statuses)
1610 deallocate(sendbuf)
1611 end subroutine send_recv_bt_ext_nr_nnz
1612
1613 subroutine send_recv_bt_ext_contents(hecMESH, BT_ext, &
1614 exp_rows_index, exp_cols_index, exp_rows_item, exp_cols_item, &
1615 imp_rows_index, imp_cols_index, &
1616 imp_rows_item, imp_cols_item, imp_vals_item)
1617 use m_hecmw_comm_f
1618 implicit none
1619 type (hecmwst_local_mesh), intent(in) :: hecmesh
1620 type (hecmwst_local_matrix), intent(in) :: bt_ext(:)
1621 integer(kind=kint), allocatable, intent(inout) :: exp_rows_index(:), exp_cols_index(:)
1622 integer(kind=kint), allocatable, intent(inout) :: exp_rows_item(:,:), exp_cols_item(:,:)
1623 integer(kind=kint), allocatable, intent(in) :: imp_rows_index(:), imp_cols_index(:)
1624 integer(kind=kint), allocatable, intent(out) :: imp_rows_item(:,:), imp_cols_item(:,:)
1625 real(kind=kreal), allocatable, intent(out) :: imp_vals_item(:)
1626 integer(kind=kint) :: nnb, ndof2, n_send, idom, irank, tag, nr, nnz
1627 integer(kind=kint), allocatable :: requests(:)
1628 integer(kind=kint), allocatable :: statuses(:,:)
1629 nnb = hecmesh%n_neighbor_pe
1630 if (nnb < 1) return
1631 ndof2 = bt_ext(1)%ndof ** 2
1632 allocate(imp_rows_item(2,imp_rows_index(nnb)))
1633 allocate(imp_cols_item(cncol_item,imp_cols_index(nnb)))
1634 allocate(imp_vals_item(ndof2*imp_cols_index(nnb)))
1635 allocate(requests(3*nnb))
1636 allocate(statuses(hecmw_status_size, 3*nnb))
1637 n_send = 0
1638 do idom = 1, nnb
1639 irank = hecmesh%neighbor_pe(idom)
1640 if (bt_ext(idom)%nr > 0) then
1641 n_send = n_send + 1
1642 tag = 2002
1643 call hecmw_isend_int(exp_rows_item(1,exp_rows_index(idom-1)+1), &
1644 2*bt_ext(idom)%nr, irank, tag, hecmesh%MPI_COMM, &
1645 requests(n_send))
1646 n_send = n_send + 1
1647 tag = 2003
1648 call hecmw_isend_int(exp_cols_item(1,exp_cols_index(idom-1)+1), &
1649 cncol_item*bt_ext(idom)%nnz, irank, tag, hecmesh%MPI_COMM, &
1650 requests(n_send))
1651 n_send = n_send + 1
1652 tag = 2004
1653 call hecmw_isend_r(bt_ext(idom)%A, ndof2*bt_ext(idom)%nnz, irank, &
1654 tag, hecmesh%MPI_COMM, requests(n_send))
1655 endif
1656 enddo
1657 do idom = 1, nnb
1658 irank = hecmesh%neighbor_pe(idom)
1659 nr = imp_rows_index(idom) - imp_rows_index(idom-1)
1660 nnz = imp_cols_index(idom) - imp_cols_index(idom-1)
1661 if (nr > 0) then
1662 tag = 2002
1663 call hecmw_recv_int(imp_rows_item(1,imp_rows_index(idom-1)+1), &
1664 2*nr, irank, tag, hecmesh%MPI_COMM, statuses(:,1))
1665 tag = 2003
1666 call hecmw_recv_int(imp_cols_item(1,imp_cols_index(idom-1)+1), &
1667 cncol_item*nnz, irank, tag, hecmesh%MPI_COMM, statuses(:,1))
1668 tag = 2004
1669 call hecmw_recv_r(imp_vals_item(ndof2*imp_cols_index(idom-1)+1), &
1670 ndof2*nnz, irank, tag, hecmesh%MPI_COMM, statuses(:,1))
1671 endif
1672 enddo
1673 call hecmw_waitall(n_send, requests, statuses)
1674 deallocate(exp_rows_index)
1675 deallocate(exp_rows_item)
1676 deallocate(exp_cols_index)
1677 deallocate(exp_cols_item)
1678 end subroutine send_recv_bt_ext_contents
1679
1680 subroutine allocate_bt_int(hecMESH, ndof, imp_rows_index, imp_rows_item, BT_int)
1681 implicit none
1682 type (hecmwst_local_mesh), intent(in) :: hecmesh
1683 integer(kind=kint), intent(in) :: ndof
1684 integer(kind=kint), allocatable, intent(in) :: imp_rows_index(:), imp_rows_item(:,:)
1685 type (hecmwst_local_matrix), intent(out) :: bt_int
1686 integer(kind=kint), allocatable :: cnt(:)
1687 integer(kind=kint) :: idom, is, ie, i, irow, ncol, ndof2
1688 ndof2 = ndof*ndof
1689 bt_int%nr = hecmesh%nn_internal
1690 bt_int%nc = hecmesh%n_node
1691 bt_int%nnz = 0
1692 bt_int%ndof = ndof
1693 allocate(cnt(bt_int%nr))
1694 cnt(:) = 0
1695 do idom = 1, hecmesh%n_neighbor_pe
1696 is = imp_rows_index(idom-1)+1
1697 ie = imp_rows_index(idom)
1698 do i = is, ie
1699 irow = imp_rows_item(1,i)
1700 ncol = imp_rows_item(2,i)
1701 if (irow < 1 .or. bt_int%nr < irow) stop 'ERROR: allocate BT_int'
1702 cnt(irow) = cnt(irow) + ncol !!! might include duplicate cols
1703 enddo
1704 enddo
1705 !
1706 allocate(bt_int%index(0:bt_int%nr))
1707 call make_index(bt_int%nr, cnt, bt_int%index)
1708 !
1709 bt_int%nnz = bt_int%index(bt_int%nr)
1710 allocate(bt_int%item(bt_int%nnz))
1711 allocate(bt_int%A(bt_int%nnz * ndof2))
1712 bt_int%A(:) = 0.d0
1713 end subroutine allocate_bt_int
1714
1715 subroutine copy_mesh(src, dst)
1716 implicit none
1717 type (hecmwst_local_mesh), intent(in) :: src
1718 type (hecmwst_local_mesh), intent(out) :: dst
1719 dst%zero = src%zero
1720 dst%MPI_COMM = src%MPI_COMM
1721 dst%PETOT = src%PETOT
1722 dst%PEsmpTOT = src%PEsmpTOT
1723 dst%my_rank = src%my_rank
1724 dst%n_subdomain = src%n_subdomain
1725 dst%n_node = src%n_node
1726 dst%nn_internal = src%nn_internal
1727 dst%n_dof = src%n_dof
1728 dst%n_neighbor_pe = src%n_neighbor_pe
1729 allocate(dst%neighbor_pe(dst%n_neighbor_pe))
1730 dst%neighbor_pe(:) = src%neighbor_pe(:)
1731 allocate(dst%import_index(0:dst%n_neighbor_pe))
1732 allocate(dst%export_index(0:dst%n_neighbor_pe))
1733 dst%import_index(:)= src%import_index(:)
1734 dst%export_index(:)= src%export_index(:)
1735 allocate(dst%import_item(dst%import_index(dst%n_neighbor_pe)))
1736 dst%import_item(:) = src%import_item(:)
1737 allocate(dst%export_item(dst%export_index(dst%n_neighbor_pe)))
1738 dst%export_item(:) = src%export_item(:)
1739 allocate(dst%node_ID(2*dst%n_node))
1740 dst%node_ID(1:2*dst%n_node) = src%node_ID(1:2*src%n_node)
1741 allocate(dst%global_node_ID(dst%n_node))
1742 dst%global_node_ID(1:dst%n_node) = src%global_node_ID(1:src%n_node)
1743 dst%mpc%n_mpc = 0
1744 dst%node => src%node
1745 end subroutine copy_mesh
1746
1747 subroutine map_imported_cols(hecMESHnew, ncols, cols, n_add_node, add_nodes, map, i0)
1748 implicit none
1749 type (hecmwst_local_mesh), intent(inout) :: hecmeshnew
1750 integer(kind=kint), intent(in) :: ncols
1751 integer(kind=kint), intent(in) :: cols(cncol_item,ncols)
1752 integer(kind=kint), allocatable, intent(out) :: map(:)
1753 integer(kind=kint), intent(out) :: n_add_node
1754 integer(kind=kint), allocatable, intent(out) :: add_nodes(:,:)
1755 integer(kind=kint), intent(out) :: i0
1756 allocate(map(ncols))
1757 !
1758 call map_present_nodes(hecmeshnew, ncols, cols, map, n_add_node)
1759 !
1760 ! add nodes == unmapped nodes
1761 !
1762 call extract_add_nodes(ncols, cols, map, n_add_node, add_nodes)
1763 !
1764 call append_nodes(hecmeshnew, n_add_node, add_nodes, i0)
1765 !
1766 call map_additional_nodes(ncols, cols, n_add_node, add_nodes, i0, map)
1767 end subroutine map_imported_cols
1768
1769 subroutine map_present_nodes(hecMESH, ncols, cols, map, n_add_node)
1770 implicit none
1771 type (hecmwst_local_mesh), intent(in) :: hecmesh
1772 integer(kind=kint), intent(in) :: ncols
1773 integer(kind=kint), intent(in) :: cols(cncol_item,ncols)
1774 integer(kind=kint), intent(out) :: map(ncols)
1775 integer(kind=kint), intent(out) :: n_add_node
1776 integer(kind=kint) :: i, j, lid, rank, llid, n_ext_node, idx
1777 integer(kind=kint), allocatable :: ext_node(:)
1778 type (hecmwst_pair_array) :: parray
1779 !
1780 call hecmw_pair_array_init(parray, hecmesh%n_node - hecmesh%nn_internal)
1781 do i = hecmesh%nn_internal + 1, hecmesh%n_node
1782 call hecmw_pair_array_append(parray, i, hecmesh%node_ID(2*i-1), hecmesh%node_ID(2*i))
1783 enddo
1784 call hecmw_pair_array_sort(parray)
1785 !
1786 n_add_node = 0
1787 n_ext_node = 0
1788 allocate(ext_node(ncols))
1789 !$omp parallel default(none), &
1790 !$omp& private(i,lid,rank,llid,idx,j), &
1791 !$omp& shared(ncols,hecMESH,cols,map,n_ext_node,ext_node,parray), &
1792 !$omp& reduction(+:n_add_node)
1793 !$omp do
1794 do i = 1, ncols
1795 lid = cols(clid,i)
1796 rank = cols(crank,i)
1797 ! check rank
1798 if (rank == hecmesh%my_rank) then ! internal: set mapping
1799 map(i) = lid
1800 else ! external
1801 !$omp atomic capture
1802 n_ext_node = n_ext_node + 1
1803 idx = n_ext_node
1804 !$omp end atomic
1805 ext_node(idx) = i
1806 endif
1807 enddo
1808 !$omp end do
1809 !$omp do
1810 do j = 1, n_ext_node
1811 i = ext_node(j)
1812 lid = cols(clid,i)
1813 rank = cols(crank,i)
1814 ! search node_ID in external nodes
1815 llid = hecmw_pair_array_find_id(parray, lid, rank)
1816 if (llid > 0) then ! found: set mapping
1817 map(i) = llid
1818 else ! not found
1819 map(i) = -1
1820 n_add_node = n_add_node + 1
1821 endif
1822 enddo
1823 !$omp end do
1824 !$omp end parallel
1825 deallocate(ext_node)
1826 !
1827 call hecmw_pair_array_finalize(parray)
1828 end subroutine map_present_nodes
1829
1830 subroutine extract_add_nodes(ncols, cols, map, n_add_node, add_nodes)
1831 implicit none
1832 integer(kind=kint), intent(in) :: ncols
1833 integer(kind=kint), intent(in) :: cols(cncol_item,ncols), map(ncols)
1834 integer(kind=kint), intent(inout) :: n_add_node
1835 integer(kind=kint), allocatable, intent(out) :: add_nodes(:,:)
1836 integer(kind=kint) :: cnt, i
1837 allocate(add_nodes(cncol_item,n_add_node))
1838 cnt = 0
1839 do i = 1, ncols
1840 if (map(i) == -1) then
1841 cnt = cnt + 1
1842 add_nodes(1:cncol_item,cnt) = cols(1:cncol_item,i)
1843 endif
1844 enddo
1845 if (cnt /= n_add_node) stop 'ERROR: extract add_nodes'
1846 call sort_and_uniq_add_nodes(n_add_node, add_nodes)
1847 end subroutine extract_add_nodes
1848
1849 subroutine sort_and_uniq_add_nodes(n_add_node, add_nodes)
1850 implicit none
1851 integer(kind=kint), intent(inout) :: n_add_node
1852 integer(kind=kint), intent(inout) :: add_nodes(cncol_item,n_add_node)
1853 integer(kind=kint) :: ndup
1854 call sort_add_nodes(add_nodes, 1, n_add_node)
1855 call uniq_add_nodes(add_nodes, n_add_node, ndup)
1856 n_add_node = n_add_node - ndup
1857 end subroutine sort_and_uniq_add_nodes
1858
1859 recursive subroutine sort_add_nodes(add_nodes, id1, id2)
1860 implicit none
1861 integer(kind=kint), intent(inout) :: add_nodes(:,:)
1862 integer(kind=kint), intent(in) :: id1, id2
1863 integer(kind=kint) :: center, left, right
1864 integer(kind=kint) :: pivot(cncol_item), tmp(cncol_item)
1865 if (id1 >= id2) return
1866 center = (id1 + id2) / 2
1867 pivot(1:cncol_item) = add_nodes(1:cncol_item,center)
1868 left = id1
1869 right = id2
1870 do
1871 do while ((add_nodes(crank,left) < pivot(crank)) .or. &
1872 (add_nodes(crank,left) == pivot(crank) .and. add_nodes(clid,left) < pivot(clid)))
1873 left = left + 1
1874 enddo
1875 do while ((pivot(crank) < add_nodes(crank,right)) .or. &
1876 (pivot(crank) == add_nodes(crank,right) .and. pivot(clid) < add_nodes(clid,right)))
1877 right = right - 1
1878 enddo
1879 if (left >= right) exit
1880 tmp(1:cncol_item) = add_nodes(1:cncol_item,left)
1881 add_nodes(1:cncol_item,left) = add_nodes(1:cncol_item,right)
1882 add_nodes(1:cncol_item,right) = tmp(1:cncol_item)
1883 left = left + 1
1884 right = right - 1
1885 enddo
1886 if (id1 < left-1) call sort_add_nodes(add_nodes, id1, left-1)
1887 if (right+1 < id2) call sort_add_nodes(add_nodes, right+1, id2)
1888 return
1889 end subroutine sort_add_nodes
1890
1891 subroutine uniq_add_nodes(add_nodes, len, ndup)
1892 implicit none
1893 integer(kind=kint), intent(inout) :: add_nodes(:,:)
1894 integer(kind=kint), intent(in) :: len
1895 integer(kind=kint), intent(out) :: ndup
1896 integer(kind=kint) :: i
1897 ndup = 0
1898 do i = 2,len
1899 if (add_nodes(clid,i) == add_nodes(clid,i-1-ndup) .and. &
1900 add_nodes(crank,i) == add_nodes(crank,i-1-ndup)) then
1901 ndup = ndup + 1
1902 else if (ndup > 0) then
1903 add_nodes(1:cncol_item,i-ndup) = add_nodes(1:cncol_item,i)
1904 endif
1905 enddo
1906 end subroutine uniq_add_nodes
1907
1908 subroutine search_add_nodes(n_add_node, add_nodes, rank, lid, idx)
1909 implicit none
1910 integer(kind=kint), intent(in) :: n_add_node
1911 integer(kind=kint), intent(in) :: add_nodes(cncol_item,n_add_node)
1912 integer(kind=kint), intent(in) :: rank
1913 integer(kind=kint), intent(in) :: lid
1914 integer(kind=kint), intent(out) :: idx
1915 integer(kind=kint) :: left, right, center
1916 left = 1
1917 right = n_add_node
1918 do while (left <= right)
1919 center = (left + right) / 2
1920 if ((rank == add_nodes(crank,center)) .and. (lid == add_nodes(clid,center))) then
1921 idx = center
1922 return
1923 else if ((rank < add_nodes(crank,center)) .or. &
1924 (rank == add_nodes(crank,center) .and. lid < add_nodes(clid,center))) then
1925 right = center - 1
1926 else if ((add_nodes(crank,center) < rank) .or. &
1927 (add_nodes(crank,center) == rank .and. add_nodes(clid,center) < lid)) then
1928 left = center + 1
1929 endif
1930 end do
1931 idx = -1
1932 end subroutine search_add_nodes
1933
1934 subroutine append_nodes(hecMESHnew, n_add_node, add_nodes, i0)
1935 implicit none
1936 type (hecmwst_local_mesh), intent(inout) :: hecmeshnew
1937 integer(kind=kint), intent(in) :: n_add_node
1938 integer(kind=kint), intent(in) :: add_nodes(:,:)
1939 integer(kind=kint), intent(out) :: i0
1940 integer(kind=kint) :: n_node, i, ii
1941 integer(kind=kint), pointer :: node_id(:), global_node_id(:)
1942 i0 = hecmeshnew%n_node
1943 n_node = hecmeshnew%n_node + n_add_node
1944 allocate(node_id(2*n_node))
1945 allocate(global_node_id(n_node))
1946 do i = 1, hecmeshnew%n_node
1947 node_id(2*i-1) = hecmeshnew%node_ID(2*i-1)
1948 node_id(2*i ) = hecmeshnew%node_ID(2*i )
1949 global_node_id(i) = hecmeshnew%global_node_ID(i)
1950 enddo
1951 do i = 1, n_add_node
1952 ii = hecmeshnew%n_node + i
1953 node_id(2*ii-1) = add_nodes(clid,i)
1954 node_id(2*ii ) = add_nodes(crank,i)
1955 if (cncol_item >= 3) then
1956 global_node_id(i) = add_nodes(cgid,i)
1957 else
1958 global_node_id(i) = -1
1959 endif
1960 enddo
1961 deallocate(hecmeshnew%node_ID)
1962 deallocate(hecmeshnew%global_node_ID)
1963 hecmeshnew%n_node = n_node
1964 hecmeshnew%node_ID => node_id
1965 hecmeshnew%global_node_ID => global_node_id
1966 end subroutine append_nodes
1967
1968 subroutine map_additional_nodes(ncols, cols, n_add_node, add_nodes, i0, map)
1969 implicit none
1970 integer(kind=kint), intent(in) :: ncols
1971 integer(kind=kint), intent(in) :: cols(cncol_item,ncols)
1972 integer(kind=kint), intent(in) :: n_add_node
1973 integer(kind=kint), intent(in) :: add_nodes(cncol_item,n_add_node)
1974 integer(kind=kint), intent(in) :: i0
1975 integer(kind=kint), intent(inout) :: map(ncols)
1976 integer(kind=kint) :: i, j
1977 do i = 1, ncols
1978 if (map(i) > 0) cycle
1979 call search_add_nodes(n_add_node, add_nodes, cols(crank,i), cols(clid,i), j)
1980 if (j == -1) stop 'ERROR: map_additional_nodes'
1981 map(i) = i0 + j
1982 enddo
1983 end subroutine map_additional_nodes
1984
1985 subroutine update_comm_table(hecMESHnew, n_add_node, add_nodes, i0)
1986 use m_hecmw_comm_f
1987 implicit none
1988 type (hecmwst_local_mesh), intent(inout) :: hecmeshnew
1989 integer(kind=kint), intent(in) :: n_add_node
1990 integer(kind=kint), allocatable, intent(inout) :: add_nodes(:,:)
1991 integer(kind=kint), intent(in) :: i0
1992 integer(kind=kint), allocatable :: n_add_imp(:), add_imp_index(:)
1993 integer(kind=kint), allocatable :: add_imp_item_remote(:), add_imp_item_local(:)
1994 integer(kind=kint), allocatable :: n_add_exp(:), add_exp_index(:), add_exp_item(:)
1995 integer(kind=kint), allocatable :: n_new_imp(:), n_new_exp(:)
1996 integer(kind=kint) :: npe, nnb, comm, new_nnb
1997 integer(kind=kint), pointer :: nbpe(:), new_nbpe(:)
1998 integer(kind=kint), pointer :: import_index(:), export_index(:), import_item(:), export_item(:)
1999 integer(kind=kint), pointer :: new_import_index(:), new_export_index(:)
2000 integer(kind=kint), pointer :: new_import_item(:), new_export_item(:)
2001 npe = hecmeshnew%PETOT
2002 nnb = hecmeshnew%n_neighbor_pe
2003 comm = hecmeshnew%MPI_COMM
2004 nbpe => hecmeshnew%neighbor_pe
2005 import_index => hecmeshnew%import_index
2006 export_index => hecmeshnew%export_index
2007 import_item => hecmeshnew%import_item
2008 export_item => hecmeshnew%export_item
2009 !
2010 call count_add_imp_per_rank(n_add_node, add_nodes, npe, n_add_imp)
2011 if (debug >= 3) write(0,*) ' DEBUG3: count add_imp per rank done'
2012 !
2013 allocate(add_imp_index(0:npe))
2014 call make_index(npe, n_add_imp, add_imp_index)
2015 if (debug >= 3) write(0,*) ' DEBUG3: make add_imp_index done'
2016 !
2017 call make_add_imp_item(n_add_node, add_nodes, npe, i0, add_imp_index, &
2018 add_imp_item_remote, add_imp_item_local)
2019 if (debug >= 3) write(0,*) ' DEBUG3: make add_imp_item done'
2020 !
2021 deallocate(add_nodes)
2022 !
2023 ! all_to_all n_add_imp -> n_add_exp
2024 !
2025 allocate(n_add_exp(npe))
2026 call hecmw_alltoall_int(n_add_imp, 1, n_add_exp, 1, comm)
2027 if (debug >= 3) write(0,*) ' DEBUG3: alltoall n_add_imp to n_add_exp done'
2028 !
2029 allocate(add_exp_index(0:npe))
2030 call make_index(npe, n_add_exp, add_exp_index)
2031 if (debug >= 3) write(0,*) ' DEBUG3: make add_exp_index done'
2032 !
2033 call send_recv_add_imp_exp_item(npe, add_imp_index, add_imp_item_remote, &
2034 add_exp_index, add_exp_item, comm)
2035 if (debug >= 3) write(0,*) ' DEBUG3: send recv add_imp/exp_item done'
2036 !
2037 ! count new import
2038 !
2039 call count_new_comm_nodes(npe, nnb, nbpe, import_index, n_add_imp, n_new_imp)
2040 if (debug >= 3) write(0,*) ' DEBUG3: count new comm_nodes (import) done'
2041 !
2042 ! count new export
2043 !
2044 call count_new_comm_nodes(npe, nnb, nbpe, export_index, n_add_exp, n_new_exp)
2045 if (debug >= 3) write(0,*) ' DEBUG3: count new comm_nodes (export) done'
2046 !
2047 call update_neighbor_pe(npe, n_new_imp, n_new_exp, new_nnb, new_nbpe)
2048 if (debug >= 3) write(0,*) ' DEBUG3: update neighbor_pe done'
2049 !
2050 ! merge import table: import
2051 !
2052 call merge_comm_table(npe, nnb, nbpe, import_index, import_item, &
2053 new_nnb, new_nbpe, add_imp_index, add_imp_item_local, n_add_imp, n_new_imp, &
2054 new_import_index, new_import_item)
2055 if (debug >= 3) write(0,*) ' DEBUG3: merge comm_table (import) done'
2056 !
2057 deallocate(n_add_imp)
2058 deallocate(add_imp_index)
2059 deallocate(add_imp_item_remote, add_imp_item_local)
2060 deallocate(n_new_imp)
2061 !
2062 ! merge export table: export
2063 !
2064 call merge_comm_table(npe, nnb, nbpe, export_index, export_item, &
2065 new_nnb, new_nbpe, add_exp_index, add_exp_item, n_add_exp, n_new_exp, &
2066 new_export_index, new_export_item)
2067 if (debug >= 3) write(0,*) ' DEBUG3: merge comm_table (export) done'
2068 !
2069 deallocate(n_add_exp)
2070 deallocate(add_exp_index)
2071 deallocate(add_exp_item)
2072 deallocate(n_new_exp)
2073 !
2074 deallocate(nbpe)
2075 deallocate(import_index,import_item)
2076 deallocate(export_index,export_item)
2077 hecmeshnew%n_neighbor_pe = new_nnb
2078 hecmeshnew%neighbor_pe => new_nbpe
2079 hecmeshnew%import_index => new_import_index
2080 hecmeshnew%export_index => new_export_index
2081 hecmeshnew%import_item => new_import_item
2082 hecmeshnew%export_item => new_export_item
2083 end subroutine update_comm_table
2084
2085 subroutine count_add_imp_per_rank(n_add_node, add_nodes, npe, n_add_imp)
2086 implicit none
2087 integer(kind=kint), intent(in) :: n_add_node
2088 integer(kind=kint), intent(in) :: add_nodes(cncol_item,n_add_node)
2089 integer(kind=kint), intent(in) :: npe
2090 integer(kind=kint), allocatable, intent(out) :: n_add_imp(:)
2091 integer(kind=kint) :: i, rank
2092 allocate(n_add_imp(npe))
2093 n_add_imp(:) = 0
2094 do i = 1, n_add_node
2095 rank = add_nodes(crank,i)
2096 n_add_imp(rank+1) = n_add_imp(rank+1) + 1
2097 enddo
2098 end subroutine count_add_imp_per_rank
2099
2100 subroutine make_add_imp_item(n_add_node, add_nodes, npe, i0, add_imp_index, &
2101 add_imp_item_remote, add_imp_item_local)
2102 implicit none
2103 integer(kind=kint), intent(in) :: n_add_node
2104 integer(kind=kint), intent(in) :: add_nodes(cncol_item,n_add_node)
2105 integer(kind=kint), intent(in) :: npe, i0
2106 integer(kind=kint), allocatable, intent(in) :: add_imp_index(:)
2107 integer(kind=kint), allocatable, intent(out) :: add_imp_item_remote(:), add_imp_item_local(:)
2108 integer(kind=kint), allocatable :: cnt(:)
2109 integer(kind=kint) :: i, lid, rank, ipe
2110 allocate(add_imp_item_remote(add_imp_index(npe)))
2111 allocate(add_imp_item_local(add_imp_index(npe)))
2112 allocate(cnt(npe))
2113 cnt(:) = 0
2114 do i = 1, n_add_node
2115 lid = add_nodes(clid,i)
2116 rank = add_nodes(crank,i)
2117 ipe = rank + 1
2118 cnt(ipe) = cnt(ipe) + 1
2119 add_imp_item_remote(add_imp_index(ipe-1) + cnt(ipe)) = lid
2120 add_imp_item_local(add_imp_index(ipe-1) + cnt(ipe)) = i0 + i
2121 enddo
2122 deallocate(cnt)
2123 end subroutine make_add_imp_item
2124
2125 subroutine send_recv_add_imp_exp_item(npe, add_imp_index, add_imp_item_remote, &
2126 add_exp_index, add_exp_item, mpi_comm)
2127 use m_hecmw_comm_f
2128 implicit none
2129 integer(kind=kint), intent(in) :: npe
2130 integer(kind=kint), allocatable, intent(in) :: add_imp_index(:), add_imp_item_remote(:)
2131 integer(kind=kint), allocatable, intent(in) :: add_exp_index(:)
2132 integer(kind=kint), allocatable, intent(out) :: add_exp_item(:)
2133 integer(kind=kint), intent(in) :: mpi_comm
2134 integer(kind=kint) :: n_send, i, irank, is, ie, len, tag
2135 integer(kind=kint), allocatable :: requests(:)
2136 integer(kind=kint), allocatable :: statuses(:,:)
2137 allocate(add_exp_item(add_exp_index(npe)))
2138 allocate(requests(npe))
2139 allocate(statuses(hecmw_status_size, npe))
2140 n_send = 0
2141 do i = 1, npe
2142 irank = i-1
2143 is = add_imp_index(i-1)+1
2144 ie = add_imp_index(i)
2145 len = ie - is + 1
2146 if (len == 0) cycle
2147 tag = 4001
2148 n_send = n_send + 1
2149 call hecmw_isend_int(add_imp_item_remote(is:ie), len, irank, tag, &
2150 mpi_comm, requests(n_send))
2151 enddo
2152 !
2153 do i = 1, npe
2154 irank = i-1
2155 is = add_exp_index(i-1)+1
2156 ie = add_exp_index(i)
2157 len = ie - is + 1
2158 if (len == 0) cycle
2159 tag = 4001
2160 call hecmw_recv_int(add_exp_item(is:ie), len, irank, tag, &
2161 mpi_comm, statuses(:,1))
2162 enddo
2163 call hecmw_waitall(n_send, requests, statuses)
2164 end subroutine send_recv_add_imp_exp_item
2165
2166 subroutine count_new_comm_nodes(npe, org_nnb, org_nbpe, org_index, n_add, n_new)
2167 implicit none
2168 integer(kind=kint), intent(in) :: npe, org_nnb
2169 !integer(kind=kint), intent(in) :: org_nbpe(org_nnb), org_index(0:org_nnb), n_add(npe)
2170 integer(kind=kint), pointer, intent(in) :: org_nbpe(:), org_index(:)
2171 integer(kind=kint), intent(in) :: n_add(:)
2172 integer(kind=kint), allocatable, intent(out) :: n_new(:)
2173 integer(kind=kint) :: i, irank, n_org
2174 allocate(n_new(npe))
2175 n_new(:) = n_add(:)
2176 do i = 1, org_nnb
2177 irank = org_nbpe(i)
2178 n_org = org_index(i) - org_index(i-1)
2179 n_new(irank+1) = n_new(irank+1) + n_org
2180 enddo
2181 end subroutine count_new_comm_nodes
2182
2183 subroutine update_neighbor_pe(npe, n_new_imp, n_new_exp, &
2184 new_nnb, new_nbpe)
2185 implicit none
2186 integer(kind=kint), intent(in) :: npe
2187 integer(kind=kint), intent(in) :: n_new_imp(npe), n_new_exp(npe)
2188 integer(kind=kint), intent(out) :: new_nnb
2189 integer(kind=kint), pointer, intent(out) :: new_nbpe(:)
2190 integer(kind=kint) :: i
2191 new_nnb = 0
2192 do i = 1, npe
2193 if (n_new_imp(i) > 0 .or. n_new_exp(i) > 0) new_nnb = new_nnb+1
2194 enddo
2195 allocate(new_nbpe(new_nnb))
2196 new_nnb = 0
2197 do i = 1, npe
2198 if (n_new_imp(i) > 0 .or. n_new_exp(i) > 0) then
2199 new_nnb = new_nnb+1
2200 new_nbpe(new_nnb) = i-1
2201 endif
2202 enddo
2203 end subroutine update_neighbor_pe
2204
2205 subroutine merge_comm_table(npe, org_nnb, org_nbpe, org_index, org_item, &
2206 new_nnb, new_nbpe, add_index, add_item, n_add, n_new, new_index, new_item)
2207 implicit none
2208 integer(kind=kint), intent(in) :: npe, org_nnb
2209 !integer(kind=kint), intent(in) :: org_nbpe(org_nnb), org_index(0:org_nnb), org_item(:)
2210 integer(kind=kint), pointer, intent(in) :: org_nbpe(:), org_index(:), org_item(:)
2211 integer(kind=kint), intent(in) :: new_nnb
2212 !integer(kind=kint), intent(in) :: new_nbpe(new_nnb), add_index(0:npe), add_item(:)
2213 integer(kind=kint), pointer, intent(in) :: new_nbpe(:)
2214 integer(kind=kint), allocatable, intent(in) :: add_index(:), add_item(:)
2215 integer(kind=kint), intent(in) :: n_add(npe), n_new(npe)
2216 integer(kind=kint), pointer, intent(out) :: new_index(:), new_item(:)
2217 integer(kind=kint), allocatable :: cnt(:)
2218 integer(kind=kint) :: i, irank, j, jrank, i0, j0, len
2219 ! if (associated(new_index)) deallocate(new_index)
2220 ! if (associated(new_item)) deallocate(new_item)
2221 allocate(new_index(0:new_nnb))
2222 new_index(0) = 0
2223 do i = 1, new_nnb
2224 irank = new_nbpe(i)
2225 new_index(i) = new_index(i-1) + n_new(irank+1)
2226 enddo
2227 allocate(new_item(new_index(new_nnb)))
2228 allocate(cnt(npe))
2229 cnt(:) = 0
2230 j = 1
2231 jrank = new_nbpe(j)
2232 do i = 1, org_nnb
2233 if (org_index(i) - org_index(i-1) == 0) cycle
2234 irank = org_nbpe(i)
2235 do while (jrank < irank)
2236 j = j + 1
2237 if (j > new_nnb) exit
2238 jrank = new_nbpe(j)
2239 enddo
2240 if (jrank /= irank) stop 'ERROR: merging comm table: org into new'
2241 i0 = org_index(i-1)
2242 len = org_index(i) - i0
2243 j0 = new_index(j-1)
2244 new_item(j0+1:j0+len) = org_item(i0+1:i0+len)
2245 cnt(jrank+1) = len
2246 enddo
2247 j = 1
2248 jrank = new_nbpe(j)
2249 do i = 1, npe
2250 if (n_add(i) == 0) cycle
2251 irank = i-1
2252 do while (jrank < irank)
2253 j = j + 1
2254 jrank = new_nbpe(j)
2255 enddo
2256 if (jrank /= irank) stop 'ERROR: merging comm table: add into new'
2257 i0 = add_index(i-1)
2258 len = add_index(i) - i0
2259 j0 = new_index(j-1) + cnt(jrank+1)
2260 new_item(j0+1:j0+len) = add_item(i0+1:i0+len)
2261 cnt(jrank+1) = cnt(jrank+1) + len
2262 if (cnt(jrank+1) /= new_index(j)-new_index(j-1)) stop 'ERROR: merging comm table'
2263 enddo
2264 deallocate(cnt)
2265 end subroutine merge_comm_table
2266
2267 subroutine copy_vals_to_bt_int(nnb, imp_rows_index, imp_cols_index, &
2268 imp_rows_item, map, ndof2, imp_vals_item, BT_int)
2269 implicit none
2270 integer(kind=kint), intent(in) :: nnb
2271 integer(kind=kint), allocatable, intent(in) :: imp_rows_index(:), imp_cols_index(:)
2272 integer(kind=kint), intent(in) :: imp_rows_item(:,:), map(:)
2273 integer(kind=kint), intent(in) :: ndof2
2274 real(kind=kreal), intent(in) :: imp_vals_item(:)
2275 type (hecmwst_local_matrix), intent(inout) :: bt_int
2276 integer(kind=kint), allocatable :: cnt(:)
2277 integer(kind=kint) :: idom, is, ie, ic0, i, irow, ncol, j0, j
2278 allocate(cnt(bt_int%nr))
2279 cnt(:) = 0
2280 do idom = 1, nnb
2281 is = imp_rows_index(idom-1)+1
2282 ie = imp_rows_index(idom)
2283 ic0 = imp_cols_index(idom-1)
2284 do i = is, ie
2285 irow = imp_rows_item(1,i)
2286 ncol = imp_rows_item(2,i)
2287 if (irow < 1 .or. bt_int%nr < irow) stop 'ERROR: copy vals to BT_int: irow'
2288 j0 = bt_int%index(irow-1) + cnt(irow)
2289 do j = 1, ncol
2290 bt_int%item(j0+j) = map(ic0+j)
2291 bt_int%A(ndof2*(j0+j-1)+1:ndof2*(j0+j)) = imp_vals_item(ndof2*(ic0+j-1)+1:ndof2*(ic0+j))
2292 enddo
2293 cnt(irow) = cnt(irow) + ncol
2294 ic0 = ic0 + ncol
2295 enddo
2296 if (ic0 /= imp_cols_index(idom)) stop 'ERROR: copy vals to BT_int: ic0'
2297 enddo
2298 deallocate(cnt)
2299 end subroutine copy_vals_to_bt_int
2300
2301 subroutine sort_and_uniq_rows(BTmat)
2303 implicit none
2304 type (hecmwst_local_matrix), intent(inout) :: btmat
2305 integer(kind=kint) :: nr, ndof, ndof2
2306 integer(kind=kint) :: irow, is, ie, is_new, ie_new, i, i_new
2307 integer(kind=kint) :: ndup, ndup_tot
2308 integer(kind=kint) :: js, je, js_new, je_new
2309 integer(kind=kint) :: new_nnz
2310 integer(kind=kint), allocatable :: cnt(:)
2311 integer(kind=kint), pointer :: sort_item(:), new_index(:), new_item(:)
2312 real(kind=kreal), pointer :: new_a(:)
2313 logical :: sorted
2314 real(kind=kreal) :: t0, t1
2315 t0 = hecmw_wtime()
2316 nr = btmat%nr
2317 ! check if already sorted
2318 sorted = .true.
2319 outer: do irow = 1, nr
2320 is = btmat%index(irow-1)+1
2321 ie = btmat%index(irow)
2322 do i = is, ie-1
2323 if (btmat%item(i) >= btmat%item(i+1)) then
2324 sorted = .false.
2325 exit outer
2326 endif
2327 enddo
2328 end do outer
2329 t1 = hecmw_wtime()
2330 if (timer >= 4) write(0, '(A,f10.4,L2)') "####### sort_and_uniq_rows (1) : ",t1-t0,sorted
2331 t0 = hecmw_wtime()
2332 if (sorted) return
2333 ! perform sort
2334 ndof = btmat%ndof
2335 ndof2 = ndof*ndof
2336 ! duplicate item array (sort_item)
2337 allocate(sort_item(btmat%nnz))
2338 do i = 1, btmat%nnz
2339 sort_item(i) = btmat%item(i)
2340 enddo
2341 ! sort and uniq item for each row
2342 allocate(cnt(nr))
2343 ndup_tot = 0
2344 !$omp parallel do default(none), &
2345 !$omp& schedule(dynamic,1), &
2346 !$omp& private(irow,is,ie,ndup), &
2347 !$omp& shared(nr,BTmat,sort_item,cnt), &
2348 !$omp& reduction(+:ndup_tot)
2349 do irow = 1, nr
2350 is = btmat%index(irow-1)+1
2351 ie = btmat%index(irow)
2352 call hecmw_qsort_int_array(sort_item, is, ie)
2353 call hecmw_uniq_int_array(sort_item, is, ie, ndup)
2354 cnt(irow) = (ie-is+1) - ndup
2355 ndup_tot = ndup_tot + ndup
2356 enddo
2357 !$omp end parallel do
2358 t1 = hecmw_wtime()
2359 if (timer >= 4) write(0, '(A,f10.4,I5)') "####### sort_and_uniq_rows (2) : ",t1-t0,ndup_tot
2360 t0 = hecmw_wtime()
2361 ! make new index and item array (new_index, new_item)
2362 if (ndup_tot == 0) then
2363 new_index => btmat%index
2364 new_nnz = btmat%nnz
2365 new_item => sort_item
2366 else
2367 allocate(new_index(0:nr))
2368 call make_index(nr, cnt, new_index)
2369 new_nnz = new_index(nr)
2370 allocate(new_item(new_nnz))
2371 do irow = 1, nr
2372 is = btmat%index(irow-1)+1
2373 ie = is+cnt(irow)-1
2374 is_new = new_index(irow-1)+1
2375 ie_new = is_new+cnt(irow)-1
2376 new_item(is_new:ie_new) = sort_item(is:ie)
2377 enddo
2378 deallocate(sort_item)
2379 endif
2380 deallocate(cnt)
2381 t1 = hecmw_wtime()
2382 if (timer >= 4) write(0, '(A,f10.4)') "####### sort_and_uniq_rows (3) : ",t1-t0
2383 t0 = hecmw_wtime()
2384 ! allocate and clear value array (new_A)
2385 allocate(new_a(ndof2*new_nnz))
2386 new_a(:) = 0.d0
2387 ! copy/add value from old A to new A
2388 !$omp parallel do default(none), &
2389 !$omp& schedule(dynamic,1), &
2390 !$omp& private(irow,is,ie,is_new,ie_new,i,i_new,js,je,js_new,je_new), &
2391 !$omp& shared(nr,BTmat,new_index,new_item,ndof2,new_A)
2392 do irow = 1, nr
2393 is = btmat%index(irow-1)+1
2394 ie = btmat%index(irow)
2395 is_new = new_index(irow-1)+1
2396 ie_new = new_index(irow)
2397 ! for each item in row
2398 do i = is, ie
2399 ! find place in new item
2400 call hecmw_bsearch_int_array(new_item, is_new, ie_new, btmat%item(i), i_new)
2401 if (i_new == -1) stop 'ERROR: sort_and_uniq_rows'
2402 js = ndof2*(i-1)+1
2403 je = ndof2*i
2404 js_new = ndof2*(i_new-1)+1
2405 je_new = ndof2*i_new
2406 new_a(js_new:je_new) = new_a(js_new:je_new) + btmat%A(js:je)
2407 enddo
2408 enddo
2409 !$omp end parallel do
2410 t1 = hecmw_wtime()
2411 if (timer >= 4) write(0, '(A,f10.4)') "####### sort_and_uniq_rows (4) : ",t1-t0
2412 t0 = hecmw_wtime()
2413 ! deallocate/update nnz, index, item, A
2414 if (ndup_tot == 0) then
2415 deallocate(btmat%item)
2416 btmat%item => new_item
2417 deallocate(btmat%A)
2418 btmat%A => new_a
2419 else
2420 btmat%nnz = new_nnz
2421 deallocate(btmat%index)
2422 btmat%index => new_index
2423 deallocate(btmat%item)
2424 btmat%item => new_item
2425 deallocate(btmat%A)
2426 btmat%A => new_a
2427 endif
2428 end subroutine sort_and_uniq_rows
2429
2430 subroutine hecmw_localmat_add(Amat, Bmat, Cmat)
2431 implicit none
2432 type (hecmwst_local_matrix), intent(in) :: amat
2433 type (hecmwst_local_matrix), intent(in) :: bmat
2434 type (hecmwst_local_matrix), intent(out) :: cmat
2435 integer(kind=kint) :: ndof, ndof2, nr, nc, i, icnt, js, je, j, jcol, idx, i0, k
2436 integer(kind=kint), allocatable :: iw(:)
2437 if (amat%ndof /= bmat%ndof) stop 'ERROR: hecmw_localmat_add: non-matching ndof'
2438 ndof = amat%ndof
2439 ndof2 = ndof*ndof
2440 nr = min(amat%nr, bmat%nr)
2441 nc = max(amat%nc, bmat%nc)
2442 cmat%ndof = ndof
2443 cmat%nr = nr
2444 cmat%nc = nc
2445 cmat%nnz = 0
2446 allocate(cmat%index(0:nr))
2447 cmat%index(0) = 0
2448 allocate(iw(nc))
2449 do i = 1, nr
2450 icnt = 0
2451 ! Amat
2452 js = amat%index(i-1)+1
2453 je = amat%index(i)
2454 do j = js, je
2455 jcol = amat%item(j)
2456 icnt = icnt + 1
2457 iw(icnt) = jcol
2458 enddo
2459 ! Bmat
2460 js = bmat%index(i-1)+1
2461 je = bmat%index(i)
2462 lj1: do j = js, je
2463 jcol = bmat%item(j)
2464 do k = 1, icnt
2465 if (iw(k) == jcol) cycle lj1
2466 enddo
2467 icnt = icnt + 1
2468 iw(icnt) = jcol
2469 enddo lj1
2470 cmat%index(i) = cmat%index(i-1) + icnt
2471 enddo
2472 cmat%nnz = cmat%index(nr)
2473 allocate(cmat%item(cmat%nnz))
2474 allocate(cmat%A(ndof2*cmat%nnz))
2475 do i = 1, nr
2476 i0 = cmat%index(i-1)
2477 icnt = 0
2478 ! Amat
2479 js = amat%index(i-1)+1
2480 je = amat%index(i)
2481 do j = js, je
2482 jcol = amat%item(j)
2483 icnt = icnt + 1
2484 idx = i0 + icnt
2485 cmat%item(idx) = jcol
2486 cmat%A(ndof2*(idx-1)+1:ndof2*idx) = amat%A(ndof2*(j-1)+1:ndof2*j)
2487 enddo
2488 ! Bmat
2489 js = bmat%index(i-1)+1
2490 je = bmat%index(i)
2491 lj2: do j = js, je
2492 jcol = bmat%item(j)
2493 do k = 1, icnt
2494 idx = i0 + k
2495 if (cmat%item(idx) == jcol) then
2496 cmat%A(ndof2*(idx-1)+1:ndof2*idx) = &
2497 cmat%A(ndof2*(idx-1)+1:ndof2*idx) + bmat%A(ndof2*(j-1)+1:ndof2*j)
2498 cycle lj2
2499 endif
2500 enddo
2501 icnt = icnt + 1
2502 idx = i0 + icnt
2503 cmat%item(idx) = jcol
2504 cmat%A(ndof2*(idx-1)+1:ndof2*idx) = bmat%A(ndof2*(j-1)+1:ndof2*j)
2505 enddo lj2
2506 if (i0 + icnt /= cmat%index(i)) stop 'ERROR: merge localmat'
2507 enddo
2508 call sort_and_uniq_rows(cmat)
2509 end subroutine hecmw_localmat_add
2510
2511 ! subroutine hecmw_localmat_add(Amat, Bmat, Cmat)
2512 ! implicit none
2513 ! type (hecmwST_local_matrix), intent(in) :: Amat
2514 ! type (hecmwST_local_matrix), intent(in) :: Bmat
2515 ! type (hecmwST_local_matrix), intent(out) :: Cmat
2516 ! integer(kind=kint) :: ndof, ndof2, nr, nc, i, js, je, j, jcol, nnz_row, idx, ks, ke, k, kcol
2517 ! if (Amat%ndof /= Bmat%ndof) stop 'ERROR: hecmw_localmat_add: non-matching ndof'
2518 ! ndof = Amat%ndof
2519 ! ndof2 = ndof*ndof
2520 ! nr = min(Amat%nr, Bmat%nr)
2521 ! nc = max(Amat%nc, Bmat%nc)
2522 ! Cmat%ndof = ndof
2523 ! Cmat%nr = nr
2524 ! Cmat%nc = nc
2525 ! Cmat%nnz = Amat%index(nr) + Bmat%index(nr)
2526 ! allocate(Cmat%index(0:nr))
2527 ! allocate(Cmat%item(Cmat%nnz))
2528 ! allocate(Cmat%A(ndof2 * Cmat%nnz))
2529 ! Cmat%index(0) = 0
2530 ! idx = 0
2531 ! do i = 1, nr
2532 ! ! Amat
2533 ! js = Amat%index(i-1)+1
2534 ! je = Amat%index(i)
2535 ! do j = js, je
2536 ! idx = idx + 1
2537 ! Cmat%item(idx) = Amat%item(j)
2538 ! Cmat%A(ndof2*(idx-1)+1:ndof2*idx) = Amat%A(ndof2*(j-1)+1:ndof2*j)
2539 ! enddo
2540 ! ! Bmat
2541 ! js = Bmat%index(i-1)+1
2542 ! je = Bmat%index(i)
2543 ! do j = js, je
2544 ! idx = idx + 1
2545 ! Cmat%item(idx) = Bmat%item(j)
2546 ! Cmat%A(ndof2*(idx-1)+1:ndof2*idx) = Bmat%A(ndof2*(j-1)+1:ndof2*j)
2547 ! enddo
2548 ! Cmat%index(i) = idx
2549 ! enddo
2550 ! if (Cmat%index(nr) /= Cmat%nnz) stop 'ERROR: merge localmat'
2551 ! call sort_and_uniq_rows(Cmat)
2552 ! end subroutine hecmw_localmat_add
2553
2554 subroutine hecmw_localmat_init_with_hecmat(BKmat, hecMAT, num_lagrange)
2555 implicit none
2556 type (hecmwst_local_matrix), intent(inout) :: bkmat
2557 type (hecmwst_matrix), intent(in) :: hecmat
2558 integer(kind=kint), optional, intent(in) :: num_lagrange
2559 integer(kind=kint) :: ndof, ndof2, i, idx, idx2, js, je, j, k
2560 integer(kind=kint), allocatable :: incl_nz(:), cnt(:)
2561 logical :: check_nonzero
2562 !check_nonzero = .false.
2563 check_nonzero = .true. !!! always checking nonzero seems to be faster
2564 !
2565 ndof = hecmat%NDOF
2566 ndof2 = ndof*ndof
2567 ! nr, nc, nnz
2568 bkmat%nr = hecmat%NP
2569 bkmat%nc = hecmat%NP
2570 bkmat%ndof = ndof
2571 !
2572 if (present(num_lagrange)) then !!! TEMPORARY (DUE TO WRONG conMAT WHEN num_lagrange==0) !!!
2573 if (num_lagrange == 0) then
2574 bkmat%nnz = 0
2575 allocate(bkmat%index(0:bkmat%nr))
2576 bkmat%index(:) = 0
2577 bkmat%item => null()
2578 bkmat%A => null()
2579 return
2580 endif
2581 check_nonzero = .true.
2582 endif
2583 !
2584 if (check_nonzero) then
2585 allocate(incl_nz(hecmat%NPL + hecmat%NPU + hecmat%NP))
2586 allocate(cnt(bkmat%nr))
2587 incl_nz(:) = 0
2588 !$omp parallel default(none), &
2589 !$omp& private(i,idx,js,je,j,k), &
2590 !$omp& shared(BKmat,hecMAT,cnt,ndof2,incl_nz)
2591 !$omp do
2592 do i = 1, bkmat%nr
2593 idx = hecmat%indexL(i-1) + (i-1) + hecmat%indexU(i-1)
2594 cnt(i) = 0
2595 ! lower
2596 js = hecmat%indexL(i-1)+1
2597 je = hecmat%indexL(i)
2598 do j = js, je
2599 idx = idx + 1
2600 do k = 1, ndof2
2601 if (hecmat%AL(ndof2*(j-1)+k) /= 0.0d0) then
2602 incl_nz(idx) = 1
2603 cnt(i) = cnt(i) + 1
2604 exit
2605 endif
2606 enddo
2607 enddo
2608 ! diag
2609 idx = idx + 1
2610 do k = 1, ndof2
2611 if (hecmat%D(ndof2*(i-1)+k) /= 0.0d0) then
2612 incl_nz(idx) = 1
2613 cnt(i) = cnt(i) + 1
2614 exit
2615 endif
2616 enddo
2617 ! upper
2618 js = hecmat%indexU(i-1)+1
2619 je = hecmat%indexU(i)
2620 do j = js, je
2621 idx = idx + 1
2622 do k = 1, ndof2
2623 if (hecmat%AU(ndof2*(j-1)+k) /= 0.0d0) then
2624 incl_nz(idx) = 1
2625 cnt(i) = cnt(i) + 1
2626 exit
2627 endif
2628 enddo
2629 enddo
2630 if (idx /= hecmat%indexL(i) + i + hecmat%indexU(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: count'
2631 enddo
2632 !$omp end do
2633 !$omp end parallel
2634 ! index
2635 allocate(bkmat%index(0:bkmat%nr))
2636 call make_index(bkmat%nr, cnt, bkmat%index)
2637 deallocate(cnt)
2638 bkmat%nnz = bkmat%index(bkmat%nr)
2639 ! item, A
2640 allocate(bkmat%item(bkmat%nnz))
2641 allocate(bkmat%A(ndof2 * bkmat%nnz))
2642 !$omp parallel default(none), &
2643 !$omp& private(i,idx,idx2,js,je,j), &
2644 !$omp& shared(BKmat,hecMAT,ndof2,incl_nz)
2645 !$omp do
2646 do i = 1, bkmat%nr
2647 idx = hecmat%indexL(i-1) + (i-1) + hecmat%indexU(i-1)
2648 idx2 = bkmat%index(i-1)
2649 ! lower
2650 js = hecmat%indexL(i-1)+1
2651 je = hecmat%indexL(i)
2652 do j = js, je
2653 idx = idx + 1
2654 if (incl_nz(idx) == 1) then
2655 idx2 = idx2 + 1
2656 bkmat%item(idx2) = hecmat%itemL(j)
2657 bkmat%A(ndof2*(idx2-1)+1:ndof2*idx2) = hecmat%AL(ndof2*(j-1)+1:ndof2*j)
2658 endif
2659 enddo
2660 ! diag
2661 idx = idx + 1
2662 if (incl_nz(idx) == 1) then
2663 idx2 = idx2 + 1
2664 bkmat%item(idx2) = i
2665 bkmat%A(ndof2*(idx2-1)+1:ndof2*idx2) = hecmat%D(ndof2*(i-1)+1:ndof2*i)
2666 endif
2667 ! upper
2668 js = hecmat%indexU(i-1)+1
2669 je = hecmat%indexU(i)
2670 do j = js, je
2671 idx = idx + 1
2672 if (incl_nz(idx) == 1) then
2673 idx2 = idx2 + 1
2674 bkmat%item(idx2) = hecmat%itemU(j)
2675 bkmat%A(ndof2*(idx2-1)+1:ndof2*idx2) = hecmat%AU(ndof2*(j-1)+1:ndof2*j)
2676 endif
2677 enddo
2678 if (idx /= hecmat%indexL(i) + i + hecmat%indexU(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: copy'
2679 if (idx2 /= bkmat%index(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: index'
2680 enddo
2681 !$omp end do
2682 !$omp end parallel
2683 deallocate(incl_nz)
2684 else
2685 bkmat%nnz = hecmat%NPL + hecmat%NP + hecmat%NPU
2686 allocate(bkmat%index(0:bkmat%nr))
2687 allocate(bkmat%item(bkmat%nnz))
2688 allocate(bkmat%A(ndof2 * bkmat%nnz))
2689 bkmat%index(0) = 0
2690 !$omp parallel do default(none), &
2691 !$omp& private(i,idx,js,je,j), &
2692 !$omp& shared(BKmat,hecMAT,ndof2)
2693 do i = 1, bkmat%nr
2694 idx = hecmat%indexL(i-1) + (i-1) + hecmat%indexU(i-1)
2695 ! lower
2696 js = hecmat%indexL(i-1)+1
2697 je = hecmat%indexL(i)
2698 do j = js, je
2699 idx = idx + 1
2700 bkmat%item(idx) = hecmat%itemL(j)
2701 bkmat%A(ndof2*(idx-1)+1:ndof2*idx) = hecmat%AL(ndof2*(j-1)+1:ndof2*j)
2702 enddo
2703 ! diag
2704 idx = idx + 1
2705 bkmat%item(idx) = i
2706 bkmat%A(ndof2*(idx-1)+1:ndof2*idx) = hecmat%D(ndof2*(i-1)+1:ndof2*i)
2707 ! upper
2708 js = hecmat%indexU(i-1)+1
2709 je = hecmat%indexU(i)
2710 do j = js, je
2711 idx = idx + 1
2712 bkmat%item(idx) = hecmat%itemU(j)
2713 bkmat%A(ndof2*(idx-1)+1:ndof2*idx) = hecmat%AU(ndof2*(j-1)+1:ndof2*j)
2714 enddo
2715 bkmat%index(i) = idx
2716 if (idx /= hecmat%indexL(i) + i + hecmat%indexU(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: copy'
2717 enddo
2718 !$omp end parallel do
2719 endif
2720 end subroutine hecmw_localmat_init_with_hecmat
2721
2722 subroutine hecmw_localmat_add_hecmat(BKmat, hecMAT)
2723 implicit none
2724 type (hecmwst_local_matrix), intent(inout) :: bkmat
2725 type (hecmwst_matrix), intent(in) :: hecmat
2726 type (hecmwst_local_matrix) :: w1mat, w2mat
2727 !! Should Be Simple If Non-Zero Profile Is Kept !!
2728 call hecmw_localmat_init_with_hecmat(w1mat, hecmat)
2729 if (debug >= 3) then
2730 write(700+hecmw_comm_get_rank(),*) 'BKmat (hecMAT)'
2731 if (debug == 3) then
2732 call hecmw_localmat_write_ij(w1mat, 700+hecmw_comm_get_rank())
2733 else
2734 call hecmw_localmat_write(w1mat, 700+hecmw_comm_get_rank())
2735 endif
2736 endif
2737 call hecmw_localmat_add(bkmat, w1mat, w2mat)
2738 call hecmw_localmat_free(bkmat)
2739 call hecmw_localmat_free(w1mat)
2740 bkmat%nr = w2mat%nr
2741 bkmat%nc = w2mat%nc
2742 bkmat%nnz = w2mat%nnz
2743 bkmat%ndof = w2mat%ndof
2744 bkmat%index => w2mat%index
2745 bkmat%item => w2mat%item
2746 bkmat%A => w2mat%A
2747 end subroutine hecmw_localmat_add_hecmat
2748
2749 subroutine hecmw_localmat_multmat(BKmat, BTmat, hecMESH, BKTmat)
2750 implicit none
2751 type (hecmwst_local_matrix), intent(inout) :: bkmat
2752 type (hecmwst_local_matrix), intent(inout) :: btmat
2753 type (hecmwst_local_mesh), intent(inout) :: hecmesh
2754 type (hecmwst_local_matrix), intent(out) :: bktmat
2755 type (hecmwst_matrix_comm) :: heccomm
2756 type (hecmwst_local_mesh) :: hecmeshnew
2757 type (hecmwst_local_matrix), allocatable :: bt_exp(:)
2758 type (hecmwst_local_matrix) :: bt_imp, bt_all
2759 integer(kind=kint), allocatable :: exp_cols_index(:)
2760 integer(kind=kint), allocatable :: exp_cols_item(:,:)
2761 real(kind=kreal) :: t0, t1
2762 t0 = hecmw_wtime()
2763 !
2764 call make_comm_table(bkmat, hecmesh, heccomm)
2765 if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: make_comm_table done'
2766 t1 = hecmw_wtime()
2767 if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (1) : ',t1-t0
2768 t0 = hecmw_wtime()
2769 !
2770 if (btmat%nr > hecmesh%nn_internal) then
2771 ! consider only internal part of BTmat
2772 if (debug >= 1) write(0,'(A)') 'DEBUG: hecmw_localmat_multmat: ignore external part of BTmat'
2773 btmat%nr = hecmesh%nn_internal
2774 btmat%nnz = btmat%index(btmat%nr)
2775 endif
2776 !
2777 call extract_bt_exp(btmat, heccomm, bt_exp)
2778 if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: extract_BT_exp done'
2779 t1 = hecmw_wtime()
2780 if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (2) : ',t1-t0
2781 t0 = hecmw_wtime()
2782 !
2783 call prepare_column_info(hecmesh, bt_exp, exp_cols_index, exp_cols_item)
2784 if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: prepare column info done'
2785 t1 = hecmw_wtime()
2786 if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (3) : ',t1-t0
2787 t0 = hecmw_wtime()
2788 !
2789 call send_bt_exp_and_recv_bt_imp(hecmesh, heccomm, bt_exp, exp_cols_index, exp_cols_item, bt_imp, hecmeshnew)
2790 if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: send BT_exp and recv BT_imp done'
2791 t1 = hecmw_wtime()
2792 if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (4) : ',t1-t0
2793 t0 = hecmw_wtime()
2794 call free_comm_table(heccomm)
2795 !
2796 call concat_btmat_and_bt_imp(btmat, bt_imp, bt_all)
2797 if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: concat BTmat and BT_imp into BT_all done'
2798 t1 = hecmw_wtime()
2799 if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (5) : ',t1-t0
2800 t0 = hecmw_wtime()
2801 call hecmw_localmat_free(bt_imp)
2802 !
2803 call multiply_mat_mat(bkmat, bt_all, bktmat)
2804 if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: multiply BKmat and BT_all into BKTmat done'
2805 t1 = hecmw_wtime()
2806 if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (6) : ',t1-t0
2807 t0 = hecmw_wtime()
2808 call hecmw_localmat_free(bt_all)
2809 !
2810 if (hecmesh%n_neighbor_pe > 0) then
2811 hecmesh%n_node = hecmeshnew%n_node
2812 hecmesh%n_neighbor_pe = hecmeshnew%n_neighbor_pe
2813 deallocate(hecmesh%neighbor_pe)
2814 deallocate(hecmesh%import_index)
2815 deallocate(hecmesh%export_index)
2816 deallocate(hecmesh%import_item)
2817 deallocate(hecmesh%export_item)
2818 deallocate(hecmesh%node_ID)
2819 deallocate(hecmesh%global_node_ID)
2820 hecmesh%neighbor_pe => hecmeshnew%neighbor_pe
2821 hecmesh%import_index => hecmeshnew%import_index
2822 hecmesh%export_index => hecmeshnew%export_index
2823 hecmesh%import_item => hecmeshnew%import_item
2824 hecmesh%export_item => hecmeshnew%export_item
2825 hecmesh%node_ID => hecmeshnew%node_ID
2826 hecmesh%global_node_ID => hecmeshnew%global_node_ID
2827 if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: update hecMESH done'
2828 t1 = hecmw_wtime()
2829 if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (7) : ',t1-t0
2830 endif
2831 end subroutine hecmw_localmat_multmat
2832
2833 subroutine make_comm_table(BKmat, hecMESH, hecCOMM)
2834 use m_hecmw_comm_f
2835 implicit none
2836 type (hecmwst_local_matrix), intent(in) :: bkmat
2837 type (hecmwst_local_mesh), intent(in) :: hecmesh
2838 type (hecmwst_matrix_comm), intent(out) :: heccomm
2839 integer(kind=kint) :: nn_int, nn_ext, nnb, i, icol, irank, idom, idx, n_send, tag, js, je, len
2840 integer(kind=kint), allocatable :: is_nz_col(:), imp_cnt(:), exp_cnt(:), import_item_remote(:)
2841 integer(kind=kint), allocatable :: requests(:), statuses(:,:)
2842 heccomm%zero = hecmesh%zero
2843 heccomm%HECMW_COMM = hecmesh%MPI_COMM
2844 heccomm%PETOT = hecmesh%PETOT
2845 heccomm%PEsmpTOT = hecmesh%PEsmpTOT
2846 heccomm%my_rank = hecmesh%my_rank
2847 heccomm%errnof = hecmesh%errnof
2848 heccomm%n_subdomain = hecmesh%n_subdomain
2849 heccomm%n_neighbor_pe = hecmesh%n_neighbor_pe
2850 allocate(heccomm%neighbor_pe(heccomm%n_neighbor_pe))
2851 heccomm%neighbor_pe(:) = hecmesh%neighbor_pe(:)
2852 !
2853 nn_int = hecmesh%nn_internal
2854 nn_ext = hecmesh%n_node - hecmesh%nn_internal
2855 nnb = heccomm%n_neighbor_pe
2856 !
2857 ! check_external_nz_cols (by profile (not number))
2858 allocate(is_nz_col(nn_ext))
2859 is_nz_col(:) = 0
2860 do i = 1, bkmat%index(nn_int)
2861 icol = bkmat%item(i)
2862 if (icol > nn_int) is_nz_col(icol - nn_int) = 1
2863 enddo
2864 !
2865 ! count_nz_cols_per_rank
2866 allocate(imp_cnt(nnb))
2867 imp_cnt(:) = 0
2868 do i = 1, nn_ext
2869 if (is_nz_col(i) == 1) then
2870 irank = hecmesh%node_ID(2*(nn_int+i))
2871 call rank_to_idom(hecmesh, irank, idom)
2872 imp_cnt(idom) = imp_cnt(idom) + 1
2873 endif
2874 enddo
2875 if (debug >= 3) write(0,*) ' DEBUG3: imp_cnt',imp_cnt(:)
2876 !
2877 ! make_index
2878 allocate(heccomm%import_index(0:nnb))
2879 call make_index(nnb, imp_cnt, heccomm%import_index)
2880 if (debug >= 3) write(0,*) ' DEBUG3: import_index',heccomm%import_index(:)
2881 !
2882 ! fill item
2883 allocate(heccomm%import_item(heccomm%import_index(nnb)))
2884 imp_cnt(:) = 0
2885 do i = 1, nn_ext
2886 if (is_nz_col(i) == 1) then
2887 irank = hecmesh%node_ID(2*(nn_int+i))
2888 call rank_to_idom(hecmesh, irank, idom)
2889 imp_cnt(idom) = imp_cnt(idom) + 1
2890 idx = heccomm%import_index(idom-1)+imp_cnt(idom)
2891 heccomm%import_item(idx) = nn_int+i
2892 endif
2893 enddo
2894 if (debug >= 3) write(0,*) ' DEBUG3: import_item',heccomm%import_item(:)
2895 !
2896 allocate(import_item_remote(heccomm%import_index(nnb)))
2897 do i = 1, heccomm%import_index(nnb)
2898 import_item_remote(i) = hecmesh%node_ID(2*heccomm%import_item(i)-1)
2899 enddo
2900 if (debug >= 3) write(0,*) ' DEBUG3: import_item_remote',import_item_remote(:)
2901 !
2902 allocate(requests(2*nnb))
2903 allocate(statuses(hecmw_status_size, 2*nnb))
2904 !
2905 ! send/recv
2906 n_send = 0
2907 do idom = 1, nnb
2908 irank = heccomm%neighbor_pe(idom)
2909 n_send = n_send + 1
2910 tag = 6001
2911 call hecmw_isend_int(imp_cnt(idom), 1, irank, tag, heccomm%HECMW_COMM, requests(n_send))
2912 if (imp_cnt(idom) > 0) then
2913 js = heccomm%import_index(idom-1)+1
2914 je = heccomm%import_index(idom)
2915 len = je-js+1
2916 n_send = n_send + 1
2917 tag = 6002
2918 call hecmw_isend_int(import_item_remote(js:je), len, irank, tag, &
2919 heccomm%HECMW_COMM, requests(n_send))
2920 endif
2921 enddo
2922 !
2923 ! index
2924 allocate(exp_cnt(nnb))
2925 do idom = 1, nnb
2926 irank = heccomm%neighbor_pe(idom)
2927 tag = 6001
2928 call hecmw_recv_int(exp_cnt(idom), 1, irank, tag, heccomm%HECMW_COMM, statuses(:,1))
2929 enddo
2930 allocate(heccomm%export_index(0:nnb))
2931 call make_index(nnb, exp_cnt, heccomm%export_index)
2932 if (debug >= 3) write(0,*) ' DEBUG3: export_index',heccomm%export_index(:)
2933 !
2934 ! item
2935 allocate(heccomm%export_item(heccomm%export_index(nnb)))
2936 do idom = 1, nnb
2937 if (exp_cnt(idom) <= 0) cycle
2938 irank = heccomm%neighbor_pe(idom)
2939 js = heccomm%export_index(idom-1)+1
2940 je = heccomm%export_index(idom)
2941 len = je-js+1
2942 tag = 6002
2943 call hecmw_recv_int(heccomm%export_item(js:je), len, irank, tag, &
2944 heccomm%HECMW_COMM, statuses(:,1))
2945 enddo
2946 if (debug >= 3) write(0,*) ' DEBUG3: export_item',heccomm%export_item(:)
2947 call hecmw_waitall(n_send, requests, statuses)
2948 !
2949 deallocate(imp_cnt)
2950 deallocate(exp_cnt)
2951 deallocate(import_item_remote)
2952 end subroutine make_comm_table
2953
2954 subroutine free_comm_table(hecCOMM)
2955 implicit none
2956 type (hecmwst_matrix_comm), intent(inout) :: heccomm
2957 deallocate(heccomm%neighbor_pe)
2958 deallocate(heccomm%import_index)
2959 deallocate(heccomm%import_item)
2960 deallocate(heccomm%export_index)
2961 deallocate(heccomm%export_item)
2962 end subroutine free_comm_table
2963
2964 subroutine extract_bt_exp(BTmat, hecCOMM, BT_exp)
2965 implicit none
2966 type (hecmwst_local_matrix), intent(in) :: btmat
2967 type (hecmwst_matrix_comm), intent(in) :: heccomm
2968 type (hecmwst_local_matrix), allocatable, intent(out) :: bt_exp(:)
2969 integer(kind=kint) :: ndof, ndof2, idom, idx_0, idx_n, j, jrow, nnz_row, idx, ks, ke, k
2970 if (heccomm%n_neighbor_pe == 0) return
2971 allocate(bt_exp(heccomm%n_neighbor_pe))
2972 ndof = btmat%ndof
2973 ndof2 = ndof * ndof
2974 do idom = 1, heccomm%n_neighbor_pe
2975 idx_0 = heccomm%export_index(idom-1)
2976 idx_n = heccomm%export_index(idom)
2977 bt_exp(idom)%nr = idx_n - idx_0
2978 bt_exp(idom)%nc = btmat%nc
2979 bt_exp(idom)%nnz = 0
2980 bt_exp(idom)%ndof = ndof
2981 allocate(bt_exp(idom)%index(0:bt_exp(idom)%nr))
2982 bt_exp(idom)%index(0) = 0
2983 do j = 1, bt_exp(idom)%nr
2984 jrow = heccomm%export_item(idx_0 + j)
2985 nnz_row = btmat%index(jrow) - btmat%index(jrow-1)
2986 bt_exp(idom)%index(j) = bt_exp(idom)%index(j-1) + nnz_row
2987 enddo
2988 bt_exp(idom)%nnz = bt_exp(idom)%index(bt_exp(idom)%nr)
2989 allocate(bt_exp(idom)%item(bt_exp(idom)%nnz))
2990 allocate(bt_exp(idom)%A(ndof2 * bt_exp(idom)%nnz))
2991 idx = 0
2992 do j = 1, bt_exp(idom)%nr
2993 jrow = heccomm%export_item(idx_0 + j)
2994 ks = btmat%index(jrow-1) + 1
2995 ke = btmat%index(jrow)
2996 do k = ks, ke
2997 idx = idx + 1
2998 bt_exp(idom)%item(idx) = btmat%item(k)
2999 bt_exp(idom)%A(ndof2*(idx-1)+1:ndof2*idx) = btmat%A(ndof2*(k-1)+1:ndof2*k)
3000 enddo
3001 if (idx /= bt_exp(idom)%index(j)) stop 'ERROR: extract BT_exp'
3002 enddo
3003 enddo
3004 end subroutine extract_bt_exp
3005
3006 subroutine send_bt_exp_and_recv_bt_imp(hecMESH, hecCOMM, BT_exp, exp_cols_index, exp_cols_item, BT_imp, hecMESHnew)
3007 use m_hecmw_comm_f
3008 implicit none
3009 type (hecmwst_local_mesh), intent(in) :: hecmesh
3010 type (hecmwst_matrix_comm), intent(in) :: heccomm
3011 type (hecmwst_local_matrix), allocatable, intent(inout) :: bt_exp(:)
3012 integer(kind=kint), allocatable, intent(inout) :: exp_cols_index(:)
3013 integer(kind=kint), allocatable, intent(inout) :: exp_cols_item(:,:)
3014 type (hecmwst_local_matrix), intent(out) :: bt_imp
3015 type (hecmwst_local_mesh), intent(inout) :: hecmeshnew
3016 integer(kind=kint), allocatable :: nnz_imp(:), cnt(:), index_imp(:)
3017 integer(kind=kint), allocatable :: imp_cols_index(:)
3018 integer(kind=kint), allocatable :: imp_cols_item(:,:)
3019 real(kind=kreal), allocatable :: imp_vals_item(:)
3020 integer(kind=kint) :: nnb, ndof, ndof2, idom, irank, nr, n_send, tag, idx_0, idx_n, j, jj, nnz
3021 integer(kind=kint), allocatable :: requests(:)
3022 integer(kind=kint), allocatable :: statuses(:,:)
3023 integer(kind=kint), allocatable :: map(:), add_nodes(:,:)
3024 integer(kind=kint) :: n_add_node, i0
3025 nnb = heccomm%n_neighbor_pe
3026 if (nnb == 0) then
3027 bt_imp%nr = 0
3028 bt_imp%nc = 0
3029 bt_imp%nnz = 0
3030 bt_imp%ndof = 0
3031 allocate(bt_imp%index(0:0))
3032 bt_imp%index(0) = 0
3033 return
3034 endif
3035 ndof = bt_exp(1)%ndof
3036 ndof2 = ndof*ndof
3037 allocate(requests(nnb*3))
3038 allocate(statuses(hecmw_status_size, nnb*3))
3039 n_send = 0
3040 do idom = 1, nnb
3041 irank = heccomm%neighbor_pe(idom)
3042 nr = bt_exp(idom)%nr
3043 if (nr == 0) cycle
3044 n_send = n_send + 1
3045 tag = 3001
3046 call hecmw_isend_int(bt_exp(idom)%index(0:bt_exp(idom)%nr), bt_exp(idom)%nr + 1, &
3047 irank, tag, heccomm%HECMW_COMM, requests(n_send))
3048 if (bt_exp(idom)%nnz == 0) cycle
3049 n_send = n_send + 1
3050 tag = 3002
3051 call hecmw_isend_int(exp_cols_item(1,exp_cols_index(idom-1)+1), &
3052 cncol_item * bt_exp(idom)%nnz, irank, tag, heccomm%HECMW_COMM, requests(n_send))
3053 n_send = n_send + 1
3054 tag = 3003
3055 call hecmw_isend_r(bt_exp(idom)%A, ndof2 * bt_exp(idom)%nnz, &
3056 irank, tag, heccomm%HECMW_COMM, requests(n_send))
3057 enddo
3058 !
3059 ! BT_imp%nr = hecCOMM%import_index(nnb)
3060 bt_imp%nr = hecmesh%n_node - hecmesh%nn_internal
3061 bt_imp%nc = 0 !!! TEMPORARY
3062 bt_imp%nnz = 0
3063 bt_imp%ndof = ndof
3064 !
3065 allocate(nnz_imp(nnb))
3066 allocate(cnt(bt_imp%nr))
3067 !
3068 cnt(:) = 0
3069 do idom = 1, nnb
3070 irank = heccomm%neighbor_pe(idom)
3071 idx_0 = heccomm%import_index(idom-1)
3072 idx_n = heccomm%import_index(idom)
3073 nr = idx_n - idx_0
3074 if (nr == 0) then
3075 nnz_imp(idom) = 0
3076 cycle
3077 endif
3078 allocate(index_imp(0:nr))
3079 tag = 3001
3080 call hecmw_recv_int(index_imp(0:nr), nr+1, irank, tag, &
3081 heccomm%HECMW_COMM, statuses(:,1))
3082 nnz_imp(idom) = index_imp(nr)
3083 do j = 1, nr
3084 jj = heccomm%import_item(idx_0 + j) - hecmesh%nn_internal
3085 if (jj < 1 .or. bt_imp%nr < jj) stop 'ERROR: jj out of range'
3086 if (cnt(jj) /= 0) stop 'ERROR: duplicate import rows?'
3087 cnt(jj) = index_imp(j) - index_imp(j-1)
3088 enddo
3089 deallocate(index_imp)
3090 enddo
3091 !
3092 allocate(imp_cols_index(0:nnb))
3093 call make_index(nnb, nnz_imp, imp_cols_index)
3094 deallocate(nnz_imp)
3095 !
3096 allocate(bt_imp%index(0:bt_imp%nr))
3097 call make_index(bt_imp%nr, cnt, bt_imp%index)
3098 deallocate(cnt)
3099 !
3100 bt_imp%nnz = bt_imp%index(bt_imp%nr)
3101 if (bt_imp%nnz /= imp_cols_index(nnb)) &
3102 stop 'ERROR: total num of nonzero of BT_imp'
3103 !
3104 allocate(imp_cols_item(cncol_item, bt_imp%nnz))
3105 allocate(imp_vals_item(ndof2 * bt_imp%nnz))
3106 !
3107 do idom = 1, nnb
3108 irank = heccomm%neighbor_pe(idom)
3109 idx_0 = imp_cols_index(idom-1)
3110 idx_n = imp_cols_index(idom)
3111 nnz = idx_n - idx_0
3112 if (nnz == 0) cycle
3113 tag = 3002
3114 call hecmw_recv_int(imp_cols_item(1, idx_0 + 1), cncol_item * nnz, &
3115 irank, tag, heccomm%HECMW_COMM, statuses(:,1))
3116 tag = 3003
3117 call hecmw_recv_r(imp_vals_item(ndof2*idx_0 + 1), ndof2 * nnz, &
3118 irank, tag, heccomm%HECMW_COMM, statuses(:,1))
3119 enddo
3120 call hecmw_waitall(n_send, requests, statuses)
3121 if (debug >= 2) write(0,*) ' DEBUG2: send BT_imp and recv into temporary data done'
3122 !
3123 deallocate(requests)
3124 deallocate(statuses)
3125 !
3126 do idom = 1, nnb
3127 call hecmw_localmat_free(bt_exp(idom))
3128 enddo
3129 deallocate(bt_exp)
3130 deallocate(exp_cols_index)
3131 deallocate(exp_cols_item)
3132 !
3133 call copy_mesh(hecmesh, hecmeshnew)
3134 !
3135 call map_imported_cols(hecmeshnew, imp_cols_index(nnb), imp_cols_item, n_add_node, add_nodes, map, i0)
3136 if (debug >= 2) write(0,*) ' DEBUG2: map imported cols done'
3137 !
3138 call update_comm_table(hecmeshnew, n_add_node, add_nodes, i0)
3139 if (debug >= 2) write(0,*) ' DEBUG2: update comm_table done'
3140 !
3141 bt_imp%nc = hecmeshnew%n_node
3142 !
3143 allocate(bt_imp%item(bt_imp%nnz))
3144 allocate(bt_imp%A(ndof2 * bt_imp%nnz))
3145 call copy_vals_to_bt_imp(heccomm, hecmesh%nn_internal, imp_cols_index, map, imp_vals_item, bt_imp)
3146 if (debug >= 2) write(0,*) ' DEBUG2: copy vals to BT_imp done'
3147 !
3148 deallocate(imp_cols_index)
3149 deallocate(imp_cols_item)
3150 deallocate(imp_vals_item)
3151 deallocate(map)
3152 end subroutine send_bt_exp_and_recv_bt_imp
3153
3154 subroutine copy_vals_to_bt_imp(hecCOMM, nn_internal, imp_cols_index, map, imp_vals_item, BT_imp)
3155 implicit none
3156 type (hecmwst_matrix_comm), intent(in) :: heccomm
3157 integer(kind=kint), intent(in) :: nn_internal
3158 integer(kind=kint), allocatable, intent(in) :: imp_cols_index(:)
3159 integer(kind=kint), intent(in) :: map(:)
3160 real(kind=kreal), intent(in) :: imp_vals_item(:)
3161 type (hecmwst_local_matrix), intent(inout) :: bt_imp
3162 integer(kind=kint) :: nnb, ndof2, idx, idom, idx_0, idx_n, nr, j, jrow, ks, ke, k
3163 nnb = heccomm%n_neighbor_pe
3164 ndof2 = bt_imp%ndof ** 2
3165 idx = 0
3166 do idom = 1, nnb
3167 idx_0 = heccomm%import_index(idom-1)
3168 idx_n = heccomm%import_index(idom)
3169 nr = idx_n - idx_0
3170 if (nr == 0) cycle
3171 do j = 1, nr
3172 jrow = heccomm%import_item(idx_0 + j) - nn_internal
3173 ks = bt_imp%index(jrow-1)+1
3174 ke = bt_imp%index(jrow)
3175 do k = ks, ke
3176 idx = idx + 1
3177 bt_imp%item(k) = map(idx)
3178 bt_imp%A(ndof2*(k-1)+1:ndof2*k) = imp_vals_item(ndof2*(idx-1)+1:ndof2*idx)
3179 enddo
3180 enddo
3181 if (idx /= imp_cols_index(idom)) stop 'ERROR: copy vals to BT_imp'
3182 enddo
3183 end subroutine copy_vals_to_bt_imp
3184
3185 subroutine concat_btmat_and_bt_imp(BTmat, BT_imp, BT_all)
3186 implicit none
3187 type (hecmwst_local_matrix), intent(in) :: btmat
3188 type (hecmwst_local_matrix), intent(in) :: bt_imp
3189 type (hecmwst_local_matrix), intent(out) :: bt_all
3190 integer(kind=kint) :: ndof, ndof2, i, ii
3191 ndof = btmat%ndof
3192 if (bt_imp%nr > 0 .and. bt_imp%ndof /= ndof) stop 'ERROR: concat BTmat and BT_imp: ndof'
3193 ndof2 = ndof*ndof
3194 bt_all%nr = btmat%nr + bt_imp%nr
3195 bt_all%nc = max(btmat%nc, bt_imp%nc)
3196 bt_all%nnz = btmat%nnz + bt_imp%nnz
3197 bt_all%ndof = ndof
3198 allocate(bt_all%index(0:bt_all%nr))
3199 allocate(bt_all%item(bt_all%nnz))
3200 allocate(bt_all%A(ndof2 * bt_all%nnz))
3201 bt_all%index(0) = 0
3202 do i = 1, btmat%nr
3203 bt_all%index(i) = btmat%index(i)
3204 enddo
3205 do i = 1, bt_imp%nr
3206 bt_all%index(btmat%nr+i) = bt_all%index(btmat%nr+i-1) + &
3207 bt_imp%index(i) - bt_imp%index(i-1)
3208 enddo
3209 do i = 1, btmat%nnz
3210 bt_all%item(i) = btmat%item(i)
3211 bt_all%A(ndof2*(i-1)+1:ndof2*i) = btmat%A(ndof2*(i-1)+1:ndof2*i)
3212 enddo
3213 do i = 1, bt_imp%nnz
3214 ii = btmat%nnz + i
3215 bt_all%item(ii) = bt_imp%item(i)
3216 bt_all%A(ndof2*(ii-1)+1:ndof2*ii) = bt_imp%A(ndof2*(i-1)+1:ndof2*i)
3217 enddo
3218 end subroutine concat_btmat_and_bt_imp
3219
3220 subroutine multiply_mat_mat(Amat, Bmat, Cmat)
3221 implicit none
3222 type (hecmwst_local_matrix), intent(in) :: amat
3223 type (hecmwst_local_matrix), intent(in) :: bmat
3224 type (hecmwst_local_matrix), intent(out) :: cmat
3225 integer(kind=kint) :: ndof, ndof2, nr, nc, nnz, i, icnt
3226 integer(kind=kint) :: js, je, j, jj, ks, ke, k, kk, l, ll, l0
3227 integer(kind=kint), allocatable :: iw(:)
3228 real(kind=kreal), pointer :: ap(:), bp(:), cp(:)
3229 real(kind=kreal) :: t0, t1
3230 t0 = hecmw_wtime()
3231 if (amat%ndof /= bmat%ndof) stop 'ERROR: multiply_mat_mat: unmatching ndof'
3232 ndof = amat%ndof
3233 ndof2 = ndof*ndof
3234 nr = amat%nr
3235 nc = bmat%nc
3236 if (amat%nc /= bmat%nr) then
3237 write(0,*) 'Amat: nr, nc = ', amat%nr, amat%nc
3238 write(0,*) 'Bmat: nr, nc = ', bmat%nr, bmat%nc
3239 stop 'ERROR: multiply_mat_mat: unmatching size'
3240 endif
3241 cmat%ndof = ndof
3242 cmat%nr = nr
3243 cmat%nc = nc
3244 allocate(cmat%index(0:nr))
3245 cmat%index(0) = 0
3246 !$omp parallel default(none), &
3247 !$omp& private(iw,i,icnt,js,je,j,jj,ks,ke,k,kk,l), &
3248 !$omp& shared(nr,nc,Amat,Bmat,Cmat)
3249 allocate(iw(nc))
3250 !$omp do
3251 do i = 1, nr
3252 icnt = 0
3253 js = amat%index(i-1)+1
3254 je = amat%index(i)
3255 do j = js, je
3256 jj = amat%item(j)
3257 ks = bmat%index(jj-1)+1
3258 ke = bmat%index(jj)
3259 kl1: do k = ks, ke
3260 kk = bmat%item(k)
3261 do l = 1, icnt
3262 if (iw(l) == kk) cycle kl1
3263 enddo
3264 icnt = icnt + 1
3265 iw(icnt) = kk
3266 enddo kl1
3267 enddo
3268 cmat%index(i) = icnt
3269 enddo
3270 !$omp end do
3271 deallocate(iw)
3272 !$omp end parallel
3273 do i = 1, nr
3274 cmat%index(i) = cmat%index(i-1) + cmat%index(i)
3275 enddo
3276 nnz = cmat%index(nr)
3277 cmat%nnz = nnz
3278 !write(0,*) 'nnz',nnz
3279 t1 = hecmw_wtime()
3280 if (timer >= 3) write(0, '(A,f10.4)') "###### multiply_mat_mat (1) : ",t1-t0
3281 t0 = hecmw_wtime()
3282 allocate(cmat%item(nnz))
3283 allocate(cmat%A(ndof2 * nnz))
3284 cmat%A(:) = 0.0d0
3285 !$omp parallel default(none), &
3286 !$omp& private(i,icnt,l0,js,je,j,jj,Ap,ks,ke,k,kk,Bp,ll,l,Cp), &
3287 !$omp& shared(nr,Cmat,Amat,Bmat,ndof2,ndof)
3288 !$omp do
3289 do i = 1, nr
3290 icnt = 0
3291 l0 = cmat%index(i-1)
3292 ! item
3293 js = amat%index(i-1)+1
3294 je = amat%index(i)
3295 do j = js, je
3296 jj = amat%item(j)
3297 ap => amat%A(ndof2*(j-1)+1:ndof2*j)
3298 ks = bmat%index(jj-1)+1
3299 ke = bmat%index(jj)
3300 do k = ks, ke
3301 kk = bmat%item(k)
3302 bp => bmat%A(ndof2*(k-1)+1:ndof2*k)
3303 ll = -1
3304 do l = 1, icnt
3305 if (cmat%item(l0+l) == kk) then
3306 ll = l0 + l
3307 exit
3308 endif
3309 enddo
3310 if (ll < 0) then
3311 icnt = icnt + 1
3312 ll = l0 + icnt
3313 cmat%item(ll) = kk
3314 endif
3315 cp => cmat%A(ndof2*(ll-1)+1:ndof2*ll)
3316 call blk_matmul_add(ndof, ap, bp, cp)
3317 enddo
3318 enddo
3319 !write(0,*) 'l0,icnt,index(i)',Cmat%index(i-1),icnt,Cmat%index(i)
3320 if (l0+icnt /= cmat%index(i)) stop 'ERROR: multiply_mat_mat: unknown error'
3321 enddo
3322 !$omp end do
3323 !$omp end parallel
3324 t1 = hecmw_wtime()
3325 if (timer >= 3) write(0, '(A,f10.4)') "###### multiply_mat_mat (2) : ",t1-t0
3326 t0 = hecmw_wtime()
3327 call sort_and_uniq_rows(cmat)
3328 t1 = hecmw_wtime()
3329 if (timer >= 3) write(0, '(A,f10.4)') "###### multiply_mat_mat (3) : ",t1-t0
3330 end subroutine multiply_mat_mat
3331
3332 subroutine blk_matmul_add(ndof, A, B, AB)
3333 implicit none
3334 integer, intent(in) :: ndof
3335 real(kind=kreal), intent(in) :: a(:), b(:)
3336 real(kind=kreal), intent(inout) :: ab(:)
3337 integer :: ndof2, i, j, k, i0, j0, ij, ik, jk
3338 ndof2=ndof*ndof
3339 do i=1,ndof
3340 i0=(i-1)*ndof
3341 do j=1,ndof
3342 ij=i0+j
3343 j0=(j-1)*ndof
3344 do k=1,ndof
3345 ik=i0+k
3346 jk=j0+k
3347 !$omp atomic
3348 ab(ik)=ab(ik)+a(ij)*b(jk)
3349 enddo
3350 enddo
3351 enddo
3352 end subroutine blk_matmul_add
3353
3354 subroutine hecmw_localmat_make_hecmat(hecMAT, BTtKTmat, hecTKT)
3355 implicit none
3356 type (hecmwst_matrix), intent(in) :: hecmat
3357 type (hecmwst_local_matrix), intent(in) :: bttktmat
3358 type (hecmwst_matrix), intent(inout) :: hectkt
3359 call make_new_hecmat(hecmat, bttktmat, hectkt)
3360 end subroutine hecmw_localmat_make_hecmat
3361
3362 subroutine hecmw_localmat_shrink_comm_table(BKmat, hecMESH)
3363 implicit none
3364 type (hecmwst_local_matrix), intent(in) :: bkmat
3365 type (hecmwst_local_mesh), intent(inout) :: hecmesh
3366 type (hecmwst_matrix_comm) :: heccomm
3367 call make_comm_table(bkmat, hecmesh, heccomm)
3368 deallocate(hecmesh%import_index)
3369 deallocate(hecmesh%import_item)
3370 deallocate(hecmesh%export_index)
3371 deallocate(hecmesh%export_item)
3372 hecmesh%import_index => heccomm%import_index
3373 hecmesh%import_item => heccomm%import_item
3374 hecmesh%export_index => heccomm%export_index
3375 hecmesh%export_item => heccomm%export_item
3376 deallocate(heccomm%neighbor_pe)
3378
3379end module hecmw_local_matrix
subroutine, public hecmw_bsearch_int_array(array, istart, iend, val, idx)
recursive subroutine, public hecmw_qsort_int_array(array, istart, iend)
subroutine, public hecmw_uniq_int_array(array, istart, iend, ndup)
subroutine, public hecmw_localmat_write(tmat, iunit)
subroutine, public hecmw_localmat_add(amat, bmat, cmat)
subroutine, public hecmw_localmat_transpose(tmat, ttmat)
subroutine, public hecmw_trimatmul_ttkt(hecmesh, bttmat, hecmat, btmat, iws, num_lagrange, hectkt)
subroutine, public hecmw_localmat_init_with_hecmat(bkmat, hecmat, num_lagrange)
subroutine, public hecmw_localmat_assemble(btmat, hecmesh, hecmeshnew)
subroutine, public hecmw_localmat_free(tmat)
subroutine, public hecmw_localmat_blocking(tmat, ndof, btmat)
subroutine, public hecmw_localmat_make_hecmat(hecmat, bttktmat, hectkt)
subroutine, public hecmw_localmat_mulvec(btmat, v, tv)
subroutine, public hecmw_localmat_multmat(bkmat, btmat, hecmesh, bktmat)
subroutine, public hecmw_localmat_add_hecmat(bkmat, hecmat)
subroutine, public hecmw_localmat_shrink_comm_table(bkmat, hecmesh)
integer(kind=kint), parameter cncol_item
num of column items to be migrated (2 or 3)
subroutine, public hecmw_trimatmul_ttkt_mpc(hecmesh, hecmat, hectkt)
subroutine, public hecmw_pair_array_append(parray, id, i1, i2)
subroutine, public hecmw_pair_array_finalize(parray)
integer(kind=kint) function, public hecmw_pair_array_find_id(parray, i1, i2)
subroutine, public hecmw_pair_array_init(parray, max_num)
subroutine, public hecmw_pair_array_sort(parray)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
integer(kind=kint), parameter hecmw_status_size
integer(kind=kint) function hecmw_comm_get_rank()
real(kind=kreal) function hecmw_wtime()
subroutine hecmw_isend_int(sbuf, sc, dest, tag, comm, req)
subroutine hecmw_recv_int(rbuf, rc, source, tag, comm, stat)
subroutine hecmw_isend_r(sbuf, sc, dest, tag, comm, req)
subroutine hecmw_waitall(cnt, reqs, stats)
subroutine hecmw_recv_r(rbuf, rc, source, tag, comm, stat)
subroutine hecmw_alltoall_int(sbuf, sc, rbuf, rc, comm)