26 integer(kind=kint) :: N
27 real(kind=
kreal),
pointer :: d(:) => null()
28 real(kind=
kreal),
pointer :: al(:) => null()
29 real(kind=
kreal),
pointer :: au(:) => null()
30 integer(kind=kint),
pointer :: indexL(:) => null()
31 integer(kind=kint),
pointer :: indexU(:) => null()
32 integer(kind=kint),
pointer :: itemL(:) => null()
33 integer(kind=kint),
pointer :: itemU(:) => null()
34 real(kind=
kreal),
pointer :: alu(:) => null()
36 integer(kind=kint) :: NContact = 0
37 real(kind=
kreal),
pointer :: cal(:) => null()
38 real(kind=
kreal),
pointer :: cau(:) => null()
39 integer(kind=kint),
pointer :: indexCL(:) => null()
40 integer(kind=kint),
pointer :: indexCU(:) => null()
41 integer(kind=kint),
pointer :: itemCL(:) => null()
42 integer(kind=kint),
pointer :: itemCU(:) => null()
44 integer(kind=kint) :: NColor
45 integer(kind=kint),
pointer :: COLORindex(:) => null()
46 integer(kind=kint),
pointer :: perm(:) => null()
47 integer(kind=kint),
pointer :: iperm(:) => null()
49 logical,
save :: isFirst = .true.
56 integer(kind=kint ) :: npl, npu, npcl, npcu
57 real (kind=
kreal),
allocatable :: cd(:)
58 integer(kind=kint ) :: ncolor_in
59 real (kind=
kreal) :: sigma_diag
60 real (kind=
kreal) :: alutmp(6,6), pw(6)
61 integer(kind=kint ) :: ii, i, j, k
62 integer(kind=kint ) :: nthreads = 1
63 integer(kind=kint ),
allocatable :: perm_tmp(:)
75 ncontact = hecmat%cmat%n_val
77 if (ncontact.gt.0)
then
81 if (nthreads == 1)
then
83 allocate(colorindex(0:1), perm(n), iperm(n))
94 indexl => hecmat%indexL
95 indexu => hecmat%indexU
98 if (ncontact.gt.0)
then
101 indexcl => hecmat%indexCL
102 indexcu => hecmat%indexCU
103 itemcl => hecmat%itemCL
104 itemcu => hecmat%itemCU
107 allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
109 hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
112 hecmat%indexU, hecmat%itemU, perm_tmp, &
113 ncolor_in, ncolor, colorindex, perm, iperm)
119 npl = hecmat%indexL(n)
120 npu = hecmat%indexU(n)
121 allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
123 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
124 indexl, indexu, iteml, itemu)
129 allocate(d(36*n), al(36*npl), au(36*npu))
131 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
132 hecmat%AL, hecmat%AU, hecmat%D, &
133 indexl, indexu, iteml, itemu, al, au, d)
139 if (ncontact.gt.0)
then
140 npcl = hecmat%indexCL(n)
141 npcu = hecmat%indexCU(n)
142 allocate(indexcl(0:n), indexcu(0:n), itemcl(npcl), itemcu(npcu))
144 hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
145 indexcl, indexcu, itemcl, itemcu)
147 allocate(cd(36*n), cal(36*npcl), cau(36*npcu))
149 hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
150 hecmat%CAL, hecmat%CAU, hecmat%D, &
151 indexcl, indexcu, itemcl, itemcu, cal, cau, cd)
184 alutmp(1,1)= alu(36*ii-35) * sigma_diag
185 alutmp(1,2)= alu(36*ii-34)
186 alutmp(1,3)= alu(36*ii-33)
187 alutmp(1,4)= alu(36*ii-32)
188 alutmp(1,5)= alu(36*ii-31)
189 alutmp(1,6)= alu(36*ii-30)
191 alutmp(2,1)= alu(36*ii-29)
192 alutmp(2,2)= alu(36*ii-28) * sigma_diag
193 alutmp(2,3)= alu(36*ii-27)
194 alutmp(2,4)= alu(36*ii-26)
195 alutmp(2,5)= alu(36*ii-25)
196 alutmp(2,6)= alu(36*ii-24)
198 alutmp(3,1)= alu(36*ii-23)
199 alutmp(3,2)= alu(36*ii-22)
200 alutmp(3,3)= alu(36*ii-21) * sigma_diag
201 alutmp(3,4)= alu(36*ii-20)
202 alutmp(3,5)= alu(36*ii-19)
203 alutmp(3,6)= alu(36*ii-18)
205 alutmp(4,1)= alu(36*ii-17)
206 alutmp(4,2)= alu(36*ii-16)
207 alutmp(4,3)= alu(36*ii-15)
208 alutmp(4,4)= alu(36*ii-14) * sigma_diag
209 alutmp(4,5)= alu(36*ii-13)
210 alutmp(4,6)= alu(36*ii-12)
212 alutmp(5,1)= alu(36*ii-11)
213 alutmp(5,2)= alu(36*ii-10)
214 alutmp(5,3)= alu(36*ii-9 )
215 alutmp(5,4)= alu(36*ii-8 )
216 alutmp(5,5)= alu(36*ii-7 ) * sigma_diag
217 alutmp(5,6)= alu(36*ii-6 )
219 alutmp(6,1)= alu(36*ii-5 )
220 alutmp(6,2)= alu(36*ii-4 )
221 alutmp(6,3)= alu(36*ii-3 )
222 alutmp(6,4)= alu(36*ii-2 )
223 alutmp(6,5)= alu(36*ii-1 )
224 alutmp(6,6)= alu(36*ii ) * sigma_diag
227 alutmp(k,k)= 1.d0/alutmp(k,k)
229 alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
231 pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
239 alu(36*ii-35)= alutmp(1,1)
240 alu(36*ii-34)= alutmp(1,2)
241 alu(36*ii-33)= alutmp(1,3)
242 alu(36*ii-32)= alutmp(1,4)
243 alu(36*ii-31)= alutmp(1,5)
244 alu(36*ii-30)= alutmp(1,6)
245 alu(36*ii-29)= alutmp(2,1)
246 alu(36*ii-28)= alutmp(2,2)
247 alu(36*ii-27)= alutmp(2,3)
248 alu(36*ii-26)= alutmp(2,4)
249 alu(36*ii-25)= alutmp(2,5)
250 alu(36*ii-24)= alutmp(2,6)
251 alu(36*ii-23)= alutmp(3,1)
252 alu(36*ii-22)= alutmp(3,2)
253 alu(36*ii-21)= alutmp(3,3)
254 alu(36*ii-20)= alutmp(3,4)
255 alu(36*ii-19)= alutmp(3,5)
256 alu(36*ii-18)= alutmp(3,6)
257 alu(36*ii-17)= alutmp(4,1)
258 alu(36*ii-16)= alutmp(4,2)
259 alu(36*ii-15)= alutmp(4,3)
260 alu(36*ii-14)= alutmp(4,4)
261 alu(36*ii-13)= alutmp(4,5)
262 alu(36*ii-12)= alutmp(4,6)
263 alu(36*ii-11)= alutmp(5,1)
264 alu(36*ii-10)= alutmp(5,2)
265 alu(36*ii-9 )= alutmp(5,3)
266 alu(36*ii-8 )= alutmp(5,4)
267 alu(36*ii-7 )= alutmp(5,5)
268 alu(36*ii-6 )= alutmp(5,6)
269 alu(36*ii-5 )= alutmp(6,1)
270 alu(36*ii-4 )= alutmp(6,2)
271 alu(36*ii-3 )= alutmp(6,3)
272 alu(36*ii-2 )= alutmp(6,4)
273 alu(36*ii-1 )= alutmp(6,5)
274 alu(36*ii )= alutmp(6,6)
286 real(kind=
kreal),
intent(inout) :: zp(:)
287 integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k
288 real(kind=
kreal) :: x1, x2, x3, x4, x5, x6
289 real(kind=
kreal) :: sw1, sw2, sw3, sw4, sw5, sw6
292 integer(kind=kint),
parameter :: numofblockperthread = 100
293 integer(kind=kint),
save :: numofthread = 1, numofblock
294 integer(kind=kint),
save,
allocatable :: ictoblockindex(:)
295 integer(kind=kint),
save,
allocatable :: blockindextocolorindex(:)
296 integer(kind=kint),
save :: sectorcachesize0, sectorcachesize1
297 integer(kind=kint) :: blockindex, elementcount, numofelement, ii
298 real(kind=
kreal) :: numofelementperblock
299 integer(kind=kint) :: my_rank
303 numofblock = numofthread * numofblockperthread
304 if (
allocated(ictoblockindex))
deallocate(ictoblockindex)
305 if (
allocated(blockindextocolorindex))
deallocate(blockindextocolorindex)
306 allocate (ictoblockindex(0:ncolor), &
307 blockindextocolorindex(0:numofblock + ncolor))
308 numofelement = n + indexl(n) + indexu(n)
309 numofelementperblock = dble(numofelement) / numofblock
312 ictoblockindex(0) = 0
313 blockindextocolorindex = -1
314 blockindextocolorindex(0) = 0
323 do i = colorindex(ic-1)+1, colorindex(ic)
324 elementcount = elementcount + 1
325 elementcount = elementcount + (indexl(i) - indexl(i-1))
326 elementcount = elementcount + (indexu(i) - indexu(i-1))
327 if (elementcount > ii * numofelementperblock &
328 .or. i == colorindex(ic))
then
330 blockindex = blockindex + 1
331 blockindextocolorindex(blockindex) = i
336 ictoblockindex(ic) = blockindex
338 numofblock = blockindex
341 sectorcachesize0, sectorcachesize1 )
361 do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
362 do i = blockindextocolorindex(blockindex-1)+1, &
363 blockindextocolorindex(blockindex)
383 sw1= sw1 -al(36*j-35)*x1 -al(36*j-34)*x2 -al(36*j-33)*x3 -al(36*j-32)*x4 -al(36*j-31)*x5 -al(36*j-30)*x6
384 sw2= sw2 -al(36*j-29)*x1 -al(36*j-28)*x2 -al(36*j-27)*x3 -al(36*j-26)*x4 -al(36*j-25)*x5 -al(36*j-24)*x6
385 sw3= sw3 -al(36*j-23)*x1 -al(36*j-22)*x2 -al(36*j-21)*x3 -al(36*j-20)*x4 -al(36*j-19)*x5 -al(36*j-18)*x6
386 sw4= sw4 -al(36*j-17)*x1 -al(36*j-16)*x2 -al(36*j-15)*x3 -al(36*j-14)*x4 -al(36*j-13)*x5 -al(36*j-12)*x6
387 sw5= sw5 -al(36*j-11)*x1 -al(36*j-10)*x2 -al(36*j-9 )*x3 -al(36*j-8 )*x4 -al(36*j-7 )*x5 -al(36*j-6 )*x6
388 sw6= sw6 -al(36*j-5 )*x1 -al(36*j-4 )*x2 -al(36*j-3 )*x3 -al(36*j-2 )*x4 -al(36*j-1 )*x5 -al(36*j )*x6
391 if (ncontact.ne.0)
then
403 sw1= sw1 -cal(36*j-35)*x1 -cal(36*j-34)*x2 -cal(36*j-33)*x3 -cal(36*j-32)*x4 -cal(36*j-31)*x5 -cal(36*j-30)*x6
404 sw2= sw2 -cal(36*j-29)*x1 -cal(36*j-28)*x2 -cal(36*j-27)*x3 -cal(36*j-26)*x4 -cal(36*j-25)*x5 -cal(36*j-24)*x6
405 sw3= sw3 -cal(36*j-23)*x1 -cal(36*j-22)*x2 -cal(36*j-21)*x3 -cal(36*j-20)*x4 -cal(36*j-19)*x5 -cal(36*j-18)*x6
406 sw4= sw4 -cal(36*j-17)*x1 -cal(36*j-16)*x2 -cal(36*j-15)*x3 -cal(36*j-14)*x4 -cal(36*j-13)*x5 -cal(36*j-12)*x6
407 sw5= sw5 -cal(36*j-11)*x1 -cal(36*j-10)*x2 -cal(36*j-9 )*x3 -cal(36*j-8 )*x4 -cal(36*j-7 )*x5 -cal(36*j-6 )*x6
408 sw6= sw6 -cal(36*j-5 )*x1 -cal(36*j-4 )*x2 -cal(36*j-3 )*x3 -cal(36*j-2 )*x4 -cal(36*j-1 )*x5 -cal(36*j )*x6
418 x2= x2 -alu(36*i-29)*x1
419 x3= x3 -alu(36*i-23)*x1 -alu(36*i-22)*x2
420 x4= x4 -alu(36*i-17)*x1 -alu(36*i-16)*x2 -alu(36*i-5)*x3
421 x5= x5 -alu(36*i-11)*x1 -alu(36*i-10)*x2 -alu(36*i-9)*x3 -alu(36*i-8)*x4
422 x6= x6 -alu(36*i-5 )*x1 -alu(36*i-4 )*x2 -alu(36*i-3)*x3 -alu(36*i-2)*x4 -alu(36*i-1)*x5
424 x5= alu(36*i-7 )*( x5 -alu(36*i-6)*x6 )
425 x4= alu(36*i-14)*( x4 -alu(36*i-12)*x6 -alu(36*i-13)*x5)
426 x3= alu(36*i-21)*( x3 -alu(36*i-18)*x6 -alu(36*i-19)*x5 -alu(36*i-20)*x4)
427 x2= alu(36*i-28)*( x2 -alu(36*i-24)*x6 -alu(36*i-25)*x5 -alu(36*i-26)*x4 -alu(36*i-27)*x3)
428 x1= alu(36*i-35)*( x1 -alu(36*i-30)*x6 -alu(36*i-31)*x5 -alu(36*i-32)*x4 -alu(36*i-33)*x3 -alu(36*i-34)*x2)
443 do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
444 do i = blockindextocolorindex(blockindex), &
445 blockindextocolorindex(blockindex-1)+1, -1
467 sw1= sw1 +au(36*j-35)*x1 +au(36*j-34)*x2 +au(36*j-33)*x3 +au(36*j-32)*x4 +au(36*j-31)*x5 +au(36*j-30)*x6
468 sw2= sw2 +au(36*j-29)*x1 +au(36*j-28)*x2 +au(36*j-27)*x3 +au(36*j-26)*x4 +au(36*j-25)*x5 +au(36*j-24)*x6
469 sw3= sw3 +au(36*j-23)*x1 +au(36*j-22)*x2 +au(36*j-21)*x3 +au(36*j-20)*x4 +au(36*j-19)*x5 +au(36*j-18)*x6
470 sw4= sw4 +au(36*j-17)*x1 +au(36*j-16)*x2 +au(36*j-15)*x3 +au(36*j-14)*x4 +au(36*j-13)*x5 +au(36*j-12)*x6
471 sw5= sw5 +au(36*j-11)*x1 +au(36*j-10)*x2 +au(36*j-9 )*x3 +au(36*j-8 )*x4 +au(36*j-7 )*x5 +au(36*j-6 )*x6
472 sw6= sw6 +au(36*j-5 )*x1 +au(36*j-4 )*x2 +au(36*j-3 )*x3 +au(36*j-2 )*x4 +au(36*j-1 )*x5 +au(36*j )*x6
475 if (ncontact.gt.0)
then
476 isu= indexcu(i-1) + 1
487 sw1= sw1 +cau(36*j-35)*x1 +cau(36*j-34)*x2 +cau(36*j-33)*x3 +cau(36*j-32)*x4 +cau(36*j-31)*x5 +cau(36*j-30)*x6
488 sw2= sw2 +cau(36*j-29)*x1 +cau(36*j-28)*x2 +cau(36*j-27)*x3 +cau(36*j-26)*x4 +cau(36*j-25)*x5 +cau(36*j-24)*x6
489 sw3= sw3 +cau(36*j-23)*x1 +cau(36*j-22)*x2 +cau(36*j-21)*x3 +cau(36*j-20)*x4 +cau(36*j-19)*x5 +cau(36*j-18)*x6
490 sw4= sw4 +cau(36*j-17)*x1 +cau(36*j-16)*x2 +cau(36*j-15)*x3 +cau(36*j-14)*x4 +cau(36*j-13)*x5 +cau(36*j-12)*x6
491 sw5= sw5 +cau(36*j-11)*x1 +cau(36*j-10)*x2 +cau(36*j-9 )*x3 +cau(36*j-8 )*x4 +cau(36*j-7 )*x5 +cau(36*j-6 )*x6
492 sw6= sw6 +cau(36*j-5 )*x1 +cau(36*j-4 )*x2 +cau(36*j-3 )*x3 +cau(36*j-2 )*x4 +cau(36*j-1 )*x5 +cau(36*j )*x6
502 x2= x2 -alu(36*i-29)*x1
503 x3= x3 -alu(36*i-23)*x1 -alu(36*i-22)*x2
504 x4= x4 -alu(36*i-17)*x1 -alu(36*i-16)*x2 -alu(36*i-5)*x3
505 x5= x5 -alu(36*i-11)*x1 -alu(36*i-10)*x2 -alu(36*i-9)*x3 -alu(36*i-8)*x4
506 x6= x6 -alu(36*i-5 )*x1 -alu(36*i-4 )*x2 -alu(36*i-3)*x3 -alu(36*i-2)*x4 -alu(36*i-1)*x5
508 x5= alu(36*i-7 )*( x5 -alu(36*i-6)*x6 )
509 x4= alu(36*i-14)*( x4 -alu(36*i-12)*x6 -alu(36*i-13)*x5)
510 x3= alu(36*i-21)*( x3 -alu(36*i-18)*x6 -alu(36*i-19)*x5 -alu(36*i-20)*x4)
511 x2= alu(36*i-28)*( x2 -alu(36*i-24)*x6 -alu(36*i-25)*x5 -alu(36*i-26)*x4 -alu(36*i-27)*x3)
512 x1= alu(36*i-35)*( x1 -alu(36*i-30)*x6 -alu(36*i-31)*x5 -alu(36*i-32)*x4 -alu(36*i-33)*x3 -alu(36*i-34)*x2)
514 zp(6*iold-5)= zp(6*iold-5) -x1
515 zp(6*iold-4)= zp(6*iold-4) -x2
516 zp(6*iold-3)= zp(6*iold-3) -x3
517 zp(6*iold-2)= zp(6*iold-2) -x4
518 zp(6*iold-1)= zp(6*iold-1) -x5
519 zp(6*iold )= zp(6*iold ) -x6
536 integer(kind=kint ) :: nthreads = 1
538 if (
associated(colorindex))
deallocate(colorindex)
539 if (
associated(perm))
deallocate(perm)
540 if (
associated(iperm))
deallocate(iperm)
541 if (
associated(alu))
deallocate(alu)
542 if (nthreads >= 1)
then
543 if (
associated(d))
deallocate(d)
544 if (
associated(al))
deallocate(al)
545 if (
associated(au))
deallocate(au)
546 if (
associated(indexl))
deallocate(indexl)
547 if (
associated(indexu))
deallocate(indexu)
548 if (
associated(iteml))
deallocate(iteml)
549 if (
associated(itemu))
deallocate(itemu)
550 if (ncontact.ne.0)
then
551 if (
associated(cal))
deallocate(cal)
552 if (
associated(cau))
deallocate(cau)
553 if (
associated(indexcl))
deallocate(indexcl)
554 if (
associated(indexcu))
deallocate(indexcu)
555 if (
associated(itemcl))
deallocate(itemcl)
556 if (
associated(itemcu))
deallocate(itemcu)
570 if (ncontact.ne.0)
then
582 subroutine write_debug_info
584 integer(kind=kint) :: my_rank, ic, in
587 if (my_rank.eq.0)
then
588 write(*,*)
'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
590 write(19000+my_rank,
'(a)')
'#NCOLORTot'
591 write(19000+my_rank,*) ncolor
592 write(19000+my_rank,
'(a)')
'#ic COLORindex(ic-1)+1 COLORindex(ic)'
594 write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
596 write(29000+my_rank,
'(a)')
'#n_node'
597 write(29000+my_rank,*) n
598 write(29000+my_rank,
'(a)')
'#in OLDtoNEW(in) NEWtoOLD(in)'
600 write(29000+my_rank,*) in, iperm(in), perm(in)
601 if (perm(iperm(in)) .ne. in)
then
602 write(29000+my_rank,*)
'** WARNING **: NEWtoOLD and OLDtoNEW: ',in
605 end subroutine write_debug_info
607 subroutine check_ordering
609 integer(kind=kint) :: ic, i, j, k
610 integer(kind=kint),
allocatable :: iicolor(:)
612 if (ncolor.gt.1)
then
615 do i= colorindex(ic-1)+1, colorindex(ic)
621 do i= colorindex(ic-1)+1, colorindex(ic)
622 do j= indexl(i-1)+1, indexl(i)
624 if (iicolor(i).eq.iicolor(k))
then
625 write(*,*) .eq.
'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
632 do i= colorindex(ic), colorindex(ic-1)+1, -1
633 do j= indexu(i-1)+1, indexu(i)
635 if (iicolor(i).eq.iicolor(k))
then
636 write(*,*) .eq.
'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
644 end subroutine check_ordering
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecmat)
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecmat)
subroutine, public hecmw_matrix_reorder_renum_item(n, perm, indexxp, itemxp)
subroutine, public hecmw_matrix_reorder_values(n, ndof, perm, iperm, indexl, indexu, iteml, itemu, al, au, d, indexlp, indexup, itemlp, itemup, alp, aup, dp)
subroutine, public hecmw_matrix_reorder_profile(n, perm, iperm, indexl, indexu, iteml, itemu, indexlp, indexup, itemlp, itemup)
subroutine, public hecmw_precond_ssor_66_apply(zp)
subroutine, public hecmw_precond_ssor_66_clear(hecmat)
subroutine, public hecmw_precond_ssor_66_setup(hecmat)
subroutine, public hecmw_tuning_fx_calc_sector_cache(n, ndof, sectorcachesize0, sectorcachesize1)
integer(kind=4), parameter kreal
integer(kind=kint) function hecmw_comm_get_rank()
subroutine, public hecmw_matrix_ordering_rcm(n, indexl, iteml, indexu, itemu, perm, iperm)
subroutine, public hecmw_matrix_ordering_mc(n, indexl, iteml, indexu, itemu, perm_cur, ncolor_in, ncolor_out, colorindex, perm, iperm)