26 integer(kind=kint) :: N
27 real(kind=
kreal),
pointer :: d(:) => null()
28 real(kind=
kreal),
pointer :: al(:) => null()
29 real(kind=
kreal),
pointer :: au(:) => null()
30 integer(kind=kint),
pointer :: indexL(:) => null()
31 integer(kind=kint),
pointer :: indexU(:) => null()
32 integer(kind=kint),
pointer :: itemL(:) => null()
33 integer(kind=kint),
pointer :: itemU(:) => null()
34 real(kind=
kreal),
pointer :: alu(:) => null()
36 integer(kind=kint) :: NContact = 0
37 real(kind=
kreal),
pointer :: cal(:) => null()
38 real(kind=
kreal),
pointer :: cau(:) => null()
39 integer(kind=kint),
pointer :: indexCL(:) => null()
40 integer(kind=kint),
pointer :: indexCU(:) => null()
41 integer(kind=kint),
pointer :: itemCL(:) => null()
42 integer(kind=kint),
pointer :: itemCU(:) => null()
44 integer(kind=kint) :: NColor
45 integer(kind=kint),
pointer :: COLORindex(:) => null()
46 integer(kind=kint),
pointer :: perm(:) => null()
47 integer(kind=kint),
pointer :: iperm(:) => null()
49 logical,
save :: isFirst = .true.
51 logical,
save :: INITIALIZED = .false.
58 integer(kind=kint ) :: npl, npu, npcl, npcu
59 real (kind=
kreal),
allocatable :: cd(:)
60 integer(kind=kint ) :: ncolor_in
61 real (kind=
kreal) :: sigma_diag
62 real (kind=
kreal) :: alutmp(4,4), pw(4)
63 integer(kind=kint ) :: ii, i, j, k
64 integer(kind=kint ) :: nthreads = 1
65 integer(kind=kint ),
allocatable :: perm_tmp(:)
72 if (hecmat%Iarray(98) == 1)
then
74 else if (hecmat%Iarray(97) == 1)
then
87 ncontact = hecmat%cmat%n_val
89 if (ncontact.gt.0)
then
93 if (nthreads == 1)
then
95 allocate(colorindex(0:1), perm(n), iperm(n))
103 allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
105 hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
108 hecmat%indexU, hecmat%itemU, perm_tmp, &
109 ncolor_in, ncolor, colorindex, perm, iperm)
116 npl = hecmat%indexL(n)
117 npu = hecmat%indexU(n)
118 allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
120 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
121 indexl, indexu, iteml, itemu)
126 allocate(d(16*n), al(16*npl), au(16*npu))
128 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
129 hecmat%AL, hecmat%AU, hecmat%D, &
130 indexl, indexu, iteml, itemu, al, au, d)
136 if (ncontact.gt.0)
then
137 npcl = hecmat%indexCL(n)
138 npcu = hecmat%indexCU(n)
139 allocate(indexcl(0:n), indexcu(0:n), itemcl(npcl), itemcu(npcu))
141 hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
142 indexcl, indexcu, itemcl, itemcu)
144 allocate(cd(16*n), cal(16*npcl), cau(16*npcu))
146 hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
147 hecmat%CAL, hecmat%CAU, hecmat%D, &
148 indexcl, indexcu, itemcl, itemcu, cal, cau, cd)
186 alutmp(1,1)= alu(16*ii-15) * sigma_diag
187 alutmp(1,2)= alu(16*ii-14)
188 alutmp(1,3)= alu(16*ii-13)
189 alutmp(1,4)= alu(16*ii-12)
190 alutmp(2,1)= alu(16*ii-11)
191 alutmp(2,2)= alu(16*ii-10) * sigma_diag
192 alutmp(2,3)= alu(16*ii- 9)
193 alutmp(2,4)= alu(16*ii- 8)
194 alutmp(3,1)= alu(16*ii- 7)
195 alutmp(3,2)= alu(16*ii- 6)
196 alutmp(3,3)= alu(16*ii- 5) * sigma_diag
197 alutmp(3,4)= alu(16*ii- 4)
198 alutmp(4,1)= alu(16*ii- 3)
199 alutmp(4,2)= alu(16*ii- 2)
200 alutmp(4,3)= alu(16*ii- 1)
201 alutmp(4,4)= alu(16*ii ) * sigma_diag
204 alutmp(k,k)= 1.d0/alutmp(k,k)
206 alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
208 pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
215 alu(16*ii-15)= alutmp(1,1)
216 alu(16*ii-14)= alutmp(1,2)
217 alu(16*ii-13)= alutmp(1,3)
218 alu(16*ii-12)= alutmp(1,4)
219 alu(16*ii-11)= alutmp(2,1)
220 alu(16*ii-10)= alutmp(2,2)
221 alu(16*ii- 9)= alutmp(2,3)
222 alu(16*ii- 8)= alutmp(2,4)
223 alu(16*ii- 7)= alutmp(3,1)
224 alu(16*ii- 6)= alutmp(3,2)
225 alu(16*ii- 5)= alutmp(3,3)
226 alu(16*ii- 4)= alutmp(3,4)
227 alu(16*ii- 3)= alutmp(4,1)
228 alu(16*ii- 2)= alutmp(4,2)
229 alu(16*ii- 1)= alutmp(4,3)
230 alu(16*ii )= alutmp(4,4)
237 hecmat%Iarray(98) = 0
238 hecmat%Iarray(97) = 0
247 real(kind=
kreal),
intent(inout) :: zp(:)
248 integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k
249 real(kind=
kreal) :: sw1, sw2, sw3, sw4, x1, x2, x3, x4
252 integer(kind=kint),
parameter :: numofblockperthread = 100
253 integer(kind=kint),
save :: numofthread = 1, numofblock
254 integer(kind=kint),
save,
allocatable :: ictoblockindex(:)
255 integer(kind=kint),
save,
allocatable :: blockindextocolorindex(:)
256 integer(kind=kint),
save :: sectorcachesize0, sectorcachesize1
257 integer(kind=kint) :: blockindex, elementcount, numofelement, ii
258 real(kind=
kreal) :: numofelementperblock
259 integer(kind=kint) :: my_rank
263 numofblock = numofthread * numofblockperthread
264 if (
allocated(ictoblockindex))
deallocate(ictoblockindex)
265 if (
allocated(blockindextocolorindex))
deallocate(blockindextocolorindex)
266 allocate (ictoblockindex(0:ncolor), &
267 blockindextocolorindex(0:numofblock + ncolor))
268 numofelement = n + indexl(n) + indexu(n)
269 numofelementperblock = dble(numofelement) / numofblock
272 ictoblockindex(0) = 0
273 blockindextocolorindex = -1
274 blockindextocolorindex(0) = 0
283 do i = colorindex(ic-1)+1, colorindex(ic)
284 elementcount = elementcount + 1
285 elementcount = elementcount + (indexl(i) - indexl(i-1))
286 elementcount = elementcount + (indexu(i) - indexu(i-1))
287 if (elementcount > ii * numofelementperblock &
288 .or. i == colorindex(ic))
then
290 blockindex = blockindex + 1
291 blockindextocolorindex(blockindex) = i
296 ictoblockindex(ic) = blockindex
298 numofblock = blockindex
301 sectorcachesize0, sectorcachesize1 )
321 do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
322 do i = blockindextocolorindex(blockindex-1)+1, &
323 blockindextocolorindex(blockindex)
339 sw1= sw1 - al(16*j-15)*x1 - al(16*j-14)*x2 - al(16*j-13)*x3 - al(16*j-12)*x4
340 sw2= sw2 - al(16*j-11)*x1 - al(16*j-10)*x2 - al(16*j- 9)*x3 - al(16*j- 8)*x4
341 sw3= sw3 - al(16*j- 7)*x1 - al(16*j- 6)*x2 - al(16*j- 5)*x3 - al(16*j- 4)*x4
342 sw4= sw4 - al(16*j- 3)*x1 - al(16*j- 2)*x2 - al(16*j- 1)*x3 - al(16*j- 0)*x4
345 if (ncontact.ne.0)
then
355 sw1= sw1 - cal(16*j-15)*x1 - cal(16*j-14)*x2 - cal(16*j-13)*x3 - cal(16*j-12)*x4
356 sw2= sw2 - cal(16*j-11)*x1 - cal(16*j-10)*x2 - cal(16*j- 9)*x3 - cal(16*j- 8)*x4
357 sw3= sw3 - cal(16*j- 7)*x1 - cal(16*j- 6)*x2 - cal(16*j- 5)*x3 - cal(16*j- 4)*x4
358 sw4= sw4 - cal(16*j- 3)*x1 - cal(16*j- 2)*x2 - cal(16*j- 1)*x3 - cal(16*j- 0)*x4
366 x2= x2 - alu(16*i-11)*x1
367 x3= x3 - alu(16*i- 7)*x1 - alu(16*i- 6)*x2
368 x4= x4 - alu(16*i- 3)*x1 - alu(16*i- 2)*x2 - alu(16*i- 1)*x3
371 x3= alu(16*i- 5)*( x3 - alu(16*i- 4)*x4 )
372 x2= alu(16*i-10)*( x2 - alu(16*i- 8)*x4 - alu(16*i- 9)*x3 )
373 x1= alu(16*i-15)*( x1 - alu(16*i-12)*x4 - alu(16*i-13)*x3 - alu(16*i-14)*x2 )
387 do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
388 do i = blockindextocolorindex(blockindex), &
389 blockindextocolorindex(blockindex-1)+1, -1
407 sw1= sw1 + au(16*j-15)*x1 + au(16*j-14)*x2 + au(16*j-13)*x3 + au(16*j-12)*x4
408 sw2= sw2 + au(16*j-11)*x1 + au(16*j-10)*x2 + au(16*j- 9)*x3 + au(16*j- 8)*x4
409 sw3= sw3 + au(16*j- 7)*x1 + au(16*j- 6)*x2 + au(16*j- 5)*x3 + au(16*j- 4)*x4
410 sw4= sw4 + au(16*j- 3)*x1 + au(16*j- 2)*x2 + au(16*j- 1)*x3 + au(16*j- 0)*x4
414 if (ncontact.gt.0)
then
415 isu= indexcu(i-1) + 1
424 sw1= sw1 + cau(16*j-15)*x1 + cau(16*j-14)*x2 + cau(16*j-13)*x3 + cau(16*j-12)*x4
425 sw2= sw2 + cau(16*j-11)*x1 + cau(16*j-10)*x2 + cau(16*j- 9)*x3 + cau(16*j- 8)*x4
426 sw3= sw3 + cau(16*j- 7)*x1 + cau(16*j- 6)*x2 + cau(16*j- 5)*x3 + cau(16*j- 4)*x4
427 sw4= sw4 + cau(16*j- 3)*x1 + cau(16*j- 2)*x2 + cau(16*j- 1)*x3 + cau(16*j- 0)*x4
436 x2= x2 - alu(16*i-11)*x1
437 x3= x3 - alu(16*i- 7)*x1 - alu(16*i- 6)*x2
438 x4= x4 - alu(16*i- 3)*x1 - alu(16*i- 2)*x2 - alu(16*i- 1)*x3
440 x3= alu(16*i- 5)*( x3 - alu(16*i- 4)*x4 )
441 x2= alu(16*i-10)*( x2 - alu(16*i- 8)*x4 - alu(16*i- 9)*x3 )
442 x1= alu(16*i-15)*( x1 - alu(16*i-12)*x4 - alu(16*i-13)*x3 - alu(16*i-14)*x2 )
444 zp(4*iold-3)= zp(4*iold-3) - x1
445 zp(4*iold-2)= zp(4*iold-2) - x2
446 zp(4*iold-1)= zp(4*iold-1) - x3
447 zp(4*iold )= zp(4*iold ) - x4
464 integer(kind=kint ) :: nthreads = 1
466 if (
associated(colorindex))
deallocate(colorindex)
467 if (
associated(perm))
deallocate(perm)
468 if (
associated(iperm))
deallocate(iperm)
469 if (
associated(alu))
deallocate(alu)
470 if (nthreads >= 1)
then
471 if (
associated(d))
deallocate(d)
472 if (
associated(al))
deallocate(al)
473 if (
associated(au))
deallocate(au)
474 if (
associated(indexl))
deallocate(indexl)
475 if (
associated(indexu))
deallocate(indexu)
476 if (
associated(iteml))
deallocate(iteml)
477 if (
associated(itemu))
deallocate(itemu)
478 if (ncontact.ne.0)
then
479 if (
associated(cal))
deallocate(cal)
480 if (
associated(cau))
deallocate(cau)
481 if (
associated(indexcl))
deallocate(indexcl)
482 if (
associated(indexcu))
deallocate(indexcu)
483 if (
associated(itemcl))
deallocate(itemcl)
484 if (
associated(itemcu))
deallocate(itemcu)
498 if (ncontact.ne.0)
then
510 subroutine write_debug_info
512 integer(kind=kint) :: my_rank, ic, in
515 if (my_rank.eq.0)
then
516 write(*,*)
'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
518 write(19000+my_rank,
'(a)')
'#NCOLORTot'
519 write(19000+my_rank,*) ncolor
520 write(19000+my_rank,
'(a)')
'#ic COLORindex(ic-1)+1 COLORindex(ic)'
522 write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
524 write(29000+my_rank,
'(a)')
'#n_node'
525 write(29000+my_rank,*) n
526 write(29000+my_rank,
'(a)')
'#in OLDtoNEW(in) NEWtoOLD(in)'
528 write(29000+my_rank,*) in, iperm(in), perm(in)
529 if (perm(iperm(in)) .ne. in)
then
530 write(29000+my_rank,*)
'** WARNING **: NEWtoOLD and OLDtoNEW: ',in
533 end subroutine write_debug_info
535 subroutine check_ordering
537 integer(kind=kint) :: ic, i, j, k
538 integer(kind=kint),
allocatable :: iicolor(:)
540 if (ncolor.gt.1)
then
543 do i= colorindex(ic-1)+1, colorindex(ic)
549 do i= colorindex(ic-1)+1, colorindex(ic)
550 do j= indexl(i-1)+1, indexl(i)
552 if (iicolor(i).eq.iicolor(k))
then
553 write(*,*) .eq.
'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
560 do i= colorindex(ic), colorindex(ic-1)+1, -1
561 do j= indexu(i-1)+1, indexu(i)
563 if (iicolor(i).eq.iicolor(k))
then
564 write(*,*) .eq.
'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
572 end subroutine check_ordering
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecmat)
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecmat)
subroutine, public hecmw_matrix_reorder_renum_item(n, perm, indexxp, itemxp)
subroutine, public hecmw_matrix_reorder_values(n, ndof, perm, iperm, indexl, indexu, iteml, itemu, al, au, d, indexlp, indexup, itemlp, itemup, alp, aup, dp)
subroutine, public hecmw_matrix_reorder_profile(n, perm, iperm, indexl, indexu, iteml, itemu, indexlp, indexup, itemlp, itemup)
subroutine, public hecmw_precond_ssor_44_apply(zp)
subroutine, public hecmw_precond_ssor_44_clear(hecmat)
subroutine, public hecmw_precond_ssor_44_setup(hecmat)
subroutine, public hecmw_tuning_fx_calc_sector_cache(n, ndof, sectorcachesize0, sectorcachesize1)
integer(kind=4), parameter kreal
integer(kind=kint) function hecmw_comm_get_rank()
subroutine, public hecmw_matrix_ordering_rcm(n, indexl, iteml, indexu, itemu, perm, iperm)
subroutine, public hecmw_matrix_ordering_mc(n, indexl, iteml, indexu, itemu, perm_cur, ncolor_in, ncolor_out, colorindex, perm, iperm)