FrontISTR 5.2.0
Large-scale structural analysis program with finit element method
Loading...
Searching...
No Matches
hecmw_precond_SSOR_11.f90
Go to the documentation of this file.
1!-------------------------------------------------------------------------------
2! Copyright (c) 2019 FrontISTR Commons
3! This software is released under the MIT License, see LICENSE.txt
4!-------------------------------------------------------------------------------
5
6!C
7!C***
8!C*** module hecmw_precond_SSOR_11
9!C***
10!C
12 use hecmw_util
18 !$ use omp_lib
19
20 private
21
25
26 integer(kind=kint) :: N
27 real(kind=kreal), pointer :: d(:) => null()
28 real(kind=kreal), pointer :: al(:) => null()
29 real(kind=kreal), pointer :: au(:) => null()
30 integer(kind=kint), pointer :: indexL(:) => null()
31 integer(kind=kint), pointer :: indexU(:) => null()
32 integer(kind=kint), pointer :: itemL(:) => null()
33 integer(kind=kint), pointer :: itemU(:) => null()
34 real(kind=kreal), pointer :: alu(:) => null()
35
36 integer(kind=kint) :: NContact = 0
37 real(kind=kreal), pointer :: cal(:) => null()
38 real(kind=kreal), pointer :: cau(:) => null()
39 integer(kind=kint), pointer :: indexCL(:) => null()
40 integer(kind=kint), pointer :: indexCU(:) => null()
41 integer(kind=kint), pointer :: itemCL(:) => null()
42 integer(kind=kint), pointer :: itemCU(:) => null()
43
44 integer(kind=kint) :: NColor
45 integer(kind=kint), pointer :: COLORindex(:) => null()
46 integer(kind=kint), pointer :: perm(:) => null()
47 integer(kind=kint), pointer :: iperm(:) => null()
48
49 logical, save :: isFirst = .true.
50
51 logical, save :: INITIALIZED = .false.
52
53contains
54
55 subroutine hecmw_precond_ssor_11_setup(hecMAT)
56 implicit none
57 type(hecmwst_matrix), intent(inout) :: hecmat
58 integer(kind=kint ) :: npl, npu, npcl, npcu
59 real (kind=kreal), allocatable :: cd(:)
60 integer(kind=kint ) :: ncolor_in
61 real (kind=kreal) :: sigma_diag
62 real (kind=kreal) :: alutmp(1,1), pw(1)
63 integer(kind=kint ) :: ii, i, j, k
64 integer(kind=kint ) :: nthreads = 1
65 integer(kind=kint ), allocatable :: perm_tmp(:)
66 !real (kind=kreal) :: t0
67
68 !t0 = hecmw_Wtime()
69 !write(*,*) 'DEBUG: SSOR setup start', hecmw_Wtime()-t0
70
71 if (initialized) then
72 if (hecmat%Iarray(98) == 1) then ! need symbolic and numerical setup
74 else if (hecmat%Iarray(97) == 1) then ! need numerical setup only
75 call hecmw_precond_ssor_11_clear(hecmat) ! TEMPORARY
76 else
77 return
78 endif
79 endif
80
81 !$ nthreads = omp_get_max_threads()
82
83 n = hecmat%N
84 ! N = hecMAT%NP
85 ncolor_in = hecmw_mat_get_ncolor_in(hecmat)
86 sigma_diag = hecmw_mat_get_sigma_diag(hecmat)
87 ncontact = hecmat%cmat%n_val
88
89 if (ncontact.gt.0) then
90 call hecmw_cmat_lu( hecmat )
91 endif
92
93 if (nthreads == 1) then
94 ncolor = 1
95 allocate(colorindex(0:1), perm(n), iperm(n))
96 colorindex(0) = 0
97 colorindex(1) = n
98 do i=1,n
99 perm(i) = i
100 iperm(i) = i
101 end do
102 else
103 allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
104 call hecmw_matrix_ordering_rcm(n, hecmat%indexL, hecmat%itemL, &
105 hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
106 !write(*,*) 'DEBUG: RCM ordering done', hecmw_Wtime()-t0
107 call hecmw_matrix_ordering_mc(n, hecmat%indexL, hecmat%itemL, &
108 hecmat%indexU, hecmat%itemU, perm_tmp, &
109 ncolor_in, ncolor, colorindex, perm, iperm)
110 !write(*,*) 'DEBUG: MC ordering done', hecmw_Wtime()-t0
111 deallocate(perm_tmp)
112
113 !call write_debug_info
114 endif
115
116 npl = hecmat%indexL(n)
117 npu = hecmat%indexU(n)
118 allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
119 call hecmw_matrix_reorder_profile(n, perm, iperm, &
120 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
121 indexl, indexu, iteml, itemu)
122 !write(*,*) 'DEBUG: reordering profile done', hecmw_Wtime()-t0
123
124 !call check_ordering
125
126 allocate(d(n), al(npl), au(npu))
127 call hecmw_matrix_reorder_values(n, 1, perm, iperm, &
128 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
129 hecmat%AL, hecmat%AU, hecmat%D, &
130 indexl, indexu, iteml, itemu, al, au, d)
131 !write(*,*) 'DEBUG: reordering values done', hecmw_Wtime()-t0
132
133 call hecmw_matrix_reorder_renum_item(n, perm, indexl, iteml)
134 call hecmw_matrix_reorder_renum_item(n, perm, indexu, itemu)
135
136 if (ncontact.gt.0) then
137 npcl = hecmat%indexCL(n)
138 npcu = hecmat%indexCU(n)
139 allocate(indexcl(0:n), indexcu(0:n), itemcl(npcl), itemcu(npcu))
140 call hecmw_matrix_reorder_profile(n, perm, iperm, &
141 hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
142 indexcl, indexcu, itemcl, itemcu)
143
144 allocate(cd(n), cal(npcl), cau(npcu))
145 call hecmw_matrix_reorder_values(n, 1, perm, iperm, &
146 hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
147 hecmat%CAL, hecmat%CAU, hecmat%D, &
148 indexcl, indexcu, itemcl, itemcu, cal, cau, cd)
149 deallocate(cd)
150
151 call hecmw_matrix_reorder_renum_item(n, perm, indexcl, itemcl)
152 call hecmw_matrix_reorder_renum_item(n, perm, indexcu, itemcu)
153 end if
154
155 allocate(alu(n))
156 alu = 0.d0
157
158 do ii= 1, n
159 alu(ii) = d(ii)
160 enddo
161
162 if (ncontact.gt.0) then
163 do k= 1, hecmat%cmat%n_val
164 if (hecmat%cmat%pair(k)%i.ne.hecmat%cmat%pair(k)%j) cycle
165 ii = iperm( hecmat%cmat%pair(k)%i )
166 alu(ii) = alu(ii) + hecmat%cmat%pair(k)%val(1, 1)
167 enddo
168 endif
169
170 !$omp parallel default(none),private(ii,ALUtmp,k,i,j,PW),shared(N,ALU,SIGMA_DIAG)
171 !$omp do
172 do ii= 1, n
173 alutmp(1,1)= alu(ii) * sigma_diag
174 alutmp(1,1)= 1.d0/alutmp(1,1)
175 alu(ii)= alutmp(1,1)
176 enddo
177 !$omp end do
178 !$omp end parallel
179
180 isfirst = .true.
181
182 initialized = .true.
183 hecmat%Iarray(98) = 0 ! symbolic setup done
184 hecmat%Iarray(97) = 0 ! numerical setup done
185
186 !write(*,*) 'DEBUG: SSOR setup done', hecmw_Wtime()-t0
187
188 end subroutine hecmw_precond_ssor_11_setup
189
192 implicit none
193 real(kind=kreal), intent(inout) :: zp(:)
194 integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k
195 real(kind=kreal) :: sw(1), x(1)
196
197 ! added for turning >>>
198 integer(kind=kint), parameter :: numofblockperthread = 100
199 integer(kind=kint), save :: numofthread = 1, numofblock
200 integer(kind=kint), save, allocatable :: ictoblockindex(:)
201 integer(kind=kint), save, allocatable :: blockindextocolorindex(:)
202 integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
203 integer(kind=kint) :: blockindex, elementcount, numofelement, ii
204 real(kind=kreal) :: numofelementperblock
205 integer(kind=kint) :: my_rank
206
207 if (isfirst) then
208 !$ numOfThread = omp_get_max_threads()
209 numofblock = numofthread * numofblockperthread
210 if (allocated(ictoblockindex)) deallocate(ictoblockindex)
211 if (allocated(blockindextocolorindex)) deallocate(blockindextocolorindex)
212 allocate (ictoblockindex(0:ncolor), &
213 blockindextocolorindex(0:numofblock + ncolor))
214 numofelement = n + indexl(n) + indexu(n)
215 numofelementperblock = dble(numofelement) / numofblock
216 blockindex = 0
217 ictoblockindex = -1
218 ictoblockindex(0) = 0
219 blockindextocolorindex = -1
220 blockindextocolorindex(0) = 0
221 my_rank = hecmw_comm_get_rank()
222 ! write(9000+my_rank,*) &
223 ! '# numOfElementPerBlock =', numOfElementPerBlock
224 ! write(9000+my_rank,*) &
225 ! '# ic, blockIndex, colorIndex, elementCount'
226 do ic = 1, ncolor
227 elementcount = 0
228 ii = 1
229 do i = colorindex(ic-1)+1, colorindex(ic)
230 elementcount = elementcount + 1
231 elementcount = elementcount + (indexl(i) - indexl(i-1))
232 elementcount = elementcount + (indexu(i) - indexu(i-1))
233 if (elementcount > ii * numofelementperblock &
234 .or. i == colorindex(ic)) then
235 ii = ii + 1
236 blockindex = blockindex + 1
237 blockindextocolorindex(blockindex) = i
238 ! write(9000+my_rank,*) ic, blockIndex, &
239 ! blockIndexToColorIndex(blockIndex), elementCount
240 endif
241 enddo
242 ictoblockindex(ic) = blockindex
243 enddo
244 numofblock = blockindex
245
247 sectorcachesize0, sectorcachesize1 )
248
249 isfirst = .false.
250 endif
251 ! <<< added for turning
252
253 !call start_collection("loopInPrecond33")
254
255 !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
256 !OCL CACHE_SUBSECTOR_ASSIGN(ZP)
257
258 !$omp parallel default(none) &
259 !$omp&shared(NColor,indexL,itemL,indexU,itemU,AL,AU,D,ALU,perm,&
260 !$omp& NContact,indexCL,itemCL,indexCU,itemCU,CAL,CAU,&
261 !$omp& ZP,icToBlockIndex,blockIndexToColorIndex) &
262 !$omp&private(SW,X,ic,i,iold,isL,ieL,isU,ieU,j,k,blockIndex)
263
264 !C-- FORWARD
265 do ic=1,ncolor
266 !$omp do schedule (static, 1)
267 do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
268 do i = blockindextocolorindex(blockindex-1)+1, &
269 blockindextocolorindex(blockindex)
270 iold = perm(i)
271 sw(1)= zp(iold)
272 isl= indexl(i-1)+1
273 iel= indexl(i)
274 do j= isl, iel
275 k= iteml(j)
276 x(1)= zp(k)
277 sw(1)= sw(1) - al(j)*x(1)
278 enddo ! j
279
280 if (ncontact.ne.0) then
281 isl= indexcl(i-1)+1
282 iel= indexcl(i)
283 do j= isl, iel
284 k= itemcl(j)
285 x(1)= zp(k)
286 sw(1)= sw(1) - cal(j)*x(1)
287 enddo ! j
288 endif
289
290 x = sw
291 x(1)= alu(i )* x(1)
292 zp(iold)= x(1)
293 enddo ! i
294 enddo ! blockIndex
295 !$omp end do
296 enddo ! ic
297
298 !C-- BACKWARD
299 do ic=ncolor, 1, -1
300 !$omp do schedule (static, 1)
301 do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
302 do i = blockindextocolorindex(blockindex), &
303 blockindextocolorindex(blockindex-1)+1, -1
304 ! do blockIndex = icToBlockIndex(ic-1)+1, icToBlockIndex(ic)
305 ! do i = blockIndexToColorIndex(blockIndex-1)+1, &
306 ! blockIndexToColorIndex(blockIndex)
307 ! do i = endPos(threadNum, ic), startPos(threadNum, ic), -1
308 sw= 0.d0
309 isu= indexu(i-1) + 1
310 ieu= indexu(i)
311 do j= ieu, isu, -1
312 k= itemu(j)
313 x(1)= zp(k)
314 sw(1)= sw(1) + au(j)*x(1)
315 enddo ! j
316
317 if (ncontact.gt.0) then
318 isu= indexcu(i-1) + 1
319 ieu= indexcu(i)
320 do j= ieu, isu, -1
321 k= itemcu(j)
322 x(1)= zp(k)
323 sw(1)= sw(1) + cau(j)*x(1)
324 enddo ! j
325 endif
326
327 x = sw
328 x(1)= alu(i)* x(1)
329
330 iold = perm(i)
331 zp(iold)= zp(iold) - x(1)
332 enddo ! i
333 enddo ! blockIndex
334 !$omp end do
335 enddo ! ic
336 !$omp end parallel
337
338 !OCL END_CACHE_SUBSECTOR
339 !OCL END_CACHE_SECTOR_SIZE
340
341 !call stop_collection("loopInPrecond33")
342
343 end subroutine hecmw_precond_ssor_11_apply
344
346 implicit none
347 type(hecmwst_matrix), intent(inout) :: hecmat
348 integer(kind=kint ) :: nthreads = 1
349 !$ nthreads = omp_get_max_threads()
350 if (associated(colorindex)) deallocate(colorindex)
351 if (associated(perm)) deallocate(perm)
352 if (associated(iperm)) deallocate(iperm)
353 if (associated(alu)) deallocate(alu)
354 if (nthreads >= 1) then
355 if (associated(d)) deallocate(d)
356 if (associated(al)) deallocate(al)
357 if (associated(au)) deallocate(au)
358 if (associated(indexl)) deallocate(indexl)
359 if (associated(indexu)) deallocate(indexu)
360 if (associated(iteml)) deallocate(iteml)
361 if (associated(itemu)) deallocate(itemu)
362 if (ncontact.ne.0) then
363 if (associated(cal)) deallocate(cal)
364 if (associated(cau)) deallocate(cau)
365 if (associated(indexcl)) deallocate(indexcl)
366 if (associated(indexcu)) deallocate(indexcu)
367 if (associated(itemcl)) deallocate(itemcl)
368 if (associated(itemcu)) deallocate(itemcu)
369 end if
370 end if
371 nullify(colorindex)
372 nullify(perm)
373 nullify(iperm)
374 nullify(alu)
375 nullify(d)
376 nullify(al)
377 nullify(au)
378 nullify(indexl)
379 nullify(indexu)
380 nullify(iteml)
381 nullify(itemu)
382 if (ncontact.ne.0) then
383 nullify(cal)
384 nullify(cau)
385 nullify(indexcl)
386 nullify(indexcu)
387 nullify(itemcl)
388 nullify(itemcu)
389 call hecmw_cmat_lu_free( hecmat )
390 endif
391 ncontact = 0
392 initialized = .false.
393 end subroutine hecmw_precond_ssor_11_clear
394
395 subroutine write_debug_info
396 implicit none
397 integer(kind=kint) :: my_rank, ic, in
398 my_rank = hecmw_comm_get_rank()
399 !--------------------> debug: shizawa
400 if (my_rank.eq.0) then
401 write(*,*) 'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
402 endif
403 write(19000+my_rank,'(a)') '#NCOLORTot'
404 write(19000+my_rank,*) ncolor
405 write(19000+my_rank,'(a)') '#ic COLORindex(ic-1)+1 COLORindex(ic)'
406 do ic=1,ncolor
407 write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
408 enddo ! ic
409 write(29000+my_rank,'(a)') '#n_node'
410 write(29000+my_rank,*) n
411 write(29000+my_rank,'(a)') '#in OLDtoNEW(in) NEWtoOLD(in)'
412 do in=1,n
413 write(29000+my_rank,*) in, iperm(in), perm(in)
414 if (perm(iperm(in)) .ne. in) then
415 write(29000+my_rank,*) '** WARNING **: NEWtoOLD and OLDtoNEW: ',in
416 endif
417 enddo
418 end subroutine write_debug_info
419
420 subroutine check_ordering
421 implicit none
422 integer(kind=kint) :: ic, i, j, k
423 integer(kind=kint), allocatable :: iicolor(:)
424 ! check color dependence of neighbouring nodes
425 if (ncolor.gt.1) then
426 allocate(iicolor(n))
427 do ic=1,ncolor
428 do i= colorindex(ic-1)+1, colorindex(ic)
429 iicolor(i) = ic
430 enddo ! i
431 enddo ! ic
432 ! FORWARD: L-part
433 do ic=1,ncolor
434 do i= colorindex(ic-1)+1, colorindex(ic)
435 do j= indexl(i-1)+1, indexl(i)
436 k= iteml(j)
437 if (iicolor(i).eq.iicolor(k)) then
438 write(*,*) .eq.'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
439 endif
440 enddo ! j
441 enddo ! i
442 enddo ! ic
443 ! BACKWARD: U-part
444 do ic=ncolor, 1, -1
445 do i= colorindex(ic), colorindex(ic-1)+1, -1
446 do j= indexu(i-1)+1, indexu(i)
447 k= itemu(j)
448 if (iicolor(i).eq.iicolor(k)) then
449 write(*,*) .eq.'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
450 endif
451 enddo ! j
452 enddo ! i
453 enddo ! ic
454 deallocate(iicolor)
455 endif ! if (NColor.gt.1)
456 !--------------------< debug: shizawa
457 end subroutine check_ordering
458
459end module hecmw_precond_ssor_11
subroutine, public hecmw_cmat_lu(hecmat)
subroutine, public hecmw_cmat_lu_free(hecmat)
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecmat)
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecmat)
subroutine, public hecmw_matrix_reorder_renum_item(n, perm, indexxp, itemxp)
subroutine, public hecmw_matrix_reorder_values(n, ndof, perm, iperm, indexl, indexu, iteml, itemu, al, au, d, indexlp, indexup, itemlp, itemup, alp, aup, dp)
subroutine, public hecmw_matrix_reorder_profile(n, perm, iperm, indexl, indexu, iteml, itemu, indexlp, indexup, itemlp, itemup)
subroutine, public hecmw_precond_ssor_11_apply(zp)
subroutine, public hecmw_precond_ssor_11_setup(hecmat)
subroutine, public hecmw_precond_ssor_11_clear(hecmat)
subroutine, public hecmw_tuning_fx_calc_sector_cache(n, ndof, sectorcachesize0, sectorcachesize1)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
integer(kind=kint) function hecmw_comm_get_rank()
subroutine, public hecmw_matrix_ordering_rcm(n, indexl, iteml, indexu, itemu, perm, iperm)
subroutine, public hecmw_matrix_ordering_mc(n, indexl, iteml, indexu, itemu, perm_cur, ncolor_in, ncolor_out, colorindex, perm, iperm)