FrontISTR 5.2.0
Large-scale structural analysis program with finit element method
Loading...
Searching...
No Matches
hecmw_solver_GPBiCG.f90
Go to the documentation of this file.
1!-------------------------------------------------------------------------------
2! Copyright (c) 2019 FrontISTR Commons
3! This software is released under the MIT License, see LICENSE.txt
4!-------------------------------------------------------------------------------
5
6!C***
7!C*** module hecmw_solver_GPBiCG
8!C***
9!
11
12 private
13 public :: hecmw_solve_gpbicg
14
15contains
16 !C
17 !C*** hecmw_solve_GPBiCG
18 !C
19 subroutine hecmw_solve_gpbicg( hecMESH, hecMAT, ITER, RESID, error, &
20 & Tset, Tsol, Tcomm )
21 use hecmw_util
29
30 implicit none
31
32 type(hecmwst_local_mesh) :: hecmesh
33 type(hecmwst_matrix) :: hecmat
34 integer(kind=kint ), intent(inout):: iter, error
35 real (kind=kreal), intent(inout):: resid, tset, tsol, tcomm
36
37 integer(kind=kint ) :: n, np, ndof, nndof
38 integer(kind=kint ) :: my_rank
39
40 real (kind=kreal), pointer :: b(:)
41 real (kind=kreal), pointer :: x(:)
42
43 integer(kind=kint ) :: iterlog, timelog
44
45 real(kind=kreal), dimension(:,:), allocatable :: ww
46
47 real(kind=kreal), dimension(2) :: rr
48
49 integer(kind=kint ) :: maxit
50 real (kind=kreal) :: tol
51 integer(kind=kint ) :: i,j
52 real (kind=kreal) :: s_time,s1_time,e_time,e1_time
53 real (kind=kreal) :: bnrm2
54 real (kind=kreal) :: rho,rho1,beta,alpha,dnrm2
55 real (kind=kreal) :: qsi,eta,coef1
56 real (kind=kreal) :: t_max,t_min,t_avg,t_sd
57
58 integer(kind=kint), parameter :: r= 1
59 integer(kind=kint), parameter ::rt= 2
60 integer(kind=kint), parameter :: t= 3
61 integer(kind=kint), parameter ::tt= 4
62 integer(kind=kint), parameter ::t0= 5
63 integer(kind=kint), parameter :: p= 6
64 integer(kind=kint), parameter ::pt= 7
65 integer(kind=kint), parameter :: u= 8
66 integer(kind=kint), parameter ::w1= 9
67 integer(kind=kint), parameter :: y=10
68 integer(kind=kint), parameter :: z=11
69 integer(kind=kint), parameter ::wk=12
70 integer(kind=kint), parameter ::w2=13
71 integer(kind=kint), parameter ::zq=14
72
73 integer(kind=kint), parameter :: n_iter_recompute_r= 20
74
75 call hecmw_barrier(hecmesh)
76 s_time= hecmw_wtime()
77 !C
78 !C-- INIT.
79 n = hecmat%N
80 np = hecmat%NP
81 ndof = hecmat%NDOF
82 nndof = n * ndof
83 my_rank = hecmesh%my_rank
84 x => hecmat%X
85 b => hecmat%B
86
87 iterlog = hecmw_mat_get_iterlog( hecmat )
88 timelog = hecmw_mat_get_timelog( hecmat )
89 maxit = hecmw_mat_get_iter( hecmat )
90 tol = hecmw_mat_get_resid( hecmat )
91
92 error= 0
93 beta = 0.0d0
94
95 allocate (ww(ndof*np,14))
96 ww= 0.d0
97
98 !C
99 !C-- SCALING
100 call hecmw_solver_scaling_fw(hecmesh, hecmat, tcomm)
101
102 !C===
103 !C +----------------------+
104 !C | SETUP PRECONDITIONER |
105 !C +----------------------+
106 !C===
107 call hecmw_precond_setup(hecmat, hecmesh, 0)
108
109 !C
110 !C +----------------------+
111 !C | {r}= {b} - [A]{xini} |
112 !C +----------------------+
113 !C===
114 call hecmw_matresid(hecmesh, hecmat, x, b, ww(:,r), tcomm)
115
116 do i= 1, nndof
117 ww(i,rt)= ww(i,r)
118 enddo
119 !C==
120 call hecmw_innerproduct_r(hecmesh, ndof, b, b, bnrm2, tcomm)
121 if (bnrm2.eq.0.d0) then
122 iter = 0
123 maxit = 0
124 resid = 0.d0
125 x = 0.d0
126 endif
127
128 call hecmw_innerproduct_r(hecmesh, ndof, ww(:,rt), ww(:,r), rho, tcomm)
129
130 e_time= hecmw_wtime()
131 if (timelog.eq.2) then
132 call hecmw_time_statistics(hecmesh, e_time - s_time, &
133 t_max, t_min, t_avg, t_sd)
134 if (hecmesh%my_rank.eq.0) then
135 write(*,*) 'Time solver setup'
136 write(*,*) ' Max :',t_max
137 write(*,*) ' Min :',t_min
138 write(*,*) ' Avg :',t_avg
139 write(*,*) ' Std Dev :',t_sd
140 endif
141 tset = t_max
142 else
143 tset = e_time - s_time
144 endif
145 !C===
146
147 !C
148 !C*************************************************************** ITERATIVE PROC.
149 !C
150 call hecmw_barrier(hecmesh)
151 s1_time= hecmw_wtime()
152 do iter= 1, maxit
153 !C
154 !C +----------------+
155 !C | {r}= [Minv]{r} |
156 !C +----------------+
157 !C===
158 do j= 1, nndof
159 ww(j,wk)= ww(j,r)
160 enddo
161
162 call hecmw_precond_apply(hecmesh, hecmat, ww(:,wk), ww(:,r), ww(:,zq), tcomm)
163 !C===
164
165 !C
166 !C +----------------------------------+
167 !C | {p} = {r} + BETA * ( {p} - {u} ) |
168 !C +----------------------------------+
169 !C===
170 if (iter.gt.1) then
171 do j= 1, nndof
172 ww(j,p)= ww(j,r) + beta*( ww(j,p)-ww(j,u))
173 enddo
174 else
175 do j= 1, nndof
176 ww(j,p)= ww(j,r)
177 enddo
178 endif
179 !C===
180
181 !C
182 !C +--------------------------------+
183 !C | ALPHA= {r_tld}{r}/{r_tld} A{p} |
184 !C +--------------------------------+
185 !C===
186
187 !C
188 !C-- calc. {p_tld}= A{p}
189 call hecmw_matvec(hecmesh, hecmat, ww(:,p), ww(:,pt), tcomm)
190
191 !C
192 !C-- calc. ALPHA
193 call hecmw_innerproduct_r(hecmesh, ndof, ww(:,rt), ww(:,pt), rho1, tcomm)
194
195 alpha= rho / rho1
196 !C===
197
198 !C
199 !C +------------------------------------------+
200 !C | {y}= {t} - {r} - ALPHA{w} + ALPHA{p_tld} |
201 !C | {t}= {r} - ALPHA{p_tld} |
202 !C +------------------------------------------+
203 !C===
204 do j= 1, nndof
205 ww(j,y)= ww(j,t) - ww(j,wk) + alpha*(-ww(j,w1) + ww(j,pt))
206 ww(j,t)= ww(j,wk) - alpha*ww(j,pt)
207 enddo
208 !C===
209
210 !C
211 !C +-----------------------+
212 !C | {t_tld}= [A][Minv]{t} |
213 !C +-----------------------+
214 !C===
215
216 !C
217 !C-- calc. {t_tld} and {t0} by [M] inversion
218 !C {W2} = [Minv]{p_tld}
219 !C
220 call hecmw_precond_apply(hecmesh, hecmat, ww(:,t), ww(:,tt), ww(:,zq), tcomm)
221 call hecmw_precond_apply(hecmesh, hecmat, ww(:,t0), ww(:,w2), ww(:,zq), tcomm)
222 do i= 1, nndof
223 ww(i,t0)= ww(i,w2)
224 enddo
225 call hecmw_precond_apply(hecmesh, hecmat, ww(:,pt), ww(:,w2), ww(:,zq), tcomm)
226
227 !C===
228
229 !C
230 !C-- calc. [A]{t_tld}
231 call hecmw_matvec(hecmesh, hecmat, ww(:,tt), ww(:,wk), tcomm)
232
233 do i= 1, nndof
234 ww(i,tt)= ww(i,wk)
235 enddo
236 !C===
237
238 !C
239 !C +-------------------+
240 !C | calc. QSI and ETA |
241 !C +-------------------+
242 !C===
243 !call pol_coef(iter, WW, T, TT, Y, QSI, ETA)
244 !call pol_coef_vanilla(iter, WW, T, TT, Y, QSI, ETA)
245 call pol_coef_vanilla2(iter, ww, t, tt, y, qsi, eta)
246 !C===
247
248 !C
249 !C +----------------------------------------------------------+
250 !C | {u} = QSI [Minv]{pt} + ETA([Minv]{t0}-[Minv]{r}+BETA*{u} |
251 !C | {z} = QSI [Minv]{r} + ETA*{z} - ALPHA*{u} |
252 !C +----------------------------------------------------------+
253 !C===
254
255 !C
256 !C-- compute. {u},{z}
257
258 if (iter.gt.1) then
259 do j= 1, nndof
260 ww(j,u)= qsi* ww(j,w2) + eta*(ww(j,t0) - ww(j,r) + beta*ww(j,u))
261 ww(j,z)= qsi* ww(j, r) + eta* ww(j, z) - alpha*ww(j,u)
262 enddo
263 else
264 do j= 1, nndof
265 ww(j,u)= qsi* ww(j,w2) + eta*(ww(j,t0) - ww(j,r))
266 ww(j,z)= qsi* ww(j, r) + eta* ww(j, z) - alpha*ww(j,u)
267 enddo
268 endif
269 !C===
270
271 !C
272 !C +--------------------+
273 !C | update {x},{r},{w} |
274 !C +--------------------+
275 !C===
276 do j= 1, nndof
277 x(j)= x(j) + alpha*ww(j,p) + ww(j,z)
278 ! WW(j,R)= WW(j,T) - ETA*WW(j,Y) - QSI*WW(j,TT)
279 ww(j,t0)= ww(j,t)
280 enddo
281 !C
282 !C--- recompute R sometimes
283 if ( mod(iter,n_iter_recompute_r)==0 ) then
284 call hecmw_matresid(hecmesh, hecmat, x, b, ww(:,r), tcomm)
285 else
286 do j= 1, nndof
287 ww(j,r)= ww(j,t) - eta*ww(j,y) - qsi*ww(j,tt)
288 enddo
289 endif
290
291 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,r), ww(:,r), rr(1))
292 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,r), ww(:,rt), rr(2))
293 s_time= hecmw_wtime()
294 call hecmw_allreduce_r(hecmesh, rr, 2, hecmw_sum)
295 e_time= hecmw_wtime()
296 tcomm = tcomm + e_time - s_time
297 dnrm2 = rr(1)
298 coef1 = rr(2)
299
300 beta = alpha*coef1 / (qsi*rho)
301 do j= 1, nndof
302 ww(j,w1)= ww(j,tt) + beta*ww(j,pt)
303 enddo
304
305 resid= dsqrt(dnrm2/bnrm2)
306 rho = coef1
307
308 !C##### ITERATION HISTORY
309 if (my_rank.eq.0 .and. iterlog.eq.1) &
310 & write (*, 1000) iter, resid
311 1000 format (i5, 1pe16.6)
312 !C#####
313
314 if (resid.le.tol ) then
315 if ( mod(iter,n_iter_recompute_r)==0 ) exit
316 !C----- recompute R to make sure it is really converged
317 call hecmw_matresid(hecmesh, hecmat, x, b, ww(:,r), tcomm)
318 call hecmw_innerproduct_r(hecmesh, ndof, ww(:,r), ww(:,r), dnrm2, tcomm)
319 resid= dsqrt(dnrm2/bnrm2)
320 if (resid.le.tol) exit
321 endif
322 if ( iter.eq.maxit ) error= hecmw_solver_error_noconv_maxit
323 !C===
324 enddo
325
326 call hecmw_solver_scaling_bk(hecmat)
327 !C
328 !C-- INTERFACE data EXCHANGE
329
330 s_time = hecmw_wtime()
331 call hecmw_update_m_r (hecmesh, x, hecmat%NP, hecmat%NDOF)
332 e_time = hecmw_wtime()
333 tcomm = tcomm + e_time - s_time
334
335 deallocate (ww)
336 !call hecmw_precond_clear(hecMAT)
337
338 e1_time= hecmw_wtime()
339 if (timelog.eq.2) then
340 call hecmw_time_statistics(hecmesh, e1_time - s1_time, &
341 t_max, t_min, t_avg, t_sd)
342 if (hecmesh%my_rank.eq.0) then
343 write(*,*) 'Time solver iterations'
344 write(*,*) ' Max :',t_max
345 write(*,*) ' Min :',t_min
346 write(*,*) ' Avg :',t_avg
347 write(*,*) ' Std Dev :',t_sd
348 endif
349 tsol = t_max
350 else
351 tsol = e1_time - s1_time
352 endif
353
354 contains
355
356 !C
357 !C*** pol_coef : computes QSI and ETA in original GPBiCG way
358 !C
359 subroutine pol_coef(iter, WW, T, TT, Y, QSI, ETA)
360 implicit none
361 integer(kind=kint), intent(in) :: iter
362 real(kind=kreal), intent(in) :: ww(:,:)
363 integer(kind=kint), intent(in) :: T, TT, Y
364 real(kind=kreal), intent(out) :: qsi, eta
365
366 real(kind=kreal), dimension(5) :: cg
367 real(kind=kreal), dimension(2) :: eq
368 real(kind=kreal) :: delta
369
370 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,y ), ww(:,y ), cg(1)) ! myu1
371 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,tt), ww(:,t ), cg(2)) ! omega2
372 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,y ), ww(:,t ), cg(3)) ! omega1
373 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,tt), ww(:,y ), cg(4)) ! nyu
374 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,tt), ww(:,tt), cg(5)) ! myu2
375 s_time= hecmw_wtime()
376 call hecmw_allreduce_r(hecmesh, cg, 5, hecmw_sum)
377 e_time= hecmw_wtime()
378 tcomm = tcomm + e_time - s_time
379
380 if (iter.eq.1) then
381 eq(1)= cg(2)/cg(5) ! omega2 / myu2
382 eq(2)= 0.d0
383 else
384 delta= (cg(5)*cg(1)-cg(4)*cg(4)) ! myu1*myu2 - nyu^2
385 eq(1)= (cg(1)*cg(2)-cg(3)*cg(4)) / delta ! (myu1*omega2-nyu*omega1)/delta
386 eq(2)= (cg(5)*cg(3)-cg(4)*cg(2)) / delta ! (myu2*omega1-nyu*omega2)/delta
387 endif
388
389 qsi= eq(1)
390 eta= eq(2)
391 end subroutine pol_coef
392
393 !C
394 !C*** pol_coef_vanilla : computes QSI and ETA with vanilla strategy
395 !C see Fujino, Abe, Sugihara and Nakashima(2013) ISBN978-4-621-08741-1
396 !C
397 subroutine pol_coef_vanilla(iter, WW, T, TT, Y, QSI, ETA)
398 implicit none
399 integer(kind=kint), intent(in) :: iter
400 real(kind=kreal), intent(inout) :: ww(:,:)
401 integer(kind=kint), intent(in) :: T, TT, Y
402 real(kind=kreal), intent(out) :: qsi, eta
403
404 real(kind=kreal), dimension(3) :: cg
405 real(kind=kreal) :: gamma1, gamma2
406 real(kind=kreal) :: c, c_abs
407
408 real(kind=kreal), parameter :: omega = 0.707106781d0
409
410 if (iter.eq.1) then
411 gamma1 = 0.d0
412 gamma2 = 0.d0
413 else
414 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,y ), ww(:,y ), cg(1)) ! myu
415 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,y ), ww(:,tt), cg(2)) ! nyu
416 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,y ), ww(:,t ), cg(3)) ! omega
417 s_time= hecmw_wtime()
418 call hecmw_allreduce_r(hecmesh, cg, 3, hecmw_sum)
419 e_time= hecmw_wtime()
420 tcomm = tcomm + e_time - s_time
421 gamma1 = cg(3)/cg(1) ! omega / myu
422 gamma2 = cg(2)/cg(1) ! nyu / myu
423 !!! COMMENTED OUT because no convergence obtained with following updates.
424 ! do j= 1, NNDOF
425 ! WW(j,T )= WW(j,T ) - gamma1*WW(j,Y)
426 ! WW(j,TT)= WW(j,TT) - gamma2*WW(j,Y)
427 ! enddo
428 endif
429
430 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,t ), ww(:,t ), cg(1)) ! |r|^2
431 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,tt), ww(:,tt), cg(2)) ! |s|^2
432 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,t ), ww(:,tt), cg(3)) ! r.s
433 s_time= hecmw_wtime()
434 call hecmw_allreduce_r(hecmesh, cg, 3, hecmw_sum)
435 e_time= hecmw_wtime()
436 tcomm = tcomm + e_time - s_time
437
438 c = cg(3) / dsqrt(cg(1)*cg(2))
439 c_abs = dabs(c)
440 if (c_abs > omega) then
441 qsi = c * dsqrt(cg(1)/cg(2))
442 else
443 ! QSI = (c / c_abs) * OMEGA * dsqrt(CG(1)/CG(2))
444 if (c >= 0.d0) then
445 qsi = omega * dsqrt(cg(1)/cg(2))
446 else
447 qsi = -omega * dsqrt(cg(1)/cg(2))
448 endif
449 endif
450 eta = gamma1 - qsi*gamma2
451 end subroutine pol_coef_vanilla
452
453 !C
454 !C*** pol_coef_vanilla2 : optimized version of pol_coef_vanilla
455 !C
456 subroutine pol_coef_vanilla2(iter, WW, T, TT, Y, QSI, ETA)
457 implicit none
458 integer(kind=kint), intent(in) :: iter
459 real(kind=kreal), intent(inout) :: ww(:,:)
460 integer(kind=kint), intent(in) :: T, TT, Y
461 real(kind=kreal), intent(out) :: qsi, eta
462
463 real(kind=kreal), dimension(6) :: cg
464 real(kind=kreal) :: gamma1, gamma2
465 real(kind=kreal) :: c, c_abs
466
467 real(kind=kreal), parameter :: omega = 0.707106781d0
468
469 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,t ), ww(:,t ), cg(1)) ! |r|^2
470 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,tt), ww(:,tt), cg(2)) ! |s|^2
471 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,t ), ww(:,tt), cg(3)) ! r.s
472
473 if (iter.gt.1) then
474 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,y ), ww(:,y ), cg(4)) ! myu
475 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,y ), ww(:,tt), cg(5)) ! nyu
476 call hecmw_innerproduct_r_nocomm(hecmesh, ndof, ww(:,y ), ww(:,t ), cg(6)) ! omega
477 s_time= hecmw_wtime()
478 call hecmw_allreduce_r(hecmesh, cg, 6, hecmw_sum)
479 e_time= hecmw_wtime()
480 tcomm = tcomm + e_time - s_time
481 gamma1 = cg(6)/cg(4) ! omega / myu
482 gamma2 = cg(5)/cg(4) ! nyu / myu
483 else
484 s_time= hecmw_wtime()
485 call hecmw_allreduce_r(hecmesh, cg, 3, hecmw_sum)
486 e_time= hecmw_wtime()
487 tcomm = tcomm + e_time - s_time
488 gamma1 = 0.d0
489 gamma2 = 0.d0
490 endif
491
492 c = cg(3) / dsqrt(cg(1)*cg(2))
493 c_abs = dabs(c)
494 if (c_abs > omega) then
495 qsi = c * dsqrt(cg(1)/cg(2))
496 else
497 if (c >= 0.d0) then
498 qsi = omega * dsqrt(cg(1)/cg(2))
499 else
500 qsi = -omega * dsqrt(cg(1)/cg(2))
501 endif
502 endif
503 eta = gamma1 - qsi*gamma2
504 end subroutine pol_coef_vanilla2
505
506 end subroutine hecmw_solve_gpbicg
507
508end module hecmw_solver_gpbicg
subroutine pol_coef(iter, ww, t, tt, y, qsi, eta)
integer(kind=kint) function, public hecmw_mat_get_timelog(hecmat)
real(kind=kreal) function, public hecmw_mat_get_resid(hecmat)
integer(kind=kint) function, public hecmw_mat_get_iter(hecmat)
integer(kind=kint) function, public hecmw_mat_get_iterlog(hecmat)
subroutine, public hecmw_precond_setup(hecmat, hecmesh, sym)
subroutine, public hecmw_precond_apply(hecmesh, hecmat, r, z, zp, commtime)
subroutine, public hecmw_solve_gpbicg(hecmesh, hecmat, iter, resid, error, tset, tsol, tcomm)
subroutine, public hecmw_matresid(hecmesh, hecmat, x, b, r, commtime)
subroutine, public hecmw_matvec(hecmesh, hecmat, x, y, commtime)
subroutine hecmw_innerproduct_r_nocomm(hecmesh, ndof, x, y, sum)
subroutine hecmw_time_statistics(hecmesh, time, t_max, t_min, t_avg, t_sd)
subroutine hecmw_innerproduct_r(hecmesh, ndof, x, y, sum, commtime)
subroutine, public hecmw_solver_scaling_fw(hecmesh, hecmat, commtime)
subroutine, public hecmw_solver_scaling_bk(hecmat)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=kint), parameter hecmw_sum
integer(kind=4), parameter kreal
real(kind=kreal) function hecmw_wtime()
subroutine hecmw_barrier(hecmesh)
subroutine hecmw_update_m_r(hecmesh, val, n, m)
subroutine hecmw_allreduce_r(hecmesh, val, n, ntag)
integer(kind=kint), parameter hecmw_solver_error_noconv_maxit