FrontISTR 5.2.0
Large-scale structural analysis program with finit element method
Loading...
Searching...
No Matches
hecmw_solver_las_66.f90
Go to the documentation of this file.
1!-------------------------------------------------------------------------------
2! Copyright (c) 2019 FrontISTR Commons
3! This software is released under the MIT License, see LICENSE.txt
4!-------------------------------------------------------------------------------
5
7 use hecmw_util
8 implicit none
9
10 private
11
12 public :: hecmw_matvec_66
13 public :: hecmw_matresid_66
14 public :: hecmw_rel_resid_l2_66
15 public :: hecmw_tvec_66
16 public :: hecmw_ttvec_66
17 public :: hecmw_ttmattvec_66
18
19contains
20
21 !C
22 !C***
23 !C*** hecmw_matvec_66
24 !C***
25 !C
26 subroutine hecmw_matvec_66 (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
27 use hecmw_util
33 !$ use omp_lib
34
35 implicit none
36 type (hecmwst_local_mesh), intent(in) :: hecmesh
37 type (hecmwst_matrix), intent(in), target :: hecmat
38 real(kind=kreal), intent(in) :: x(:)
39 real(kind=kreal), intent(out) :: y(:)
40 real(kind=kreal), intent(inout) :: time_ax
41 real(kind=kreal), intent(inout), optional :: commtime
42
43 real(kind=kreal) :: start_time, end_time, tcomm
44 integer(kind=kint) :: i, j, js, je, in
45 real(kind=kreal) :: yv1, yv2, yv3, x1, x2, x3
46 real(kind=kreal) :: yv4, yv5, yv6, x4, x5, x6
47
48 integer(kind=kint) :: n, np
49 integer(kind=kint), pointer :: indexl(:), iteml(:), indexu(:), itemu(:)
50 real(kind=kreal), pointer :: al(:), au(:), d(:)
51
52 ! added for turning >>>
53 integer, parameter :: numofblockperthread = 100
54 logical, save :: isfirst = .true.
55 integer, save :: numofthread = 1
56 integer, save, allocatable :: startpos(:), endpos(:)
57 integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
58 integer(kind=kint) :: threadnum, blocknum, numofblock
59 integer(kind=kint) :: numofelement, elementcount, blockindex
60 real(kind=kreal) :: numofelementperblock
61 ! <<< added for turning
62
63 if (hecmw_mat_get_usejad(hecmat).ne.0) then
64 tcomm = 0.d0
65 start_time = hecmw_wtime()
66 call hecmw_jad_matvec(hecmesh, hecmat, x, y, tcomm)
67 end_time = hecmw_wtime()
68 time_ax = time_ax + end_time - start_time - tcomm
69 if (present(commtime)) commtime = commtime + tcomm
70 else
71
72 n = hecmat%N
73 np = hecmat%NP
74 indexl => hecmat%indexL
75 indexu => hecmat%indexU
76 iteml => hecmat%itemL
77 itemu => hecmat%itemU
78 al => hecmat%AL
79 au => hecmat%AU
80 d => hecmat%D
81
82 ! added for turning >>>
83 if (isfirst .eqv. .true.) then
84 !$ numOfThread = omp_get_max_threads()
85 numofblock = numofthread * numofblockperthread
86 allocate (startpos(0 : numofblock - 1), endpos(0 : numofblock - 1))
87 numofelement = n + indexl(n) + indexu(n)
88 numofelementperblock = dble(numofelement) / numofblock
89 blocknum = 0
90 elementcount = 0
91 startpos(blocknum) = 1
92 do i= 1, n
93 elementcount = elementcount + 1
94 elementcount = elementcount + (indexl(i) - indexl(i-1))
95 elementcount = elementcount + (indexu(i) - indexu(i-1))
96 if (elementcount > (blocknum + 1) * numofelementperblock) then
97 endpos(blocknum) = i
98 ! write(9000+hecMESH%my_rank,*) mod(blockNum, numOfThread), &
99 ! startPos(blockNum), endPos(blockNum)
100 blocknum = blocknum + 1
101 startpos(blocknum) = i + 1
102 if (blocknum == (numofblock - 1)) exit
103 endif
104 enddo
105 endpos(blocknum) = n
106 ! write(9000+hecMESH%my_rank,*) mod(blockNum, numOfThread), &
107 ! startPos(blockNum), endPos(blockNum)
108 ! for irregular data
109 do i= blocknum+1, numofblock-1
110 startpos(i) = n
111 endpos(i) = n-1
112 ! write(9000+hecMESH%my_rank,*) mod(i, numOfThread), &
113 ! startPos(i), endPos(i)
114 end do
115
117 sectorcachesize0, sectorcachesize1)
118
119 isfirst = .false.
120 endif
121 ! <<< added for turning
122
123 start_time= hecmw_wtime()
124 call hecmw_update_6_r (hecmesh, x, np)
125 end_time= hecmw_wtime()
126 if (present(commtime)) commtime = commtime + end_time - start_time
127
128 start_time = hecmw_wtime()
129
130 !call fapp_start("loopInMatvec66", 1, 0)
131 !call start_collection("loopInMatvec66")
132
133 !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
134 !OCL CACHE_SUBSECTOR_ASSIGN(X)
135
136 !$OMP PARALLEL DEFAULT(NONE) &
137 !$OMP&PRIVATE(i,X1,X2,X3,X4,X5,X6,YV1,YV2,YV3,YV4,YV5,YV6,jS,jE,j,in,threadNum,blockNum,blockIndex) &
138 !$OMP&SHARED(D,AL,AU,indexL,itemL,indexU,itemU,X,Y,startPos,endPos,numOfThread)
139 threadnum = 0
140 !$ threadNum = omp_get_thread_num()
141 do blocknum = 0 , numofblockperthread - 1
142 blockindex = blocknum * numofthread + threadnum
143 do i = startpos(blockindex), endpos(blockindex)
144 x1= x(6*i-5)
145 x2= x(6*i-4)
146 x3= x(6*i-3)
147 x4= x(6*i-2)
148 x5= x(6*i-1)
149 x6= x(6*i )
150 yv1= d(36*i-35)*x1 + d(36*i-34)*x2 + d(36*i-33)*x3 + d(36*i-32)*x4 + d(36*i-31)*x5 + d(36*i-30)*x6
151 yv2= d(36*i-29)*x1 + d(36*i-28)*x2 + d(36*i-27)*x3 + d(36*i-26)*x4 + d(36*i-25)*x5 + d(36*i-24)*x6
152 yv3= d(36*i-23)*x1 + d(36*i-22)*x2 + d(36*i-21)*x3 + d(36*i-20)*x4 + d(36*i-19)*x5 + d(36*i-18)*x6
153 yv4= d(36*i-17)*x1 + d(36*i-16)*x2 + d(36*i-15)*x3 + d(36*i-14)*x4 + d(36*i-13)*x5 + d(36*i-12)*x6
154 yv5= d(36*i-11)*x1 + d(36*i-10)*x2 + d(36*i-9 )*x3 + d(36*i-8 )*x4 + d(36*i-7 )*x5 + d(36*i-6 )*x6
155 yv6= d(36*i-5 )*x1 + d(36*i-4 )*x2 + d(36*i-3 )*x3 + d(36*i-2 )*x4 + d(36*i-1 )*x5 + d(36*i )*x6
156
157 js= indexl(i-1) + 1
158 je= indexl(i )
159 do j= js, je
160 in = iteml(j)
161 x1= x(6*in-5)
162 x2= x(6*in-4)
163 x3= x(6*in-3)
164 x4= x(6*in-2)
165 x5= x(6*in-1)
166 x6= x(6*in )
167 yv1= yv1 + al(36*j-35)*x1 + al(36*j-34)*x2 + al(36*j-33)*x3 + al(36*j-32)*x4 + al(36*j-31)*x5 + al(36*j-30)*x6
168 yv2= yv2 + al(36*j-29)*x1 + al(36*j-28)*x2 + al(36*j-27)*x3 + al(36*j-26)*x4 + al(36*j-25)*x5 + al(36*j-24)*x6
169 yv3= yv3 + al(36*j-23)*x1 + al(36*j-22)*x2 + al(36*j-21)*x3 + al(36*j-20)*x4 + al(36*j-19)*x5 + al(36*j-18)*x6
170 yv4= yv4 + al(36*j-17)*x1 + al(36*j-16)*x2 + al(36*j-15)*x3 + al(36*j-14)*x4 + al(36*j-13)*x5 + al(36*j-12)*x6
171 yv5= yv5 + al(36*j-11)*x1 + al(36*j-10)*x2 + al(36*j-9 )*x3 + al(36*j-8 )*x4 + al(36*j-7 )*x5 + al(36*j-6 )*x6
172 yv6= yv6 + al(36*j-5 )*x1 + al(36*j-4 )*x2 + al(36*j-3 )*x3 + al(36*j-2 )*x4 + al(36*j-1 )*x5 + al(36*j )*x6
173 enddo
174 js= indexu(i-1) + 1
175 je= indexu(i )
176 do j= js, je
177 in = itemu(j)
178 x1= x(6*in-5)
179 x2= x(6*in-4)
180 x3= x(6*in-3)
181 x4= x(6*in-2)
182 x5= x(6*in-1)
183 x6= x(6*in )
184 yv1= yv1 + au(36*j-35)*x1 + au(36*j-34)*x2 + au(36*j-33)*x3 + au(36*j-32)*x4 + au(36*j-31)*x5 + au(36*j-30)*x6
185 yv2= yv2 + au(36*j-29)*x1 + au(36*j-28)*x2 + au(36*j-27)*x3 + au(36*j-26)*x4 + au(36*j-25)*x5 + au(36*j-24)*x6
186 yv3= yv3 + au(36*j-23)*x1 + au(36*j-22)*x2 + au(36*j-21)*x3 + au(36*j-20)*x4 + au(36*j-19)*x5 + au(36*j-18)*x6
187 yv4= yv4 + au(36*j-17)*x1 + au(36*j-16)*x2 + au(36*j-15)*x3 + au(36*j-14)*x4 + au(36*j-13)*x5 + au(36*j-12)*x6
188 yv5= yv5 + au(36*j-11)*x1 + au(36*j-10)*x2 + au(36*j-9 )*x3 + au(36*j-8 )*x4 + au(36*j-7 )*x5 + au(36*j-6 )*x6
189 yv6= yv6 + au(36*j-5 )*x1 + au(36*j-4 )*x2 + au(36*j-3 )*x3 + au(36*j-2 )*x4 + au(36*j-1 )*x5 + au(36*j )*x6
190 enddo
191 y(6*i-5)= yv1
192 y(6*i-4)= yv2
193 y(6*i-3)= yv3
194 y(6*i-2)= yv4
195 y(6*i-1)= yv5
196 y(6*i )= yv6
197 enddo
198 enddo
199 !$OMP END PARALLEL
200
201 !OCL END_CACHE_SUBSECTOR
202 !OCL END_CACHE_SECTOR_SIZE
203
204 !call stop_collection("loopInMatvec66")
205 !call fapp_stop("loopInMatvec66", 1, 0)
206
207 end_time = hecmw_wtime()
208 time_ax = time_ax + end_time - start_time
209
210 endif
211
212 if (hecmat%cmat%n_val > 0) then
213 call hecmw_cmat_multvec_add( hecmat%cmat, x, y, np * hecmat%NDOF )
214 end if
215
216 end subroutine hecmw_matvec_66
217
218 !C
219 !C***
220 !C*** hecmw_matresid_66
221 !C***
222 !C
223 subroutine hecmw_matresid_66 (hecMESH, hecMAT, X, B, R, COMMtime)
224 use hecmw_util
225 implicit none
226 type (hecmwst_local_mesh), intent(in) :: hecmesh
227 type (hecmwst_matrix), intent(in) :: hecmat
228 real(kind=kreal), intent(in) :: x(:), b(:)
229 real(kind=kreal), intent(out) :: r(:)
230 real(kind=kreal), intent(inout), optional :: commtime
231
232 integer(kind=kint) :: i
233 real(kind=kreal) :: tcomm
234
235 tcomm = 0.d0
236 call hecmw_matvec_66 (hecmesh, hecmat, x, r, tcomm)
237 if (present(commtime)) commtime = commtime + tcomm
238 do i = 1, hecmat%N * 6
239 r(i) = b(i) - r(i)
240 enddo
241
242 end subroutine hecmw_matresid_66
243
244 !C
245 !C***
246 !C*** hecmw_rel_resid_L2_66
247 !C***
248 !C
249 function hecmw_rel_resid_l2_66 (hecMESH, hecMAT, COMMtime)
250 use hecmw_util
252 implicit none
253 real(kind=kreal) :: hecmw_rel_resid_l2_66
254 type ( hecmwst_local_mesh ), intent(in) :: hecmesh
255 type ( hecmwst_matrix ), intent(in) :: hecmat
256 real(kind=kreal), intent(inout), optional :: commtime
257
258 real(kind=kreal), allocatable :: r(:)
259 real(kind=kreal) :: bnorm2, rnorm2
260 real(kind=kreal) :: tcomm
261
262 allocate(r(hecmat%NDOF*hecmat%NP))
263
264 tcomm = 0.d0
265 call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, &
266 hecmat%B, hecmat%B, bnorm2, tcomm)
267 if (bnorm2 == 0.d0) then
268 bnorm2 = 1.d0
269 endif
270 call hecmw_matresid_66(hecmesh, hecmat, hecmat%X, hecmat%B, r, tcomm)
271 call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, r, r, rnorm2, tcomm)
272 hecmw_rel_resid_l2_66 = sqrt(rnorm2 / bnorm2)
273
274 if (present(commtime)) commtime = commtime + tcomm
275
276 deallocate(r)
277 end function hecmw_rel_resid_l2_66
278
279 !C
280 !C***
281 !C*** hecmw_Tvec_66
282 !C***
283 !C
284 subroutine hecmw_tvec_66 (hecMESH, X, Y, COMMtime)
285 use hecmw_util
287 implicit none
288 type (hecmwst_local_mesh), intent(in) :: hecmesh
289 real(kind=kreal), intent(in) :: x(:)
290 real(kind=kreal), intent(out) :: y(:)
291 real(kind=kreal), intent(inout) :: commtime
292
293 real(kind=kreal) :: start_time, end_time
294 integer(kind=kint) :: i
295
296 start_time= hecmw_wtime()
297 call hecmw_update_6_r (hecmesh, x, hecmesh%n_node)
298 end_time= hecmw_wtime()
299 commtime = commtime + end_time - start_time
300
301 do i= 1, hecmesh%nn_internal * hecmesh%n_dof
302 y(i)= x(i)
303 enddo
304
305 ! do i= 1, hecMESH%mpc%n_mpc
306 ! k = hecMESH%mpc%mpc_index(i-1) + 1
307 ! kk = 3 * (hecMESH%mpc%mpc_item(k) - 1) + hecMESH%mpc%mpc_dof(k)
308 ! Y(kk) = 0.d0
309 ! do j= hecMESH%mpc%mpc_index(i-1) + 2, hecMESH%mpc%mpc_index(i)
310 ! jj = 3 * (hecMESH%mpc%mpc_item(j) - 1) + hecMESH%mpc%mpc_dof(j)
311 ! Y(kk) = Y(kk) - hecMESH%mpc%mpc_val(j) * X(jj)
312 ! enddo
313 ! enddo
314
315 end subroutine hecmw_tvec_66
316
317 !C
318 !C***
319 !C*** hecmw_Ttvec_66
320 !C***
321 !C
322 subroutine hecmw_ttvec_66 (hecMESH, X, Y, COMMtime)
323 use hecmw_util
325 implicit none
326 type (hecmwst_local_mesh), intent(in) :: hecmesh
327 real(kind=kreal), intent(in) :: x(:)
328 real(kind=kreal), intent(out) :: y(:)
329 real(kind=kreal), intent(inout) :: commtime
330
331 real(kind=kreal) :: start_time, end_time
332 integer(kind=kint) :: i
333
334 start_time= hecmw_wtime()
335 call hecmw_update_6_r (hecmesh, x, hecmesh%n_node)
336 end_time= hecmw_wtime()
337 commtime = commtime + end_time - start_time
338
339 do i= 1, hecmesh%nn_internal * hecmesh%n_dof
340 y(i)= x(i)
341 enddo
342
343 ! do i= 1, hecMESH%mpc%n_mpc
344 ! k = hecMESH%mpc%mpc_index(i-1) + 1
345 ! kk = 3 * (hecMESH%mpc%mpc_item(k) - 1) + hecMESH%mpc%mpc_dof(k)
346 ! Y(kk) = 0.d0
347 ! do j= hecMESH%mpc%mpc_index(i-1) + 2, hecMESH%mpc%mpc_index(i)
348 ! jj = 3 * (hecMESH%mpc%mpc_item(j) - 1) + hecMESH%mpc%mpc_dof(j)
349 ! Y(jj) = Y(jj) - hecMESH%mpc%mpc_val(j) * X(kk)
350 ! enddo
351 ! enddo
352
353 end subroutine hecmw_ttvec_66
354
355 !C
356 !C***
357 !C*** hecmw_TtmatTvec_66
358 !C***
359 !C
360 subroutine hecmw_ttmattvec_66 (hecMESH, hecMAT, X, Y, W, COMMtime)
361 use hecmw_util
362 implicit none
363 type (hecmwst_local_mesh), intent(in) :: hecmesh
364 type (hecmwst_matrix), intent(in) :: hecmat
365 real(kind=kreal), intent(in) :: x(:)
366 real(kind=kreal), intent(out) :: y(:), w(:)
367 real(kind=kreal), intent(inout) :: commtime
368
369 call hecmw_tvec_66(hecmesh, x, y, commtime)
370 call hecmw_matvec_66(hecmesh, hecmat, y, w, commtime)
371 call hecmw_ttvec_66(hecmesh, w, y, commtime)
372
373 end subroutine hecmw_ttmattvec_66
374
375
376end module hecmw_solver_las_66
Jagged Diagonal Matrix storage for vector processors. Original code was provided by JAMSTEC.
Definition: hecmw_jadm.f90:8
subroutine, public hecmw_jad_matvec(hecmesh, hecmat, x, y, commtime)
Definition: hecmw_jadm.f90:61
subroutine, public hecmw_cmat_multvec_add(cmat, x, y, len)
integer(kind=kint) function, public hecmw_mat_get_usejad(hecmat)
subroutine, public hecmw_matvec_66(hecmesh, hecmat, x, y, time_ax, commtime)
real(kind=kreal) function, public hecmw_rel_resid_l2_66(hecmesh, hecmat, commtime)
subroutine, public hecmw_matresid_66(hecmesh, hecmat, x, b, r, commtime)
subroutine, public hecmw_tvec_66(hecmesh, x, y, commtime)
subroutine, public hecmw_ttvec_66(hecmesh, x, y, commtime)
subroutine, public hecmw_ttmattvec_66(hecmesh, hecmat, x, y, w, commtime)
subroutine hecmw_innerproduct_r(hecmesh, ndof, x, y, sum, commtime)
subroutine, public hecmw_tuning_fx_calc_sector_cache(n, ndof, sectorcachesize0, sectorcachesize1)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
real(kind=kreal) function hecmw_wtime()
subroutine hecmw_update_6_r(hecmesh, val, n)