FrontISTR  5.2.0
Large-scale structural analysis program with finit element method
hecmw_solver_las_22.f90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
7 
8  private
9 
10  public :: hecmw_matvec_22
13  public :: hecmw_matresid_22
14  public :: hecmw_rel_resid_l2_22
15  public :: hecmw_tvec_22
16  public :: hecmw_ttvec_22
17  public :: hecmw_ttmattvec_22
18  public :: hecmw_mat_diag_sr_22
19 
20  ! ! for communication hiding in matvec
21  ! integer(kind=kint), save, allocatable :: index_o(:), item_o(:)
22  ! real(kind=kreal), save, allocatable :: A_o(:)
23  logical, save :: async_matvec_flg = .false.
24 contains
25 
26  !C
27  !C***
28  !C*** hecmw_matvec_22
29  !C***
30  !C
31  subroutine hecmw_matvec_22 (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
32  use hecmw_util
34  implicit none
35  type (hecmwst_local_mesh), intent(in) :: hecmesh
36  type (hecmwst_matrix), intent(in), target :: hecmat
37  real(kind=kreal), intent(in) :: x(:)
38  real(kind=kreal), intent(out) :: y(:)
39  real(kind=kreal), intent(inout) :: time_ax
40  real(kind=kreal), intent(inout), optional :: commtime
41 
42  real(kind=kreal) :: tcomm
43  real(kind=kreal), allocatable :: wk(:)
44 
45  tcomm = 0.d0
46 
47  if (hecmw_mat_get_flag_mpcmatvec(hecmat) /= 0) then
48  allocate(wk(hecmat%NP * hecmat%NDOF))
49  call hecmw_ttmattvec_22(hecmesh, hecmat, x, y, wk, tcomm)
50  deallocate(wk)
51  else
52  call hecmw_matvec_22_inner(hecmesh, hecmat, x, y, time_ax, tcomm)
53  endif
54 
55  if (present(commtime)) commtime = commtime + tcomm
56  end subroutine hecmw_matvec_22
57 
58  subroutine hecmw_matvec_22_set_async (hecMAT)
59  use hecmw_util
60  implicit none
61  type (hecmwst_matrix), intent(in) :: hecmat
62  end subroutine hecmw_matvec_22_set_async
64  implicit none
65  end subroutine hecmw_matvec_22_unset_async
66 
67  !C
68  !C***
69  !C*** hecmw_matvec_22_inner ( private subroutine )
70  !C***
71  !C
72  subroutine hecmw_matvec_22_inner (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
73  use hecmw_util
74  use m_hecmw_comm_f
77  use hecmw_jad_type
78  use hecmw_tuning_fx
79  !$ use omp_lib
80 
81  implicit none
82  type (hecmwst_local_mesh), intent(in) :: hecmesh
83  type (hecmwst_matrix), intent(in), target :: hecmat
84  real(kind=kreal), intent(in) :: x(:)
85  real(kind=kreal), intent(out) :: y(:)
86  real(kind=kreal), intent(inout) :: time_ax
87  real(kind=kreal), intent(inout), optional :: commtime
88 
89  real(kind=kreal) :: start_time, end_time, tcomm
90  integer(kind=kint) :: i, j, js, je, in
91  real(kind=kreal) :: yv1, yv2, x1, x2
92 
93  integer(kind=kint) :: n, np
94  integer(kind=kint), pointer :: indexl(:), iteml(:), indexu(:), itemu(:)
95  real(kind=kreal), pointer :: al(:), au(:), d(:)
96 
97  ! added for turning >>>
98  integer, parameter :: numofblockperthread = 100
99  logical, save :: isfirst = .true.
100  integer, save :: numofthread = 1
101  integer, save, allocatable :: startpos(:), endpos(:)
102  integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
103  integer(kind=kint) :: threadnum, blocknum, numofblock
104  integer(kind=kint) :: numofelement, elementcount, blockindex
105  real(kind=kreal) :: numofelementperblock
106  ! <<< added for turning
107 
108  if (hecmw_jad_is_initialized().ne.0) then
109  tcomm = 0.d0
110  start_time = hecmw_wtime()
111  call hecmw_jad_matvec(hecmesh, hecmat, x, y, tcomm)
112  end_time = hecmw_wtime()
113  time_ax = time_ax + end_time - start_time - tcomm
114  if (present(commtime)) commtime = commtime + tcomm
115  else
116 
117  n = hecmat%N
118  np = hecmat%NP
119  indexl => hecmat%indexL
120  indexu => hecmat%indexU
121  iteml => hecmat%itemL
122  itemu => hecmat%itemU
123  al => hecmat%AL
124  au => hecmat%AU
125  d => hecmat%D
126 
127  ! added for turning >>>
128  if (.not. isfirst) then
129  numofblock = numofthread * numofblockperthread
130  if (endpos(numofblock-1) .ne. n-1) then
131  deallocate(startpos, endpos)
132  isfirst = .true.
133  endif
134  endif
135  if (isfirst) then
136  !$ numOfThread = omp_get_max_threads()
137  numofblock = numofthread * numofblockperthread
138  allocate (startpos(0 : numofblock - 1), endpos(0 : numofblock - 1))
139  numofelement = n + indexl(n) + indexu(n)
140  numofelementperblock = dble(numofelement) / numofblock
141  blocknum = 0
142  elementcount = 0
143  startpos(blocknum) = 1
144  do i= 1, n
145  elementcount = elementcount + 1
146  elementcount = elementcount + (indexl(i) - indexl(i-1))
147  elementcount = elementcount + (indexu(i) - indexu(i-1))
148  if (elementcount > (blocknum + 1) * numofelementperblock) then
149  endpos(blocknum) = i
150  blocknum = blocknum + 1
151  startpos(blocknum) = i + 1
152  if (blocknum == (numofblock - 1)) exit
153  endif
154  enddo
155  endpos(blocknum) = n
156  ! for irregular data
157  do i= blocknum+1, numofblock-1
158  startpos(i) = n
159  endpos(i) = n-1
160  end do
161 
163  sectorcachesize0, sectorcachesize1)
164 
165  isfirst = .false.
166  endif
167  ! <<< added for turning
168 
169  start_time= hecmw_wtime()
170  call hecmw_update_2_r (hecmesh, x, np)
171  end_time= hecmw_wtime()
172  if (present(commtime)) commtime = commtime + end_time - start_time
173 
174  start_time = hecmw_wtime()
175 
176  !call fapp_start("loopInMatvec33", 1, 0)
177  !call start_collection("loopInMatvec33")
178 
179  !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
180  !OCL CACHE_SUBSECTOR_ASSIGN(X)
181 
182  !$OMP PARALLEL DEFAULT(NONE) &
183  !$OMP&PRIVATE(i,X1,X2,YV1,YV2,jS,jE,j,in,threadNum,blockNum,blockIndex) &
184  !$OMP&SHARED(D,AL,AU,indexL,itemL,indexU,itemU,X,Y,startPos,endPos,numOfThread,N,async_matvec_flg)
185  threadnum = 0
186  !$ threadNum = omp_get_thread_num()
187  do blocknum = 0 , numofblockperthread - 1
188  blockindex = blocknum * numofthread + threadnum
189  do i = startpos(blockindex), endpos(blockindex)
190  x1= x(2*i-1)
191  x2= x(2*i )
192  yv1= d(4*i-3)*x1 + d(4*i-2)*x2
193  yv2= d(4*i-1)*x1 + d(4*i )*x2
194 
195  js= indexl(i-1) + 1
196  je= indexl(i )
197  do j= js, je
198  in = iteml(j)
199  x1 = x(2*in-1)
200  x2 = x(2*in )
201  yv1= yv1 + al(4*j-3)*x1 + al(4*j-2)*x2
202  yv2= yv2 + al(4*j-1)*x1 + al(4*j )*x2
203  enddo
204  js= indexu(i-1) + 1
205  je= indexu(i )
206  do j= js, je
207  in = itemu(j)
208  x1 = x(2*in-1)
209  x2 = x(2*in )
210  yv1= yv1 + au(4*j-3)*x1 + au(4*j-2)*x2
211  yv2= yv2 + au(4*j-1)*x1 + au(4*j )*x2
212  enddo
213  y(2*i-1)= yv1
214  y(2*i )= yv2
215  enddo
216  enddo
217 
218  !$OMP END PARALLEL
219 
220  !OCL END_CACHE_SUBSECTOR
221  !OCL END_CACHE_SECTOR_SIZE
222 
223 
224  end_time = hecmw_wtime()
225  time_ax = time_ax + end_time - start_time
226 
227 
228  endif
229 
230  if (hecmat%cmat%n_val > 0) then
231  call hecmw_cmat_multvec_add( hecmat%cmat, x, y, np * hecmat%NDOF )
232  end if
233 
234  end subroutine hecmw_matvec_22_inner
235 
236 
237 
238 
239 
240 
241  !C
242  !C***
243  !C*** hecmw_matresid_22
244  !C***
245  !C
246  subroutine hecmw_matresid_22 (hecMESH, hecMAT, X, B, R, COMMtime)
247  use hecmw_util
248 
249  implicit none
250  real(kind=kreal) :: x(:), b(:), r(:)
251  type (hecmwst_matrix) :: hecmat
252  type (hecmwst_local_mesh) :: hecmesh
253  real(kind=kreal), optional :: commtime
254 
255  integer(kind=kint) :: i
256  real(kind=kreal) :: tcomm
257 
258  tcomm = 0.d0
259  call hecmw_matvec_22 (hecmesh, hecmat, x, r, tcomm)
260  if (present(commtime)) commtime = commtime + tcomm
261  !$omp parallel default(none),private(i),shared(hecMAT,R,B)
262  !$omp do
263  do i = 1, hecmat%N * 2
264  r(i) = b(i) - r(i)
265  enddo
266  !$omp end do
267  !$omp end parallel
268  end subroutine hecmw_matresid_22
269 
270  !C
271  !C***
272  !C*** hecmw_rel_resid_L2_22
273  !C***
274  !C
275  function hecmw_rel_resid_l2_22 (hecMESH, hecMAT, COMMtime)
276  use hecmw_util
278 
279  implicit none
280  real(kind=kreal) :: hecmw_rel_resid_l2_22
281  type ( hecmwst_local_mesh ), intent(in) :: hecmesh
282  type ( hecmwst_matrix ), intent(in) :: hecmat
283  real(kind=kreal), optional :: commtime
284 
285  real(kind=kreal) :: r(hecmat%NDOF*hecmat%NP)
286  real(kind=kreal) :: bnorm2, rnorm2
287  real(kind=kreal) :: tcomm
288 
289  tcomm = 0.d0
290  call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, hecmat%B, hecmat%B, bnorm2, tcomm)
291  if (bnorm2 == 0.d0) then
292  bnorm2 = 1.d0
293  endif
294  call hecmw_matresid_22(hecmesh, hecmat, hecmat%X, hecmat%B, r, tcomm)
295  call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, r, r, rnorm2, tcomm)
296  if (present(commtime)) commtime = commtime + tcomm
297 
298  hecmw_rel_resid_l2_22 = sqrt(rnorm2 / bnorm2)
299 
300  end function hecmw_rel_resid_l2_22
301  !C
302  !C***
303  !C*** hecmw_Tvec_22
304  !C***
305  !C
306  subroutine hecmw_tvec_22 (hecMESH, X, Y, COMMtime)
307  use hecmw_util
308  use m_hecmw_comm_f
309  implicit none
310  type (hecmwst_local_mesh), intent(in) :: hecmesh
311  real(kind=kreal), intent(in) :: x(:)
312  real(kind=kreal), intent(out) :: y(:)
313  real(kind=kreal), intent(inout) :: commtime
314 
315  real(kind=kreal) :: start_time, end_time
316  integer(kind=kint) :: i, j, jj, k, kk
317 
318  start_time= hecmw_wtime()
319  call hecmw_update_3_r (hecmesh, x, hecmesh%n_node)
320  end_time= hecmw_wtime()
321  commtime = commtime + end_time - start_time
322 
323  !$omp parallel default(none),private(i,k,kk,j,jj),shared(hecMESH,X,Y)
324  !$omp do
325  do i= 1, hecmesh%nn_internal * hecmesh%n_dof
326  y(i)= x(i)
327  enddo
328  !$omp end do
329 
330  !$omp do
331  outer: do i= 1, hecmesh%mpc%n_mpc
332  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
333  if (hecmesh%mpc%mpc_dof(j) > 2) cycle outer
334  enddo
335  k = hecmesh%mpc%mpc_index(i-1) + 1
336  kk = 2 * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
337  y(kk) = 0.d0
338  do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
339  jj = 2 * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
340  y(kk) = y(kk) - hecmesh%mpc%mpc_val(j) * x(jj)
341  enddo
342  enddo outer
343  !$omp end do
344  !$omp end parallel
345 
346  end subroutine hecmw_tvec_22
347 
348  !C
349  !C***
350  !C*** hecmw_Ttvec_22
351  !C***
352  !C
353  subroutine hecmw_ttvec_22 (hecMESH, X, Y, COMMtime)
354  use hecmw_util
355  use m_hecmw_comm_f
356  implicit none
357  type (hecmwst_local_mesh), intent(in) :: hecmesh
358  real(kind=kreal), intent(in) :: x(:)
359  real(kind=kreal), intent(out) :: y(:)
360  real(kind=kreal), intent(inout) :: commtime
361 
362  real(kind=kreal) :: start_time, end_time
363  integer(kind=kint) :: i, j, jj, k, kk
364 
365  start_time= hecmw_wtime()
366  call hecmw_update_2_r (hecmesh, x, hecmesh%n_node)
367  end_time= hecmw_wtime()
368  commtime = commtime + end_time - start_time
369 
370  !$omp parallel default(none),private(i,k,kk,j,jj),shared(hecMESH,X,Y)
371  !$omp do
372  do i= 1, hecmesh%nn_internal * hecmesh%n_dof
373  y(i)= x(i)
374  enddo
375  !$omp end do
376 
377  !$omp do
378  outer: do i= 1, hecmesh%mpc%n_mpc
379  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
380  if (hecmesh%mpc%mpc_dof(j) > 2) cycle outer
381  enddo
382  k = hecmesh%mpc%mpc_index(i-1) + 1
383  kk = 2 * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
384  y(kk) = 0.d0
385  do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
386  jj = 2 * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
387  y(jj) = y(jj) - hecmesh%mpc%mpc_val(j) * x(kk)
388  enddo
389  enddo outer
390  !$omp end do
391  !$omp end parallel
392 
393  end subroutine hecmw_ttvec_22
394 
395  !C
396  !C***
397  !C*** hecmw_TtmatTvec_22
398  !C***
399  !C
400  subroutine hecmw_ttmattvec_22 (hecMESH, hecMAT, X, Y, W, COMMtime)
401  use hecmw_util
402  implicit none
403  type (hecmwst_local_mesh), intent(in) :: hecmesh
404  type (hecmwst_matrix), intent(in) :: hecmat
405  real(kind=kreal), intent(in) :: x(:)
406  real(kind=kreal), intent(out) :: y(:), w(:)
407  real(kind=kreal), intent(inout) :: commtime
408 
409  call hecmw_tvec_22(hecmesh, x, y, commtime)
410  call hecmw_matvec_22_inner(hecmesh, hecmat, y, w, commtime)
411  call hecmw_ttvec_22(hecmesh, w, y, commtime)
412 
413  end subroutine hecmw_ttmattvec_22
414 
415 
416  !C
417  !C***
418  !C*** hecmw_mat_diag_sr_22
419  !C***
420  !C
421  subroutine hecmw_mat_diag_sr_22(hecMESH, hecMAT, COMMtime)
422  use hecmw_util
423  use m_hecmw_comm_f
424  implicit none
425  type (hecmwst_local_mesh), intent(in) :: hecmesh
426  type (hecmwst_matrix), intent(inout), target :: hecmat
427  real(kind=kreal), intent(inout), optional :: commtime
428  real(kind=kreal), allocatable :: w(:,:)
429  real(kind=kreal), pointer :: d(:)
430  integer(kind=kint) :: ip
431  real(kind=kreal) :: start_time, end_time
432  allocate(w(2*hecmat%NP,2))
433  d => hecmat%D
434  do ip= 1, hecmat%N
435  w(2*ip-1,1)= d(4*ip-3); w(2*ip-1,2)= d(4*ip-2);
436  w(2*ip-0,1)= d(4*ip-1); w(2*ip-0,2)= d(4*ip-0);
437  enddo
438  start_time= hecmw_wtime()
439  call hecmw_update_2_r (hecmesh, w(:,1), hecmat%NP)
440  call hecmw_update_2_r (hecmesh, w(:,2), hecmat%NP)
441  end_time= hecmw_wtime()
442  if (present(commtime)) commtime = commtime + end_time - start_time
443  do ip= hecmat%N+1, hecmat%NP
444  d(4*ip-3)= w(2*ip-1,1); d(4*ip-2)= w(2*ip-1,2);
445  d(4*ip-1)= w(2*ip-0,1); d(4*ip-0)= w(2*ip-0,2);
446  enddo
447  deallocate(w)
448  end subroutine hecmw_mat_diag_sr_22
449 end module hecmw_solver_las_22
Jagged Diagonal Matrix storage for vector processors. Original code was provided by JAMSTEC.
Definition: hecmw_jadm.f90:8
subroutine, public hecmw_jad_matvec(hecMESH, hecMAT, X, Y, COMMtime)
Definition: hecmw_jadm.f90:61
integer(kind=kint) function, public hecmw_jad_is_initialized()
Definition: hecmw_jadm.f90:56
subroutine, public hecmw_cmat_multvec_add(cmat, X, Y, len)
integer(kind=kint) function, public hecmw_mat_get_flag_mpcmatvec(hecMAT)
subroutine, public hecmw_matresid_22(hecMESH, hecMAT, X, B, R, COMMtime)
subroutine, public hecmw_tvec_22(hecMESH, X, Y, COMMtime)
subroutine, public hecmw_mat_diag_sr_22(hecMESH, hecMAT, COMMtime)
subroutine, public hecmw_matvec_22(hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
subroutine, public hecmw_ttvec_22(hecMESH, X, Y, COMMtime)
real(kind=kreal) function, public hecmw_rel_resid_l2_22(hecMESH, hecMAT, COMMtime)
subroutine, public hecmw_matvec_22_set_async(hecMAT)
subroutine, public hecmw_ttmattvec_22(hecMESH, hecMAT, X, Y, W, COMMtime)
subroutine, public hecmw_matvec_22_unset_async
subroutine hecmw_innerproduct_r(hecMESH, ndof, X, Y, sum, COMMtime)
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
real(kind=kreal) function hecmw_wtime()
subroutine hecmw_update_2_r(hecMESH, val, n)
subroutine hecmw_update_3_r(hecMESH, val, n)