FrontISTR 5.2.0
Large-scale structural analysis program with finite element method
hecmw_solver_las_44.f90
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
6 module hecmw_solver_las_44
7  use hecmw_util
8  implicit none
9 
10  private
11 
12  public :: hecmw_matvec_44
13  public :: hecmw_matvec_44_set_async
14  public :: hecmw_matvec_44_unset_async
15  public :: hecmw_matresid_44
16  public :: hecmw_rel_resid_l2_44
17  public :: hecmw_tvec_44
18  public :: hecmw_ttvec_44
19  public :: hecmw_ttmattvec_44
20  public :: hecmw_mat_diag_sr_44
21 
22  ! ! for communication hiding in matvec
23  ! integer(kind=kint), save, allocatable :: index_o(:), item_o(:)
24  ! real(kind=kreal), save, allocatable :: A_o(:)
25  logical, save :: async_matvec_flg = .false.
26 
27 contains
28 
29  !C
30  !C***
31  !C*** hecmw_matvec_44
32  !C***
33  !C
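 !C Computes the matrix-vector product Y = A*X for the 4x4-block CRS matrix in
 !C hecMAT. When the MPC matvec flag is set, the MPC-transformed product
 !C Tt*A*T*X is computed via hecmw_ttmattvec_44 instead; communication time is
 !C accumulated into the optional COMMtime argument.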
34  subroutine hecmw_matvec_44 (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
35  use hecmw_util
 36  use hecmw_matrix_misc
37  implicit none
38  type (hecmwst_local_mesh), intent(in) :: hecmesh
39  type (hecmwst_matrix), intent(in), target :: hecmat
40  real(kind=kreal), intent(in) :: x(:)
41  real(kind=kreal), intent(out) :: y(:)
42  real(kind=kreal), intent(inout), optional :: time_ax
43  real(kind=kreal), intent(inout), optional :: commtime
44 
45  real(kind=kreal) :: tcomm
46  real(kind=kreal), allocatable :: wk(:)
47 
48  tcomm = 0.d0
49 
50  if (hecmw_mat_get_flag_mpcmatvec(hecmat) /= 0) then
51  allocate(wk(hecmat%NP * hecmat%NDOF))
52  call hecmw_ttmattvec_44(hecmesh, hecmat, x, y, wk, tcomm)
53  deallocate(wk)
54  else
55  call hecmw_matvec_44_inner(hecmesh, hecmat, x, y, time_ax, tcomm)
56  endif
57 
58  if (present(commtime)) commtime = commtime + tcomm
59  end subroutine hecmw_matvec_44
60 
61  !C
62  !C***
63  !C*** hecmw_matvec_44_set_async
64  !C***
65  !C
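 !C Placeholder: communication hiding (asynchronous matvec) is not implemented
 !C for the 4-dof case, so this routine and hecmw_matvec_44_unset_async below
 !C are empty stubs; the related module data above is commented out.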
66  subroutine hecmw_matvec_44_set_async (hecMAT)
67  use hecmw_util
68  implicit none
69  type (hecmwst_matrix), intent(in) :: hecmat
70 
71  end subroutine hecmw_matvec_44_set_async
72 
73  !C
74  !C***
75  !C*** hecmw_matvec_44_unset_async
76  !C***
77  !C
78  subroutine hecmw_matvec_44_unset_async
 79  implicit none
80 
81  end subroutine hecmw_matvec_44_unset_async
82 
83  !C
84  !C***
85  !C*** hecmw_matvec_44_inner ( private subroutine )
86  !C***
87  !C
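 !C Core matvec kernel (no MPC handling). If JAD storage is initialized, the
 !C JAD kernel is used; otherwise the external entries of X are updated and the
 !C OpenMP-parallel 4x4-block CRS loop below accumulates the diagonal (D),
 !C lower (AL) and upper (AU) block products. Any contact-matrix (cmat)
 !C contribution is added at the end in either case.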
88  subroutine hecmw_matvec_44_inner (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
89  use hecmw_util
90  use m_hecmw_comm_f
91  use hecmw_matrix_misc
 92  use hecmw_matrix_contact
 93  use hecmw_jad_type
94  use hecmw_tuning_fx
95  !$ use omp_lib
96 
97  implicit none
98  type (hecmwst_local_mesh), intent(in) :: hecmesh
99  type (hecmwst_matrix), intent(in), target :: hecmat
100  real(kind=kreal), intent(in) :: x(:)
101  real(kind=kreal), intent(out) :: y(:)
102  real(kind=kreal), intent(inout) :: time_ax
103  real(kind=kreal), intent(inout), optional :: commtime
104 
105  real(kind=kreal) :: start_time, end_time, tcomm
106  integer(kind=kint) :: i, j, js, je, in
107  real(kind=kreal) :: yv1, yv2, yv3, yv4, x1, x2, x3, x4
108 
109  integer(kind=kint) :: n, np
110  integer(kind=kint), pointer :: indexl(:), iteml(:), indexu(:), itemu(:)
111  real(kind=kreal), pointer :: al(:), au(:), d(:)
112 
113  ! added for tuning >>>
114  integer, parameter :: numofblockperthread = 100
115  logical, save :: isfirst = .true.
116  integer, save :: numofthread = 1
117  integer, save, allocatable :: startpos(:), endpos(:)
118  integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
119  integer(kind=kint) :: threadnum, blocknum, numofblock
120  integer(kind=kint) :: numofelement, elementcount, blockindex
121  real(kind=kreal) :: numofelementperblock
122  ! <<< added for tuning
123 
124  if (hecmw_jad_is_initialized().ne.0) then
125  tcomm = 0.d0
126  start_time = hecmw_wtime()
127  call hecmw_jad_matvec(hecmesh, hecmat, x, y, tcomm)
128  end_time = hecmw_wtime()
129  time_ax = time_ax + end_time - start_time - tcomm
130  if (present(commtime)) commtime = commtime + tcomm
131  else
132 
133  n = hecmat%N
134  np = hecmat%NP
135  indexl => hecmat%indexL
136  indexu => hecmat%indexU
137  iteml => hecmat%itemL
138  itemu => hecmat%itemU
139  al => hecmat%AL
140  au => hecmat%AU
141  d => hecmat%D
142 
143  ! added for tuning >>>
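 !C Row-block partition for thread load balancing: rows 1..N are split into
 !C numOfThread*numOfBlockPerThread contiguous blocks holding roughly equal
 !C numbers of nonzero 4x4 blocks. The saved partition is rebuilt whenever the
 !C consistency check against the current matrix size fails.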
144  if (.not. isfirst) then
145  numofblock = numofthread * numofblockperthread
146  if (endpos(numofblock-1) .ne. n-1) then
147  deallocate(startpos, endpos)
148  isfirst = .true.
149  endif
150  endif
151  if (isfirst) then
152  !$ numOfThread = omp_get_max_threads()
153  numofblock = numofthread * numofblockperthread
154  allocate (startpos(0 : numofblock - 1), endpos(0 : numofblock - 1))
155  numofelement = n + indexl(n) + indexu(n)
156  numofelementperblock = dble(numofelement) / numofblock
157  blocknum = 0
158  elementcount = 0
159  startpos(blocknum) = 1
160  do i= 1, n
161  elementcount = elementcount + 1
162  elementcount = elementcount + (indexl(i) - indexl(i-1))
163  elementcount = elementcount + (indexu(i) - indexu(i-1))
164  if (elementcount > (blocknum + 1) * numofelementperblock) then
165  endpos(blocknum) = i
166  ! write(9000+hecMESH%my_rank,*) mod(blockNum, numOfThread), &
167  ! startPos(blockNum), endPos(blockNum)
168  blocknum = blocknum + 1
169  startpos(blocknum) = i + 1
170  if (blocknum == (numofblock - 1)) exit
171  endif
172  enddo
173  endpos(blocknum) = n
174  ! write(9000+hecMESH%my_rank,*) mod(blockNum, numOfThread), &
175  ! startPos(blockNum), endPos(blockNum)
176  ! for irregular data
177  do i= blocknum+1, numofblock-1
178  startpos(i) = n
179  endpos(i) = n-1
180  ! write(9000+hecMESH%my_rank,*) mod(i, numOfThread), &
181  ! startPos(i), endPos(i)
182  end do
183 
184  call hecmw_tuning_fx_calc_sector_cache( n, 4, &
 185  sectorcachesize0, sectorcachesize1)
186 
187  isfirst = .false.
188  endif
189  ! <<< added for tuning
190 
191  start_time= hecmw_wtime()
192 
193  call hecmw_update_4_r (hecmesh, x, np)
194 
195  ! endif
196  end_time= hecmw_wtime()
197  if (present(commtime)) commtime = commtime + end_time - start_time
198 
199  start_time = hecmw_wtime()
200 
201  !call fapp_start("loopInMatvec44", 1, 0)
202  !call start_collection("loopInMatvec44")
203 
204  !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
205  !OCL CACHE_SUBSECTOR_ASSIGN(X)
206 
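 !C Main loop: each thread processes its row blocks in an interleaved fashion
 !C (blockIndex = blockNum*numOfThread + threadNum). For each row i, the 4x4
 !C diagonal block and the lower/upper off-diagonal blocks are multiplied by
 !C the corresponding 4-entry slices of X and the result is stored in Y(4i-3:4i).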
207  !$OMP PARALLEL DEFAULT(NONE) &
208  !$OMP&PRIVATE(i,X1,X2,X3,X4,YV1,YV2,YV3,YV4,jS,jE,j,in,threadNum,blockNum,blockIndex) &
209  !$OMP&SHARED(D,AL,AU,indexL,itemL,indexU,itemU,X,Y,startPos,endPos,numOfThread,N,async_matvec_flg)
210  threadnum = 0
211  !$ threadNum = omp_get_thread_num()
212  do blocknum = 0 , numofblockperthread - 1
213  blockindex = blocknum * numofthread + threadnum
214  do i = startpos(blockindex), endpos(blockindex)
215  x1= x(4*i-3)
216  x2= x(4*i-2)
217  x3= x(4*i-1)
218  x4= x(4*i )
219  yv1= d(16*i-15)*x1 + d(16*i-14)*x2 + d(16*i-13)*x3 + d(16*i-12)*x4
220  yv2= d(16*i-11)*x1 + d(16*i-10)*x2 + d(16*i- 9)*x3 + d(16*i- 8)*x4
221  yv3= d(16*i- 7)*x1 + d(16*i- 6)*x2 + d(16*i- 5)*x3 + d(16*i- 4)*x4
222  yv4= d(16*i- 3)*x1 + d(16*i- 2)*x2 + d(16*i- 1)*x3 + d(16*i )*x4
223 
224  js= indexl(i-1) + 1
225  je= indexl(i )
226  do j= js, je
227  in = iteml(j)
228  x1= x(4*in-3)
229  x2= x(4*in-2)
230  x3= x(4*in-1)
231  x4= x(4*in )
232  yv1= yv1 + al(16*j-15)*x1 + al(16*j-14)*x2 + al(16*j-13)*x3 + al(16*j-12)*x4
233  yv2= yv2 + al(16*j-11)*x1 + al(16*j-10)*x2 + al(16*j- 9)*x3 + al(16*j- 8)*x4
234  yv3= yv3 + al(16*j- 7)*x1 + al(16*j- 6)*x2 + al(16*j- 5)*x3 + al(16*j- 4)*x4
235  yv4= yv4 + al(16*j- 3)*x1 + al(16*j- 2)*x2 + al(16*j- 1)*x3 + al(16*j )*x4
236  enddo
237  js= indexu(i-1) + 1
238  je= indexu(i )
239  do j= js, je
240  in = itemu(j)
241  ! if (async_matvec_flg .and. in > N) cycle
242  x1= x(4*in-3)
243  x2= x(4*in-2)
244  x3= x(4*in-1)
245  x4= x(4*in )
246  yv1= yv1 + au(16*j-15)*x1 + au(16*j-14)*x2 + au(16*j-13)*x3 + au(16*j-12)*x4
247  yv2= yv2 + au(16*j-11)*x1 + au(16*j-10)*x2 + au(16*j- 9)*x3 + au(16*j- 8)*x4
248  yv3= yv3 + au(16*j- 7)*x1 + au(16*j- 6)*x2 + au(16*j- 5)*x3 + au(16*j- 4)*x4
249  yv4= yv4 + au(16*j- 3)*x1 + au(16*j- 2)*x2 + au(16*j- 1)*x3 + au(16*j )*x4
250  enddo
251  y(4*i-3)= yv1
252  y(4*i-2)= yv2
253  y(4*i-1)= yv3
254  y(4*i )= yv4
255  enddo
256  enddo
257  !$OMP END PARALLEL
258 
259  !OCL END_CACHE_SUBSECTOR
260  !OCL END_CACHE_SECTOR_SIZE
261 
262  !call stop_collection("loopInMatvec44")
263  !call fapp_stop("loopInMatvec44", 1, 0)
264 
265  end_time = hecmw_wtime()
266  time_ax = time_ax + end_time - start_time
267 
268  endif
269 
270  if (hecmat%cmat%n_val > 0) then
271  call hecmw_cmat_multvec_add( hecmat%cmat, x, y, np * hecmat%NDOF )
272  end if
273 
274  end subroutine hecmw_matvec_44_inner
275 
276  !C
277  !C***
278  !C*** hecmw_matresid_44
279  !C***
280  !C
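 !C Computes the residual R = B - A*X for the owned rows, with A*X evaluated
 !C by hecmw_matvec_44.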
281  subroutine hecmw_matresid_44 (hecMESH, hecMAT, X, B, R, COMMtime)
282  use hecmw_util
283  implicit none
284  type (hecmwst_local_mesh), intent(in) :: hecmesh
285  type (hecmwst_matrix), intent(in) :: hecmat
286  real(kind=kreal), intent(in) :: x(:), b(:)
287  real(kind=kreal), intent(out) :: r(:)
288  real(kind=kreal), intent(inout), optional :: commtime
289 
290  integer(kind=kint) :: i
291  real(kind=kreal) :: tcomm
292 
293  tcomm = 0.d0
294  call hecmw_matvec_44 (hecmesh, hecmat, x, r, tcomm)
295  if (present(commtime)) commtime = commtime + tcomm
296  !$omp parallel default(none),private(i),shared(hecMAT,R,B)
297  !$omp do
298  do i = 1, hecmat%N * 4
299  r(i) = b(i) - r(i)
300  enddo
301  !$omp end do
302  !$omp end parallel
303  end subroutine hecmw_matresid_44
304 
305  !C
306  !C***
307  !C*** hecmw_rel_resid_L2_44
308  !C***
309  !C
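 !C Returns the relative L2 residual ||B - A*X|| / ||B|| for the current
 !C solution hecMAT%X and right-hand side hecMAT%B (the squared norm of B is
 !C replaced by 1 when B is zero).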
310  function hecmw_rel_resid_l2_44 (hecMESH, hecMAT, COMMtime)
311  use hecmw_util
312  use hecmw_solver_misc
 313  implicit none
314  real(kind=kreal) :: hecmw_rel_resid_l2_44
315  type ( hecmwst_local_mesh ), intent(in) :: hecmesh
316  type ( hecmwst_matrix ), intent(in) :: hecmat
317  real(kind=kreal), intent(inout), optional :: commtime
318 
319  real(kind=kreal), allocatable :: r(:)
320  real(kind=kreal) :: bnorm2, rnorm2
321  real(kind=kreal) :: tcomm
322 
323  allocate(r(hecmat%NDOF*hecmat%NP))
324 
325  tcomm = 0.d0
326  call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, &
327  hecmat%B, hecmat%B, bnorm2, tcomm)
328  if (bnorm2 == 0.d0) then
329  bnorm2 = 1.d0
330  endif
331  call hecmw_matresid_44(hecmesh, hecmat, hecmat%X, hecmat%B, r, tcomm)
332  call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, r, r, rnorm2, tcomm)
333  hecmw_rel_resid_l2_44 = sqrt(rnorm2 / bnorm2)
334 
335  if (present(commtime)) commtime = commtime + tcomm
336 
337  deallocate(r)
338  end function hecmw_rel_resid_l2_44
339 
340  !C
341  !C***
342  !C*** hecmw_Tvec_44
343  !C***
344  !C
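 !C Applies the MPC transformation matrix T: Y = T*X. X is copied into Y; then,
 !C for each MPC whose DOFs are all within 1..4, the slave-DOF entry of Y is
 !C overwritten with minus the weighted sum of the corresponding master-DOF
 !C entries of X.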
345  subroutine hecmw_tvec_44 (hecMESH, X, Y, COMMtime)
346  use hecmw_util
347  use m_hecmw_comm_f
348  implicit none
349  type (hecmwst_local_mesh), intent(in) :: hecmesh
350  real(kind=kreal), intent(in) :: x(:)
351  real(kind=kreal), intent(out) :: y(:)
352  real(kind=kreal), intent(inout) :: commtime
353 
354  real(kind=kreal) :: start_time, end_time
355  integer(kind=kint) :: i, j, jj, k, kk
356 
357  start_time= hecmw_wtime()
358  call hecmw_update_4_r (hecmesh, x, hecmesh%n_node)
359  end_time= hecmw_wtime()
360  commtime = commtime + end_time - start_time
361 
362  !$omp parallel default(none),private(i,k,kk,j,jj),shared(hecMESH,X,Y)
363  !$omp do
364  do i= 1, hecmesh%nn_internal * hecmesh%n_dof
365  y(i)= x(i)
366  enddo
367  !$omp end do
368 
369  !$omp do
370  outer: do i= 1, hecmesh%mpc%n_mpc
371  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
372  if (hecmesh%mpc%mpc_dof(j) > 4) cycle outer
373  enddo
374  k = hecmesh%mpc%mpc_index(i-1) + 1
375  kk = 4 * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
376  y(kk) = 0.d0
377  do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
378  jj = 4 * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
379  y(kk) = y(kk) - hecmesh%mpc%mpc_val(j) * x(jj)
380  enddo
381  enddo outer
382  !$omp end do
383  !$omp end parallel
384 
385  end subroutine hecmw_tvec_44
386 
387  !C
388  !C***
389  !C*** hecmw_Ttvec_44
390  !C***
391  !C
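 !C Applies the transpose of the MPC transformation matrix: Y = Tt*X. X is
 !C copied into Y; then, for each MPC whose DOFs are all within 1..4, the
 !C slave-DOF entry of Y is zeroed and its value in X is distributed, with the
 !C negated MPC coefficients, onto the master-DOF entries of Y.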
392  subroutine hecmw_ttvec_44 (hecMESH, X, Y, COMMtime)
393  use hecmw_util
394  use m_hecmw_comm_f
395  implicit none
396  type (hecmwst_local_mesh), intent(in) :: hecmesh
397  real(kind=kreal), intent(in) :: x(:)
398  real(kind=kreal), intent(out) :: y(:)
399  real(kind=kreal), intent(inout) :: commtime
400 
401  real(kind=kreal) :: start_time, end_time
402  integer(kind=kint) :: i, j, jj, k, kk
403 
404  start_time= hecmw_wtime()
405  call hecmw_update_4_r (hecmesh, x, hecmesh%n_node)
406  end_time= hecmw_wtime()
407  commtime = commtime + end_time - start_time
408 
409  !$omp parallel default(none),private(i,k,kk,j,jj),shared(hecMESH,X,Y)
410  !$omp do
411  do i= 1, hecmesh%nn_internal * hecmesh%n_dof
412  y(i)= x(i)
413  enddo
414  !$omp end do
415 
416  !$omp do
417  outer: do i= 1, hecmesh%mpc%n_mpc
418  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
419  if (hecmesh%mpc%mpc_dof(j) > 4) cycle outer
420  enddo
421  k = hecmesh%mpc%mpc_index(i-1) + 1
422  kk = 4 * (hecmesh%mpc%mpc_item(k) - 1) + hecmesh%mpc%mpc_dof(k)
423  y(kk) = 0.d0
424  do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
425  jj = 4 * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
426  y(jj) = y(jj) - hecmesh%mpc%mpc_val(j) * x(kk)
427  enddo
428  enddo outer
429  !$omp end do
430  !$omp end parallel
431 
432  end subroutine hecmw_ttvec_44
433 
434  !C
435  !C***
436  !C*** hecmw_TtmatTvec_44
437  !C***
438  !C
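 !C Computes Y = Tt*A*T*X for MPC-constrained problems: Y = T*X, then W = A*Y
 !C via the inner matvec kernel, then Y = Tt*W. W is a caller-supplied work
 !C array of length NP*NDOF.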
439  subroutine hecmw_ttmattvec_44 (hecMESH, hecMAT, X, Y, W, COMMtime)
440  use hecmw_util
441  implicit none
442  type (hecmwst_local_mesh), intent(in) :: hecmesh
443  type (hecmwst_matrix), intent(in) :: hecmat
444  real(kind=kreal), intent(in) :: x(:)
445  real(kind=kreal), intent(out) :: y(:), w(:)
446  real(kind=kreal), intent(inout) :: commtime
447 
448  call hecmw_tvec_44(hecmesh, x, y, commtime)
449  call hecmw_matvec_44_inner(hecmesh, hecmat, y, w, commtime)
450  call hecmw_ttvec_44(hecmesh, w, y, commtime)
451 
452  end subroutine hecmw_ttmattvec_44
453 
454 
455  !C
456  !C***
457  !C*** hecmw_mat_diag_sr_44
458  !C***
459  !C
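 !C Exchanges the 4x4 diagonal blocks of hecMAT%D between neighboring domains:
 !C the blocks of the owned rows are packed into W, communicated with
 !C hecmw_update_4_r, and the received values are unpacked into the external
 !C rows N+1..NP.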
460  subroutine hecmw_mat_diag_sr_44(hecMESH, hecMAT, COMMtime)
461  use hecmw_util
462  use m_hecmw_comm_f
463  implicit none
464  type (hecmwst_local_mesh), intent(in) :: hecmesh
465  type (hecmwst_matrix), intent(inout), target :: hecmat
466  real(kind=kreal), intent(inout), optional :: commtime
467  real(kind=kreal), allocatable :: w(:,:)
468  real(kind=kreal), pointer :: d(:)
469  integer(kind=kint) :: ip
470  real(kind=kreal) :: start_time, end_time
471  allocate(w(4*hecmat%NP,4))
472  d => hecmat%D
473  do ip= 1, hecmat%N
474  w(4*ip-3,1)= d(16*ip-15); w(4*ip-3,2)= d(16*ip-14); w(4*ip-3,3)= d(16*ip-13); w(4*ip-3,4)= d(16*ip-12)
475  w(4*ip-2,1)= d(16*ip-11); w(4*ip-2,2)= d(16*ip-10); w(4*ip-2,3)= d(16*ip- 9); w(4*ip-2,4)= d(16*ip- 8)
476  w(4*ip-1,1)= d(16*ip- 7); w(4*ip-1,2)= d(16*ip- 6); w(4*ip-1,3)= d(16*ip- 5); w(4*ip-1,4)= d(16*ip- 4)
477  w(4*ip ,1)= d(16*ip- 3); w(4*ip ,2)= d(16*ip- 2); w(4*ip ,3)= d(16*ip- 1); w(4*ip ,4)= d(16*ip )
478  enddo
479  start_time= hecmw_wtime()
480  call hecmw_update_4_r (hecmesh, w(:,1), hecmat%NP)
481  call hecmw_update_4_r (hecmesh, w(:,2), hecmat%NP)
482  call hecmw_update_4_r (hecmesh, w(:,3), hecmat%NP)
483  call hecmw_update_4_r (hecmesh, w(:,4), hecmat%NP)
484  end_time= hecmw_wtime()
485  if (present(commtime)) commtime = commtime + end_time - start_time
486  do ip= hecmat%N+1, hecmat%NP
487  d(16*ip-15)= w(4*ip-3,1); d(16*ip-14)= w(4*ip-3,2); d(16*ip-13)= w(4*ip-3,3); d(16*ip-12)= w(4*ip-3,4)
488  d(16*ip-11)= w(4*ip-2,1); d(16*ip-10)= w(4*ip-2,2); d(16*ip- 9)= w(4*ip-2,3); d(16*ip- 8)= w(4*ip-2,4)
489  d(16*ip- 7)= w(4*ip-1,1); d(16*ip- 6)= w(4*ip-1,2); d(16*ip- 5)= w(4*ip-1,3); d(16*ip- 4)= w(4*ip-1,4)
490  d(16*ip- 3)= w(4*ip ,1); d(16*ip- 2)= w(4*ip ,2); d(16*ip- 1)= w(4*ip ,3); d(16*ip )= w(4*ip ,4)
491  enddo
492  deallocate(w)
493  end subroutine hecmw_mat_diag_sr_44
494 
495 end module hecmw_solver_las_44
hecmw_jad_type: Jagged Diagonal Matrix storage for vector processors. Original code was provided by JAMSTEC.
Definition: hecmw_jadm.f90:8
subroutine, public hecmw_jad_matvec(hecMESH, hecMAT, X, Y, COMMtime)
Definition: hecmw_jadm.f90:61
integer(kind=kint) function, public hecmw_jad_is_initialized()
Definition: hecmw_jadm.f90:56
subroutine, public hecmw_cmat_multvec_add(cmat, X, Y, len)
integer(kind=kint) function, public hecmw_mat_get_flag_mpcmatvec(hecMAT)
subroutine, public hecmw_ttmattvec_44(hecMESH, hecMAT, X, Y, W, COMMtime)
subroutine, public hecmw_mat_diag_sr_44(hecMESH, hecMAT, COMMtime)
subroutine, public hecmw_matvec_44(hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
subroutine, public hecmw_ttvec_44(hecMESH, X, Y, COMMtime)
subroutine, public hecmw_matvec_44_unset_async
subroutine, public hecmw_matvec_44_set_async(hecMAT)
subroutine, public hecmw_tvec_44(hecMESH, X, Y, COMMtime)
subroutine, public hecmw_matresid_44(hecMESH, hecMAT, X, B, R, COMMtime)
real(kind=kreal) function, public hecmw_rel_resid_l2_44(hecMESH, hecMAT, COMMtime)
subroutine hecmw_innerproduct_r(hecMESH, ndof, X, Y, sum, COMMtime)
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
hecmw_util: I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
real(kind=kreal) function hecmw_wtime()
subroutine hecmw_update_4_r(hecMESH, val, n)