FrontISTR  5.2.0
Large-scale structural analysis program with finit element method
hecmw_solver_las_66.f90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
7  use hecmw_util
8  implicit none
9 
10  private
11 
12  public :: hecmw_matvec_66
13  public :: hecmw_matresid_66
14  public :: hecmw_rel_resid_l2_66
15  public :: hecmw_tvec_66
16  public :: hecmw_ttvec_66
17  public :: hecmw_ttmattvec_66
18 
19 contains
20 
21  !C
22  !C***
23  !C*** hecmw_matvec_66
24  !C***
25  !C
26  subroutine hecmw_matvec_66 (hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
27  use hecmw_util
28  use m_hecmw_comm_f
31  use hecmw_jad_type
32  use hecmw_tuning_fx
33  !$ use omp_lib
34 
35  implicit none
36  type (hecmwst_local_mesh), intent(in) :: hecmesh
37  type (hecmwst_matrix), intent(in), target :: hecmat
38  real(kind=kreal), intent(in) :: x(:)
39  real(kind=kreal), intent(out) :: y(:)
40  real(kind=kreal), intent(inout) :: time_ax
41  real(kind=kreal), intent(inout), optional :: commtime
42 
43  real(kind=kreal) :: start_time, end_time, tcomm
44  integer(kind=kint) :: i, j, js, je, in
45  real(kind=kreal) :: yv1, yv2, yv3, x1, x2, x3
46  real(kind=kreal) :: yv4, yv5, yv6, x4, x5, x6
47 
48  integer(kind=kint) :: n, np
49  integer(kind=kint), pointer :: indexl(:), iteml(:), indexu(:), itemu(:)
50  real(kind=kreal), pointer :: al(:), au(:), d(:)
51 
52  ! added for turning >>>
53  integer, parameter :: numofblockperthread = 100
54  logical, save :: isfirst = .true.
55  integer, save :: numofthread = 1
56  integer, save, allocatable :: startpos(:), endpos(:)
57  integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
58  integer(kind=kint) :: threadnum, blocknum, numofblock
59  integer(kind=kint) :: numofelement, elementcount, blockindex
60  real(kind=kreal) :: numofelementperblock
61  ! <<< added for turning
62 
63  if (hecmw_mat_get_usejad(hecmat).ne.0) then
64  tcomm = 0.d0
65  start_time = hecmw_wtime()
66  call hecmw_jad_matvec(hecmesh, hecmat, x, y, tcomm)
67  end_time = hecmw_wtime()
68  time_ax = time_ax + end_time - start_time - tcomm
69  if (present(commtime)) commtime = commtime + tcomm
70  else
71 
72  n = hecmat%N
73  np = hecmat%NP
74  indexl => hecmat%indexL
75  indexu => hecmat%indexU
76  iteml => hecmat%itemL
77  itemu => hecmat%itemU
78  al => hecmat%AL
79  au => hecmat%AU
80  d => hecmat%D
81 
82  ! added for turning >>>
83  if (isfirst .eqv. .true.) then
84  !$ numOfThread = omp_get_max_threads()
85  numofblock = numofthread * numofblockperthread
86  allocate (startpos(0 : numofblock - 1), endpos(0 : numofblock - 1))
87  numofelement = n + indexl(n) + indexu(n)
88  numofelementperblock = dble(numofelement) / numofblock
89  blocknum = 0
90  elementcount = 0
91  startpos(blocknum) = 1
92  do i= 1, n
93  elementcount = elementcount + 1
94  elementcount = elementcount + (indexl(i) - indexl(i-1))
95  elementcount = elementcount + (indexu(i) - indexu(i-1))
96  if (elementcount > (blocknum + 1) * numofelementperblock) then
97  endpos(blocknum) = i
98  ! write(9000+hecMESH%my_rank,*) mod(blockNum, numOfThread), &
99  ! startPos(blockNum), endPos(blockNum)
100  blocknum = blocknum + 1
101  startpos(blocknum) = i + 1
102  if (blocknum == (numofblock - 1)) exit
103  endif
104  enddo
105  endpos(blocknum) = n
106  ! write(9000+hecMESH%my_rank,*) mod(blockNum, numOfThread), &
107  ! startPos(blockNum), endPos(blockNum)
108  ! for irregular data
109  do i= blocknum+1, numofblock-1
110  startpos(i) = n
111  endpos(i) = n-1
112  ! write(9000+hecMESH%my_rank,*) mod(i, numOfThread), &
113  ! startPos(i), endPos(i)
114  end do
115 
117  sectorcachesize0, sectorcachesize1)
118 
119  isfirst = .false.
120  endif
121  ! <<< added for turning
122 
123  start_time= hecmw_wtime()
124  call hecmw_update_6_r (hecmesh, x, np)
125  end_time= hecmw_wtime()
126  if (present(commtime)) commtime = commtime + end_time - start_time
127 
128  start_time = hecmw_wtime()
129 
130  !call fapp_start("loopInMatvec66", 1, 0)
131  !call start_collection("loopInMatvec66")
132 
133  !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
134  !OCL CACHE_SUBSECTOR_ASSIGN(X)
135 
136  !$OMP PARALLEL DEFAULT(NONE) &
137  !$OMP&PRIVATE(i,X1,X2,X3,X4,X5,X6,YV1,YV2,YV3,YV4,YV5,YV6,jS,jE,j,in,threadNum,blockNum,blockIndex) &
138  !$OMP&SHARED(D,AL,AU,indexL,itemL,indexU,itemU,X,Y,startPos,endPos,numOfThread)
139  threadnum = 0
140  !$ threadNum = omp_get_thread_num()
141  do blocknum = 0 , numofblockperthread - 1
142  blockindex = blocknum * numofthread + threadnum
143  do i = startpos(blockindex), endpos(blockindex)
144  x1= x(6*i-5)
145  x2= x(6*i-4)
146  x3= x(6*i-3)
147  x4= x(6*i-2)
148  x5= x(6*i-1)
149  x6= x(6*i )
150  yv1= d(36*i-35)*x1 + d(36*i-34)*x2 + d(36*i-33)*x3 + d(36*i-32)*x4 + d(36*i-31)*x5 + d(36*i-30)*x6
151  yv2= d(36*i-29)*x1 + d(36*i-28)*x2 + d(36*i-27)*x3 + d(36*i-26)*x4 + d(36*i-25)*x5 + d(36*i-24)*x6
152  yv3= d(36*i-23)*x1 + d(36*i-22)*x2 + d(36*i-21)*x3 + d(36*i-20)*x4 + d(36*i-19)*x5 + d(36*i-18)*x6
153  yv4= d(36*i-17)*x1 + d(36*i-16)*x2 + d(36*i-15)*x3 + d(36*i-14)*x4 + d(36*i-13)*x5 + d(36*i-12)*x6
154  yv5= d(36*i-11)*x1 + d(36*i-10)*x2 + d(36*i-9 )*x3 + d(36*i-8 )*x4 + d(36*i-7 )*x5 + d(36*i-6 )*x6
155  yv6= d(36*i-5 )*x1 + d(36*i-4 )*x2 + d(36*i-3 )*x3 + d(36*i-2 )*x4 + d(36*i-1 )*x5 + d(36*i )*x6
156 
157  js= indexl(i-1) + 1
158  je= indexl(i )
159  do j= js, je
160  in = iteml(j)
161  x1= x(6*in-5)
162  x2= x(6*in-4)
163  x3= x(6*in-3)
164  x4= x(6*in-2)
165  x5= x(6*in-1)
166  x6= x(6*in )
167  yv1= yv1 + al(36*j-35)*x1 + al(36*j-34)*x2 + al(36*j-33)*x3 + al(36*j-32)*x4 + al(36*j-31)*x5 + al(36*j-30)*x6
168  yv2= yv2 + al(36*j-29)*x1 + al(36*j-28)*x2 + al(36*j-27)*x3 + al(36*j-26)*x4 + al(36*j-25)*x5 + al(36*j-24)*x6
169  yv3= yv3 + al(36*j-23)*x1 + al(36*j-22)*x2 + al(36*j-21)*x3 + al(36*j-20)*x4 + al(36*j-19)*x5 + al(36*j-18)*x6
170  yv4= yv4 + al(36*j-17)*x1 + al(36*j-16)*x2 + al(36*j-15)*x3 + al(36*j-14)*x4 + al(36*j-13)*x5 + al(36*j-12)*x6
171  yv5= yv5 + al(36*j-11)*x1 + al(36*j-10)*x2 + al(36*j-9 )*x3 + al(36*j-8 )*x4 + al(36*j-7 )*x5 + al(36*j-6 )*x6
172  yv6= yv6 + al(36*j-5 )*x1 + al(36*j-4 )*x2 + al(36*j-3 )*x3 + al(36*j-2 )*x4 + al(36*j-1 )*x5 + al(36*j )*x6
173  enddo
174  js= indexu(i-1) + 1
175  je= indexu(i )
176  do j= js, je
177  in = itemu(j)
178  x1= x(6*in-5)
179  x2= x(6*in-4)
180  x3= x(6*in-3)
181  x4= x(6*in-2)
182  x5= x(6*in-1)
183  x6= x(6*in )
184  yv1= yv1 + au(36*j-35)*x1 + au(36*j-34)*x2 + au(36*j-33)*x3 + au(36*j-32)*x4 + au(36*j-31)*x5 + au(36*j-30)*x6
185  yv2= yv2 + au(36*j-29)*x1 + au(36*j-28)*x2 + au(36*j-27)*x3 + au(36*j-26)*x4 + au(36*j-25)*x5 + au(36*j-24)*x6
186  yv3= yv3 + au(36*j-23)*x1 + au(36*j-22)*x2 + au(36*j-21)*x3 + au(36*j-20)*x4 + au(36*j-19)*x5 + au(36*j-18)*x6
187  yv4= yv4 + au(36*j-17)*x1 + au(36*j-16)*x2 + au(36*j-15)*x3 + au(36*j-14)*x4 + au(36*j-13)*x5 + au(36*j-12)*x6
188  yv5= yv5 + au(36*j-11)*x1 + au(36*j-10)*x2 + au(36*j-9 )*x3 + au(36*j-8 )*x4 + au(36*j-7 )*x5 + au(36*j-6 )*x6
189  yv6= yv6 + au(36*j-5 )*x1 + au(36*j-4 )*x2 + au(36*j-3 )*x3 + au(36*j-2 )*x4 + au(36*j-1 )*x5 + au(36*j )*x6
190  enddo
191  y(6*i-5)= yv1
192  y(6*i-4)= yv2
193  y(6*i-3)= yv3
194  y(6*i-2)= yv4
195  y(6*i-1)= yv5
196  y(6*i )= yv6
197  enddo
198  enddo
199  !$OMP END PARALLEL
200 
201  !OCL END_CACHE_SUBSECTOR
202  !OCL END_CACHE_SECTOR_SIZE
203 
204  !call stop_collection("loopInMatvec66")
205  !call fapp_stop("loopInMatvec66", 1, 0)
206 
207  end_time = hecmw_wtime()
208  time_ax = time_ax + end_time - start_time
209 
210  endif
211 
212  if (hecmat%cmat%n_val > 0) then
213  call hecmw_cmat_multvec_add( hecmat%cmat, x, y, np * hecmat%NDOF )
214  end if
215 
216  end subroutine hecmw_matvec_66
217 
218  !C
219  !C***
220  !C*** hecmw_matresid_66
221  !C***
222  !C
223  subroutine hecmw_matresid_66 (hecMESH, hecMAT, X, B, R, COMMtime)
224  use hecmw_util
225  implicit none
226  type (hecmwst_local_mesh), intent(in) :: hecmesh
227  type (hecmwst_matrix), intent(in) :: hecmat
228  real(kind=kreal), intent(in) :: x(:), b(:)
229  real(kind=kreal), intent(out) :: r(:)
230  real(kind=kreal), intent(inout), optional :: commtime
231 
232  integer(kind=kint) :: i
233  real(kind=kreal) :: tcomm
234 
235  tcomm = 0.d0
236  call hecmw_matvec_66 (hecmesh, hecmat, x, r, tcomm)
237  if (present(commtime)) commtime = commtime + tcomm
238  do i = 1, hecmat%N * 6
239  r(i) = b(i) - r(i)
240  enddo
241 
242  end subroutine hecmw_matresid_66
243 
244  !C
245  !C***
246  !C*** hecmw_rel_resid_L2_66
247  !C***
248  !C
249  function hecmw_rel_resid_l2_66 (hecMESH, hecMAT, COMMtime)
250  use hecmw_util
252  implicit none
253  real(kind=kreal) :: hecmw_rel_resid_l2_66
254  type ( hecmwst_local_mesh ), intent(in) :: hecmesh
255  type ( hecmwst_matrix ), intent(in) :: hecmat
256  real(kind=kreal), intent(inout), optional :: commtime
257 
258  real(kind=kreal), allocatable :: r(:)
259  real(kind=kreal) :: bnorm2, rnorm2
260  real(kind=kreal) :: tcomm
261 
262  allocate(r(hecmat%NDOF*hecmat%NP))
263 
264  tcomm = 0.d0
265  call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, &
266  hecmat%B, hecmat%B, bnorm2, tcomm)
267  if (bnorm2 == 0.d0) then
268  bnorm2 = 1.d0
269  endif
270  call hecmw_matresid_66(hecmesh, hecmat, hecmat%X, hecmat%B, r, tcomm)
271  call hecmw_innerproduct_r(hecmesh, hecmat%NDOF, r, r, rnorm2, tcomm)
272  hecmw_rel_resid_l2_66 = sqrt(rnorm2 / bnorm2)
273 
274  if (present(commtime)) commtime = commtime + tcomm
275 
276  deallocate(r)
277  end function hecmw_rel_resid_l2_66
278 
279  !C
280  !C***
281  !C*** hecmw_Tvec_66
282  !C***
283  !C
284  subroutine hecmw_tvec_66 (hecMESH, X, Y, COMMtime)
285  use hecmw_util
286  use m_hecmw_comm_f
287  implicit none
288  type (hecmwst_local_mesh), intent(in) :: hecmesh
289  real(kind=kreal), intent(in) :: x(:)
290  real(kind=kreal), intent(out) :: y(:)
291  real(kind=kreal), intent(inout) :: commtime
292 
293  real(kind=kreal) :: start_time, end_time
294  integer(kind=kint) :: i
295 
296  start_time= hecmw_wtime()
297  call hecmw_update_6_r (hecmesh, x, hecmesh%n_node)
298  end_time= hecmw_wtime()
299  commtime = commtime + end_time - start_time
300 
301  do i= 1, hecmesh%nn_internal * hecmesh%n_dof
302  y(i)= x(i)
303  enddo
304 
305  ! do i= 1, hecMESH%mpc%n_mpc
306  ! k = hecMESH%mpc%mpc_index(i-1) + 1
307  ! kk = 3 * (hecMESH%mpc%mpc_item(k) - 1) + hecMESH%mpc%mpc_dof(k)
308  ! Y(kk) = 0.d0
309  ! do j= hecMESH%mpc%mpc_index(i-1) + 2, hecMESH%mpc%mpc_index(i)
310  ! jj = 3 * (hecMESH%mpc%mpc_item(j) - 1) + hecMESH%mpc%mpc_dof(j)
311  ! Y(kk) = Y(kk) - hecMESH%mpc%mpc_val(j) * X(jj)
312  ! enddo
313  ! enddo
314 
315  end subroutine hecmw_tvec_66
316 
317  !C
318  !C***
319  !C*** hecmw_Ttvec_66
320  !C***
321  !C
322  subroutine hecmw_ttvec_66 (hecMESH, X, Y, COMMtime)
323  use hecmw_util
324  use m_hecmw_comm_f
325  implicit none
326  type (hecmwst_local_mesh), intent(in) :: hecmesh
327  real(kind=kreal), intent(in) :: x(:)
328  real(kind=kreal), intent(out) :: y(:)
329  real(kind=kreal), intent(inout) :: commtime
330 
331  real(kind=kreal) :: start_time, end_time
332  integer(kind=kint) :: i
333 
334  start_time= hecmw_wtime()
335  call hecmw_update_6_r (hecmesh, x, hecmesh%n_node)
336  end_time= hecmw_wtime()
337  commtime = commtime + end_time - start_time
338 
339  do i= 1, hecmesh%nn_internal * hecmesh%n_dof
340  y(i)= x(i)
341  enddo
342 
343  ! do i= 1, hecMESH%mpc%n_mpc
344  ! k = hecMESH%mpc%mpc_index(i-1) + 1
345  ! kk = 3 * (hecMESH%mpc%mpc_item(k) - 1) + hecMESH%mpc%mpc_dof(k)
346  ! Y(kk) = 0.d0
347  ! do j= hecMESH%mpc%mpc_index(i-1) + 2, hecMESH%mpc%mpc_index(i)
348  ! jj = 3 * (hecMESH%mpc%mpc_item(j) - 1) + hecMESH%mpc%mpc_dof(j)
349  ! Y(jj) = Y(jj) - hecMESH%mpc%mpc_val(j) * X(kk)
350  ! enddo
351  ! enddo
352 
353  end subroutine hecmw_ttvec_66
354 
355  !C
356  !C***
357  !C*** hecmw_TtmatTvec_66
358  !C***
359  !C
360  subroutine hecmw_ttmattvec_66 (hecMESH, hecMAT, X, Y, W, COMMtime)
361  use hecmw_util
362  implicit none
363  type (hecmwst_local_mesh), intent(in) :: hecmesh
364  type (hecmwst_matrix), intent(in) :: hecmat
365  real(kind=kreal), intent(in) :: x(:)
366  real(kind=kreal), intent(out) :: y(:), w(:)
367  real(kind=kreal), intent(inout) :: commtime
368 
369  call hecmw_tvec_66(hecmesh, x, y, commtime)
370  call hecmw_matvec_66(hecmesh, hecmat, y, w, commtime)
371  call hecmw_ttvec_66(hecmesh, w, y, commtime)
372 
373  end subroutine hecmw_ttmattvec_66
374 
375 
376 end module hecmw_solver_las_66
Jagged Diagonal Matrix storage for vector processors. Original code was provided by JAMSTEC.
Definition: hecmw_jadm.f90:8
subroutine, public hecmw_jad_matvec(hecMESH, hecMAT, X, Y, COMMtime)
Definition: hecmw_jadm.f90:61
subroutine, public hecmw_cmat_multvec_add(cmat, X, Y, len)
integer(kind=kint) function, public hecmw_mat_get_usejad(hecMAT)
subroutine, public hecmw_matresid_66(hecMESH, hecMAT, X, B, R, COMMtime)
subroutine, public hecmw_tvec_66(hecMESH, X, Y, COMMtime)
subroutine, public hecmw_ttvec_66(hecMESH, X, Y, COMMtime)
real(kind=kreal) function, public hecmw_rel_resid_l2_66(hecMESH, hecMAT, COMMtime)
subroutine, public hecmw_matvec_66(hecMESH, hecMAT, X, Y, time_Ax, COMMtime)
subroutine, public hecmw_ttmattvec_66(hecMESH, hecMAT, X, Y, W, COMMtime)
subroutine hecmw_innerproduct_r(hecMESH, ndof, X, Y, sum, COMMtime)
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
real(kind=kreal) function hecmw_wtime()
subroutine hecmw_update_6_r(hecMESH, val, n)