FrontISTR  5.2.0
Large-scale structural analysis program with finit element method
hecmw_precond_SSOR_11.f90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
6 !C
7 !C***
8 !C*** module hecmw_precond_SSOR_11
9 !C***
10 !C
12  use hecmw_util
18  !$ use omp_lib
19 
20  private
21 
25 
26  integer(kind=kint) :: N
27  real(kind=kreal), pointer :: d(:) => null()
28  real(kind=kreal), pointer :: al(:) => null()
29  real(kind=kreal), pointer :: au(:) => null()
30  integer(kind=kint), pointer :: indexL(:) => null()
31  integer(kind=kint), pointer :: indexU(:) => null()
32  integer(kind=kint), pointer :: itemL(:) => null()
33  integer(kind=kint), pointer :: itemU(:) => null()
34  real(kind=kreal), pointer :: alu(:) => null()
35 
36  integer(kind=kint) :: NContact = 0
37  real(kind=kreal), pointer :: cal(:) => null()
38  real(kind=kreal), pointer :: cau(:) => null()
39  integer(kind=kint), pointer :: indexCL(:) => null()
40  integer(kind=kint), pointer :: indexCU(:) => null()
41  integer(kind=kint), pointer :: itemCL(:) => null()
42  integer(kind=kint), pointer :: itemCU(:) => null()
43 
44  integer(kind=kint) :: NColor
45  integer(kind=kint), pointer :: COLORindex(:) => null()
46  integer(kind=kint), pointer :: perm(:) => null()
47  integer(kind=kint), pointer :: iperm(:) => null()
48 
49  logical, save :: isFirst = .true.
50 
51  logical, save :: INITIALIZED = .false.
52 
53 contains
54 
55  subroutine hecmw_precond_ssor_11_setup(hecMAT)
56  implicit none
57  type(hecmwst_matrix), intent(inout) :: hecmat
58  integer(kind=kint ) :: npl, npu, npcl, npcu
59  real (kind=kreal), allocatable :: cd(:)
60  integer(kind=kint ) :: ncolor_in
61  real (kind=kreal) :: sigma_diag
62  real (kind=kreal) :: alutmp(1,1), pw(1)
63  integer(kind=kint ) :: ii, i, j, k
64  integer(kind=kint ) :: nthreads = 1
65  integer(kind=kint ), allocatable :: perm_tmp(:)
66  !real (kind=kreal) :: t0
67 
68  !t0 = hecmw_Wtime()
69  !write(*,*) 'DEBUG: SSOR setup start', hecmw_Wtime()-t0
70 
71  if (initialized) then
72  if (hecmat%Iarray(98) == 1) then ! need symbolic and numerical setup
73  call hecmw_precond_ssor_11_clear(hecmat)
74  else if (hecmat%Iarray(97) == 1) then ! need numerical setup only
75  call hecmw_precond_ssor_11_clear(hecmat) ! TEMPORARY
76  else
77  return
78  endif
79  endif
80 
81  !$ nthreads = omp_get_max_threads()
82 
83  n = hecmat%N
84  ! N = hecMAT%NP
85  ncolor_in = hecmw_mat_get_ncolor_in(hecmat)
86  sigma_diag = hecmw_mat_get_sigma_diag(hecmat)
87  ncontact = hecmat%cmat%n_val
88 
89  if (ncontact.gt.0) then
90  call hecmw_cmat_lu( hecmat )
91  endif
92 
93  if (nthreads == 1) then
94  ncolor = 1
95  allocate(colorindex(0:1), perm(n), iperm(n))
96  colorindex(0) = 0
97  colorindex(1) = n
98  do i=1,n
99  perm(i) = i
100  iperm(i) = i
101  end do
102  else
103  allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
104  call hecmw_matrix_ordering_rcm(n, hecmat%indexL, hecmat%itemL, &
105  hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
106  !write(*,*) 'DEBUG: RCM ordering done', hecmw_Wtime()-t0
107  call hecmw_matrix_ordering_mc(n, hecmat%indexL, hecmat%itemL, &
108  hecmat%indexU, hecmat%itemU, perm_tmp, &
109  ncolor_in, ncolor, colorindex, perm, iperm)
110  !write(*,*) 'DEBUG: MC ordering done', hecmw_Wtime()-t0
111  deallocate(perm_tmp)
112 
113  !call write_debug_info
114  endif
115 
116  npl = hecmat%indexL(n)
117  npu = hecmat%indexU(n)
118  allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
119  call hecmw_matrix_reorder_profile(n, perm, iperm, &
120  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
121  indexl, indexu, iteml, itemu)
122  !write(*,*) 'DEBUG: reordering profile done', hecmw_Wtime()-t0
123 
124  !call check_ordering
125 
126  allocate(d(n), al(npl), au(npu))
127  call hecmw_matrix_reorder_values(n, 1, perm, iperm, &
128  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
129  hecmat%AL, hecmat%AU, hecmat%D, &
130  indexl, indexu, iteml, itemu, al, au, d)
131  !write(*,*) 'DEBUG: reordering values done', hecmw_Wtime()-t0
132 
133  call hecmw_matrix_reorder_renum_item(n, perm, indexl, iteml)
134  call hecmw_matrix_reorder_renum_item(n, perm, indexu, itemu)
135 
136  if (ncontact.gt.0) then
137  npcl = hecmat%indexCL(n)
138  npcu = hecmat%indexCU(n)
139  allocate(indexcl(0:n), indexcu(0:n), itemcl(npcl), itemcu(npcu))
140  call hecmw_matrix_reorder_profile(n, perm, iperm, &
141  hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
142  indexcl, indexcu, itemcl, itemcu)
143 
144  allocate(cd(n), cal(npcl), cau(npcu))
145  call hecmw_matrix_reorder_values(n, 1, perm, iperm, &
146  hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
147  hecmat%CAL, hecmat%CAU, hecmat%D, &
148  indexcl, indexcu, itemcl, itemcu, cal, cau, cd)
149  deallocate(cd)
150 
151  call hecmw_matrix_reorder_renum_item(n, perm, indexcl, itemcl)
152  call hecmw_matrix_reorder_renum_item(n, perm, indexcu, itemcu)
153  end if
154 
155  allocate(alu(n))
156  alu = 0.d0
157 
158  do ii= 1, n
159  alu(ii) = d(ii)
160  enddo
161 
162  if (ncontact.gt.0) then
163  do k= 1, hecmat%cmat%n_val
164  if (hecmat%cmat%pair(k)%i.ne.hecmat%cmat%pair(k)%j) cycle
165  ii = iperm( hecmat%cmat%pair(k)%i )
166  alu(ii) = alu(ii) + hecmat%cmat%pair(k)%val(1, 1)
167  enddo
168  endif
169 
170  !$omp parallel default(none),private(ii,ALUtmp,k,i,j,PW),shared(N,ALU,SIGMA_DIAG)
171  !$omp do
172  do ii= 1, n
173  alutmp(1,1)= alu(ii) * sigma_diag
174  alutmp(1,1)= 1.d0/alutmp(1,1)
175  alu(ii)= alutmp(1,1)
176  enddo
177  !$omp end do
178  !$omp end parallel
179 
180  isfirst = .true.
181 
182  initialized = .true.
183  hecmat%Iarray(98) = 0 ! symbolic setup done
184  hecmat%Iarray(97) = 0 ! numerical setup done
185 
186  !write(*,*) 'DEBUG: SSOR setup done', hecmw_Wtime()-t0
187 
188  end subroutine hecmw_precond_ssor_11_setup
189 
191  use hecmw_tuning_fx
192  implicit none
193  real(kind=kreal), intent(inout) :: zp(:)
194  integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k
195  real(kind=kreal) :: sw(1), x(1)
196 
197  ! added for turning >>>
198  integer(kind=kint), parameter :: numofblockperthread = 100
199  integer(kind=kint), save :: numofthread = 1, numofblock
200  integer(kind=kint), save, allocatable :: ictoblockindex(:)
201  integer(kind=kint), save, allocatable :: blockindextocolorindex(:)
202  integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
203  integer(kind=kint) :: blockindex, elementcount, numofelement, ii
204  real(kind=kreal) :: numofelementperblock
205  integer(kind=kint) :: my_rank
206 
207  if (isfirst) then
208  !$ numOfThread = omp_get_max_threads()
209  numofblock = numofthread * numofblockperthread
210  if (allocated(ictoblockindex)) deallocate(ictoblockindex)
211  if (allocated(blockindextocolorindex)) deallocate(blockindextocolorindex)
212  allocate (ictoblockindex(0:ncolor), &
213  blockindextocolorindex(0:numofblock + ncolor))
214  numofelement = n + indexl(n) + indexu(n)
215  numofelementperblock = dble(numofelement) / numofblock
216  blockindex = 0
217  ictoblockindex = -1
218  ictoblockindex(0) = 0
219  blockindextocolorindex = -1
220  blockindextocolorindex(0) = 0
221  my_rank = hecmw_comm_get_rank()
222  ! write(9000+my_rank,*) &
223  ! '# numOfElementPerBlock =', numOfElementPerBlock
224  ! write(9000+my_rank,*) &
225  ! '# ic, blockIndex, colorIndex, elementCount'
226  do ic = 1, ncolor
227  elementcount = 0
228  ii = 1
229  do i = colorindex(ic-1)+1, colorindex(ic)
230  elementcount = elementcount + 1
231  elementcount = elementcount + (indexl(i) - indexl(i-1))
232  elementcount = elementcount + (indexu(i) - indexu(i-1))
233  if (elementcount > ii * numofelementperblock &
234  .or. i == colorindex(ic)) then
235  ii = ii + 1
236  blockindex = blockindex + 1
237  blockindextocolorindex(blockindex) = i
238  ! write(9000+my_rank,*) ic, blockIndex, &
239  ! blockIndexToColorIndex(blockIndex), elementCount
240  endif
241  enddo
242  ictoblockindex(ic) = blockindex
243  enddo
244  numofblock = blockindex
245 
247  sectorcachesize0, sectorcachesize1 )
248 
249  isfirst = .false.
250  endif
251  ! <<< added for turning
252 
253  !call start_collection("loopInPrecond33")
254 
255  !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
256  !OCL CACHE_SUBSECTOR_ASSIGN(ZP)
257 
258  !$omp parallel default(none) &
259  !$omp&shared(NColor,indexL,itemL,indexU,itemU,AL,AU,D,ALU,perm,&
260  !$omp& NContact,indexCL,itemCL,indexCU,itemCU,CAL,CAU,&
261  !$omp& ZP,icToBlockIndex,blockIndexToColorIndex) &
262  !$omp&private(SW,X,ic,i,iold,isL,ieL,isU,ieU,j,k,blockIndex)
263 
264  !C-- FORWARD
265  do ic=1,ncolor
266  !$omp do schedule (static, 1)
267  do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
268  do i = blockindextocolorindex(blockindex-1)+1, &
269  blockindextocolorindex(blockindex)
270  iold = perm(i)
271  sw(1)= zp(iold)
272  isl= indexl(i-1)+1
273  iel= indexl(i)
274  do j= isl, iel
275  k= iteml(j)
276  x(1)= zp(k)
277  sw(1)= sw(1) - al(j)*x(1)
278  enddo ! j
279 
280  if (ncontact.ne.0) then
281  isl= indexcl(i-1)+1
282  iel= indexcl(i)
283  do j= isl, iel
284  k= itemcl(j)
285  x(1)= zp(k)
286  sw(1)= sw(1) - cal(j)*x(1)
287  enddo ! j
288  endif
289 
290  x = sw
291  x(1)= alu(i )* x(1)
292  zp(iold)= x(1)
293  enddo ! i
294  enddo ! blockIndex
295  !$omp end do
296  enddo ! ic
297 
298  !C-- BACKWARD
299  do ic=ncolor, 1, -1
300  !$omp do schedule (static, 1)
301  do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
302  do i = blockindextocolorindex(blockindex), &
303  blockindextocolorindex(blockindex-1)+1, -1
304  ! do blockIndex = icToBlockIndex(ic-1)+1, icToBlockIndex(ic)
305  ! do i = blockIndexToColorIndex(blockIndex-1)+1, &
306  ! blockIndexToColorIndex(blockIndex)
307  ! do i = endPos(threadNum, ic), startPos(threadNum, ic), -1
308  sw= 0.d0
309  isu= indexu(i-1) + 1
310  ieu= indexu(i)
311  do j= ieu, isu, -1
312  k= itemu(j)
313  x(1)= zp(k)
314  sw(1)= sw(1) + au(j)*x(1)
315  enddo ! j
316 
317  if (ncontact.gt.0) then
318  isu= indexcu(i-1) + 1
319  ieu= indexcu(i)
320  do j= ieu, isu, -1
321  k= itemcu(j)
322  x(1)= zp(k)
323  sw(1)= sw(1) + cau(j)*x(1)
324  enddo ! j
325  endif
326 
327  x = sw
328  x(1)= alu(i)* x(1)
329 
330  iold = perm(i)
331  zp(iold)= zp(iold) - x(1)
332  enddo ! i
333  enddo ! blockIndex
334  !$omp end do
335  enddo ! ic
336  !$omp end parallel
337 
338  !OCL END_CACHE_SUBSECTOR
339  !OCL END_CACHE_SECTOR_SIZE
340 
341  !call stop_collection("loopInPrecond33")
342 
343  end subroutine hecmw_precond_ssor_11_apply
344 
345  subroutine hecmw_precond_ssor_11_clear(hecMAT)
346  implicit none
347  type(hecmwst_matrix), intent(inout) :: hecmat
348  integer(kind=kint ) :: nthreads = 1
349  !$ nthreads = omp_get_max_threads()
350  if (associated(colorindex)) deallocate(colorindex)
351  if (associated(perm)) deallocate(perm)
352  if (associated(iperm)) deallocate(iperm)
353  if (associated(alu)) deallocate(alu)
354  if (nthreads >= 1) then
355  if (associated(d)) deallocate(d)
356  if (associated(al)) deallocate(al)
357  if (associated(au)) deallocate(au)
358  if (associated(indexl)) deallocate(indexl)
359  if (associated(indexu)) deallocate(indexu)
360  if (associated(iteml)) deallocate(iteml)
361  if (associated(itemu)) deallocate(itemu)
362  if (ncontact.ne.0) then
363  if (associated(cal)) deallocate(cal)
364  if (associated(cau)) deallocate(cau)
365  if (associated(indexcl)) deallocate(indexcl)
366  if (associated(indexcu)) deallocate(indexcu)
367  if (associated(itemcl)) deallocate(itemcl)
368  if (associated(itemcu)) deallocate(itemcu)
369  end if
370  end if
371  nullify(colorindex)
372  nullify(perm)
373  nullify(iperm)
374  nullify(alu)
375  nullify(d)
376  nullify(al)
377  nullify(au)
378  nullify(indexl)
379  nullify(indexu)
380  nullify(iteml)
381  nullify(itemu)
382  if (ncontact.ne.0) then
383  nullify(cal)
384  nullify(cau)
385  nullify(indexcl)
386  nullify(indexcu)
387  nullify(itemcl)
388  nullify(itemcu)
389  call hecmw_cmat_lu_free( hecmat )
390  endif
391  ncontact = 0
392  initialized = .false.
393  end subroutine hecmw_precond_ssor_11_clear
394 
395  subroutine write_debug_info
396  implicit none
397  integer(kind=kint) :: my_rank, ic, in
398  my_rank = hecmw_comm_get_rank()
399  !--------------------> debug: shizawa
400  if (my_rank.eq.0) then
401  write(*,*) 'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
402  endif
403  write(19000+my_rank,'(a)') '#NCOLORTot'
404  write(19000+my_rank,*) ncolor
405  write(19000+my_rank,'(a)') '#ic COLORindex(ic-1)+1 COLORindex(ic)'
406  do ic=1,ncolor
407  write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
408  enddo ! ic
409  write(29000+my_rank,'(a)') '#n_node'
410  write(29000+my_rank,*) n
411  write(29000+my_rank,'(a)') '#in OLDtoNEW(in) NEWtoOLD(in)'
412  do in=1,n
413  write(29000+my_rank,*) in, iperm(in), perm(in)
414  if (perm(iperm(in)) .ne. in) then
415  write(29000+my_rank,*) '** WARNING **: NEWtoOLD and OLDtoNEW: ',in
416  endif
417  enddo
418  end subroutine write_debug_info
419 
420  subroutine check_ordering
421  implicit none
422  integer(kind=kint) :: ic, i, j, k
423  integer(kind=kint), allocatable :: iicolor(:)
424  ! check color dependence of neighbouring nodes
425  if (ncolor.gt.1) then
426  allocate(iicolor(n))
427  do ic=1,ncolor
428  do i= colorindex(ic-1)+1, colorindex(ic)
429  iicolor(i) = ic
430  enddo ! i
431  enddo ! ic
432  ! FORWARD: L-part
433  do ic=1,ncolor
434  do i= colorindex(ic-1)+1, colorindex(ic)
435  do j= indexl(i-1)+1, indexl(i)
436  k= iteml(j)
437  if (iicolor(i).eq.iicolor(k)) then
438  write(*,*) .eq.'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
439  endif
440  enddo ! j
441  enddo ! i
442  enddo ! ic
443  ! BACKWARD: U-part
444  do ic=ncolor, 1, -1
445  do i= colorindex(ic), colorindex(ic-1)+1, -1
446  do j= indexu(i-1)+1, indexu(i)
447  k= itemu(j)
448  if (iicolor(i).eq.iicolor(k)) then
449  write(*,*) .eq.'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
450  endif
451  enddo ! j
452  enddo ! i
453  enddo ! ic
454  deallocate(iicolor)
455  endif ! if (NColor.gt.1)
456  !--------------------< debug: shizawa
457  end subroutine check_ordering
458 
459 end module hecmw_precond_ssor_11
subroutine, public hecmw_cmat_lu(hecMAT)
subroutine, public hecmw_cmat_lu_free(hecMAT)
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecMAT)
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecMAT)
subroutine, public hecmw_matrix_reorder_values(N, NDOF, perm, iperm, indexL, indexU, itemL, itemU, AL, AU, D, indexLp, indexUp, itemLp, itemUp, ALp, AUp, Dp)
subroutine, public hecmw_matrix_reorder_renum_item(N, perm, indexXp, itemXp)
subroutine, public hecmw_matrix_reorder_profile(N, perm, iperm, indexL, indexU, itemL, itemU, indexLp, indexUp, itemLp, itemUp)
subroutine, public hecmw_precond_ssor_11_setup(hecMAT)
subroutine, public hecmw_precond_ssor_11_clear(hecMAT)
subroutine, public hecmw_precond_ssor_11_apply(ZP)
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
integer(kind=kint) function hecmw_comm_get_rank()
subroutine, public hecmw_matrix_ordering_rcm(N, indexL, itemL, indexU, itemU, perm, iperm)
subroutine, public hecmw_matrix_ordering_mc(N, indexL, itemL, indexU, itemU, perm_cur, ncolor_in, ncolor_out, COLORindex, perm, iperm)