FrontISTR  5.2.0
Large-scale structural analysis program with finit element method
hecmw_precond_SSOR_nn.f90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
6 !C
7 !C***
8 !C*** module hecmw_precond_SSOR_nn
9 !C***
10 !C
12  use hecmw_util
18  !$ use omp_lib
19 
20  private
21 
25 
26  integer(kind=kint) :: N
27  real(kind=kreal), pointer :: d(:) => null()
28  real(kind=kreal), pointer :: al(:) => null()
29  real(kind=kreal), pointer :: au(:) => null()
30  integer(kind=kint), pointer :: indexL(:) => null()
31  integer(kind=kint), pointer :: indexU(:) => null()
32  integer(kind=kint), pointer :: itemL(:) => null()
33  integer(kind=kint), pointer :: itemU(:) => null()
34  real(kind=kreal), pointer :: alu(:) => null()
35 
36  integer(kind=kint) :: NContact = 0
37  real(kind=kreal), pointer :: cal(:) => null()
38  real(kind=kreal), pointer :: cau(:) => null()
39  integer(kind=kint), pointer :: indexCL(:) => null()
40  integer(kind=kint), pointer :: indexCU(:) => null()
41  integer(kind=kint), pointer :: itemCL(:) => null()
42  integer(kind=kint), pointer :: itemCU(:) => null()
43 
44  integer(kind=kint) :: NColor
45  integer(kind=kint), pointer :: COLORindex(:) => null()
46  integer(kind=kint), pointer :: perm(:) => null()
47  integer(kind=kint), pointer :: iperm(:) => null()
48 
49  logical, save :: isFirst = .true.
50 
51  logical, save :: INITIALIZED = .false.
52 
53 contains
54 
55  subroutine hecmw_precond_ssor_nn_setup(hecMAT)
56  implicit none
57  type(hecmwst_matrix), intent(inout) :: hecmat
58  integer(kind=kint ) :: npl, npu, npcl, npcu
59  real (kind=kreal), allocatable :: cd(:)
60  integer(kind=kint ) :: ncolor_in
61  real (kind=kreal) :: sigma_diag
62  real (kind=kreal) :: alutmp(hecmat%NDOF,hecmat%NDOF), pw(hecmat%NDOF)
63  integer(kind=kint ) :: ii, i, j, k, ndof, ndof2
64  integer(kind=kint ) :: nthreads = 1
65  integer(kind=kint ), allocatable :: perm_tmp(:)
66  !real (kind=kreal) :: t0
67 
68  !t0 = hecmw_Wtime()
69  !write(*,*) 'DEBUG: SSOR setup start', hecmw_Wtime()-t0
70 
71  if (initialized) then
72  if (hecmat%Iarray(98) == 1) then ! need symbolic and numerical setup
73  call hecmw_precond_ssor_nn_clear(hecmat)
74  else if (hecmat%Iarray(97) == 1) then ! need numerical setup only
75  call hecmw_precond_ssor_nn_clear(hecmat) ! TEMPORARY
76  else
77  return
78  endif
79  endif
80 
81  !$ nthreads = omp_get_max_threads()
82 
83  n = hecmat%N
84  ndof=hecmat%NDOF
85  ndof2=ndof*ndof
86 
87  ncolor_in = hecmw_mat_get_ncolor_in(hecmat)
88  sigma_diag = hecmw_mat_get_sigma_diag(hecmat)
89  ncontact = hecmat%cmat%n_val
90 
91  if (ncontact.gt.0) then
92  call hecmw_cmat_lu( hecmat )
93  endif
94 
95  if (nthreads == 1) then
96  ncolor = 1
97  allocate(colorindex(0:1), perm(n), iperm(n))
98  colorindex(0) = 0
99  colorindex(1) = n
100  do i=1,n
101  perm(i) = i
102  iperm(i) = i
103  end do
104  else
105  allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
106  call hecmw_matrix_ordering_rcm(n, hecmat%indexL, hecmat%itemL, &
107  hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
108  !write(*,*) 'DEBUG: RCM ordering done', hecmw_Wtime()-t0
109  call hecmw_matrix_ordering_mc(n, hecmat%indexL, hecmat%itemL, &
110  hecmat%indexU, hecmat%itemU, perm_tmp, &
111  ncolor_in, ncolor, colorindex, perm, iperm)
112  !write(*,*) 'DEBUG: MC ordering done', hecmw_Wtime()-t0
113  deallocate(perm_tmp)
114 
115  !call write_debug_info
116  endif
117 
118  npl = hecmat%indexL(n)
119  npu = hecmat%indexU(n)
120  allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
121  call hecmw_matrix_reorder_profile(n, perm, iperm, &
122  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
123  indexl, indexu, iteml, itemu)
124  !write(*,*) 'DEBUG: reordering profile done', hecmw_Wtime()-t0
125 
126  !call check_ordering
127 
128  allocate(d(ndof2*n), al(ndof2*npl), au(ndof2*npu))
129  call hecmw_matrix_reorder_values(n, ndof, perm, iperm, &
130  hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
131  hecmat%AL, hecmat%AU, hecmat%D, &
132  indexl, indexu, iteml, itemu, al, au, d)
133  !write(*,*) 'DEBUG: reordering values done', hecmw_Wtime()-t0
134 
135  call hecmw_matrix_reorder_renum_item(n, perm, indexl, iteml)
136  call hecmw_matrix_reorder_renum_item(n, perm, indexu, itemu)
137 
138  if (ncontact.gt.0) then
139  npcl = hecmat%indexCL(n)
140  npcu = hecmat%indexCU(n)
141  allocate(indexcl(0:n), indexcu(0:n), itemcl(npcl), itemcu(npcu))
142  call hecmw_matrix_reorder_profile(n, perm, iperm, &
143  hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
144  indexcl, indexcu, itemcl, itemcu)
145 
146  allocate(cd(ndof2*n), cal(ndof2*npcl), cau(ndof2*npcu))
147  call hecmw_matrix_reorder_values(n, ndof, perm, iperm, &
148  hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
149  hecmat%CAL, hecmat%CAU, hecmat%D, &
150  indexcl, indexcu, itemcl, itemcu, cal, cau, cd)
151  deallocate(cd)
152 
153  call hecmw_matrix_reorder_renum_item(n, perm, indexcl, itemcl)
154  call hecmw_matrix_reorder_renum_item(n, perm, indexcu, itemcu)
155  end if
156 
157  allocate(alu(ndof2*n))
158  alu = 0.d0
159 
160  do ii= 1, ndof2*n
161  alu(ii) = d(ii)
162  enddo
163 
164  if (ncontact.gt.0) then
165  do k= 1, hecmat%cmat%n_val
166  if (hecmat%cmat%pair(k)%i.ne.hecmat%cmat%pair(k)%j) cycle
167  ii = iperm( hecmat%cmat%pair(k)%i )
168  do i = 1, ndof
169  do j = 1, ndof
170  alu(ndof2*(ii-1)+ndof*(i-1)+j) = alu(ndof2*(ii-1)+ndof*(i-1)+j) + hecmat%cmat%pair(k)%val(i, j)
171  end do
172  end do
173  enddo
174  endif
175 
176  !$omp parallel default(none),private(ii,ALUtmp,k,i,j,PW),shared(N,NDOF,NDOF2,ALU,SIGMA_DIAG)
177  !$omp do
178  do ii= 1, n
179  do i = 1, ndof
180  do j = 1, ndof
181  alutmp(i,j) = alu(ndof2*(ii-1)+(i-1)*ndof+j)
182  if (i==j) alutmp(i,j)=alutmp(i,j)*sigma_diag
183  end do
184  end do
185  do k= 1, ndof
186  alutmp(k,k)= 1.d0/alutmp(k,k)
187  do i= k+1, ndof
188  alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
189  do j= k+1, ndof
190  pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
191  enddo
192  do j= k+1, ndof
193  alutmp(i,j)= pw(j)
194  enddo
195  enddo
196  enddo
197  do i = 1, ndof
198  do j = 1, ndof
199  alu(ndof2*(ii-1)+(i-1)*ndof+j)= alutmp(i,j)
200  end do
201  end do
202  enddo
203  !$omp end do
204  !$omp end parallel
205 
206  isfirst = .true.
207 
208  initialized = .true.
209  hecmat%Iarray(98) = 0 ! symbolic setup done
210  hecmat%Iarray(97) = 0 ! numerical setup done
211 
212  !write(*,*) 'DEBUG: SSOR setup done', hecmw_Wtime()-t0
213 
214  end subroutine hecmw_precond_ssor_nn_setup
215 
216  subroutine hecmw_precond_ssor_nn_apply(ZP, NDOF)
217  use hecmw_tuning_fx
218  implicit none
219  real(kind=kreal), intent(inout) :: zp(:)
220  integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k, ndof, ndof2, idof,jdof
221  real(kind=kreal) :: sw(ndof), x(ndof)
222 
223  ! added for turning >>>
224  integer(kind=kint), parameter :: numofblockperthread = 100
225  integer(kind=kint), save :: numofthread = 1, numofblock
226  integer(kind=kint), save, allocatable :: ictoblockindex(:)
227  integer(kind=kint), save, allocatable :: blockindextocolorindex(:)
228  integer(kind=kint), save :: sectorcachesize0, sectorcachesize1
229  integer(kind=kint) :: blockindex, elementcount, numofelement, ii
230  real(kind=kreal) :: numofelementperblock
231  integer(kind=kint) :: my_rank
232 
233  ndof2=ndof*ndof
234  if (isfirst) then
235  !$ numOfThread = omp_get_max_threads()
236  numofblock = numofthread * numofblockperthread
237  if (allocated(ictoblockindex)) deallocate(ictoblockindex)
238  if (allocated(blockindextocolorindex)) deallocate(blockindextocolorindex)
239  allocate (ictoblockindex(0:ncolor), &
240  blockindextocolorindex(0:numofblock + ncolor))
241  numofelement = n + indexl(n) + indexu(n)
242  numofelementperblock = dble(numofelement) / numofblock
243  blockindex = 0
244  ictoblockindex = -1
245  ictoblockindex(0) = 0
246  blockindextocolorindex = -1
247  blockindextocolorindex(0) = 0
248  my_rank = hecmw_comm_get_rank()
249  ! write(9000+my_rank,*) &
250  ! '# numOfElementPerBlock =', numOfElementPerBlock
251  ! write(9000+my_rank,*) &
252  ! '# ic, blockIndex, colorIndex, elementCount'
253  do ic = 1, ncolor
254  elementcount = 0
255  ii = 1
256  do i = colorindex(ic-1)+1, colorindex(ic)
257  elementcount = elementcount + 1
258  elementcount = elementcount + (indexl(i) - indexl(i-1))
259  elementcount = elementcount + (indexu(i) - indexu(i-1))
260  if (elementcount > ii * numofelementperblock &
261  .or. i == colorindex(ic)) then
262  ii = ii + 1
263  blockindex = blockindex + 1
264  blockindextocolorindex(blockindex) = i
265  ! write(9000+my_rank,*) ic, blockIndex, &
266  ! blockIndexToColorIndex(blockIndex), elementCount
267  endif
268  enddo
269  ictoblockindex(ic) = blockindex
270  enddo
271  numofblock = blockindex
272 
273  call hecmw_tuning_fx_calc_sector_cache( n, ndof, &
274  sectorcachesize0, sectorcachesize1 )
275 
276  isfirst = .false.
277  endif
278  ! <<< added for turning
279 
280  !call start_collection("loopInPrecond33")
281 
282  !OCL CACHE_SECTOR_SIZE(sectorCacheSize0,sectorCacheSize1)
283  !OCL CACHE_SUBSECTOR_ASSIGN(ZP)
284 
285  !$omp parallel default(none) &
286  !$omp&shared(NColor,indexL,itemL,indexU,itemU,AL,AU,D,ALU,perm,&
287  !$omp& NContact,indexCL,itemCL,indexCU,itemCU,CAL,CAU,&
288  !$omp& ZP,icToBlockIndex,blockIndexToColorIndex,NDOF,NDOF2) &
289  !$omp&private(SW,X,ic,i,iold,isL,ieL,isU,ieU,j,k,blockIndex,idof,jdof)
290 
291  !C-- FORWARD
292  do ic=1,ncolor
293  !$omp do schedule (static, 1)
294  do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
295  do i = blockindextocolorindex(blockindex-1)+1, &
296  blockindextocolorindex(blockindex)
297  iold = perm(i)
298  do idof = 1, ndof
299  sw(idof) = zp(ndof*(iold-1)+idof)
300  end do
301  isl= indexl(i-1)+1
302  iel= indexl(i)
303  do j= isl, iel
304  k= iteml(j)
305  do idof = 1, ndof
306  x(idof) = zp(ndof*(k-1)+idof)
307  end do
308  do idof = 1, ndof
309  do jdof = 1, ndof
310  sw(idof) = sw(idof) - al(ndof2*(j-1)+ndof*(idof-1)+jdof)*x(jdof)
311  end do
312  end do
313  enddo ! j
314 
315  if (ncontact.ne.0) then
316  isl= indexcl(i-1)+1
317  iel= indexcl(i)
318  do j= isl, iel
319  k= itemcl(j)
320  do idof = 1, ndof
321  x(idof) = zp(ndof*(k-1)+idof)
322  end do
323  do idof = 1, ndof
324  do jdof = 1, ndof
325  sw(idof) = sw(idof) - cal(ndof2*(j-1)+ndof*(idof-1)+jdof)*x(jdof)
326  end do
327  end do
328  enddo ! j
329  endif
330 
331  x = sw
332  do idof = 2,ndof
333  do jdof = 1, idof-1
334  x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+jdof )*x(jdof)
335  end do
336  end do
337  do idof = ndof, 1, -1
338  do jdof = ndof, idof+1, -1
339  x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+jdof)*x(jdof)
340  end do
341  x(idof) = alu(ndof2*(i-1)+(ndof+1)*(idof-1)+1)*x(idof)
342  end do
343  zp(ndof*(iold-1)+1:ndof*(iold-1)+ndof) = x(1:ndof)
344 
345  enddo ! i
346  enddo ! blockIndex
347  !$omp end do
348  enddo ! ic
349 
350  !C-- BACKWARD
351  do ic=ncolor, 1, -1
352  !$omp do schedule (static, 1)
353  do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
354  do i = blockindextocolorindex(blockindex), &
355  blockindextocolorindex(blockindex-1)+1, -1
356  ! do blockIndex = icToBlockIndex(ic-1)+1, icToBlockIndex(ic)
357  ! do i = blockIndexToColorIndex(blockIndex-1)+1, &
358  ! blockIndexToColorIndex(blockIndex)
359  ! do i = endPos(threadNum, ic), startPos(threadNum, ic), -1
360  sw= 0.d0
361  isu= indexu(i-1) + 1
362  ieu= indexu(i)
363  do j= ieu, isu, -1
364  k= itemu(j)
365  do idof = 1, ndof
366  x(idof) = zp(ndof*(k-1)+idof)
367  end do
368  do idof = 1, ndof
369  do jdof = 1, ndof
370  sw(idof) = sw(idof) + au(ndof2*(j-1)+ndof*(idof-1)+jdof)*x(jdof)
371  end do
372  end do
373  enddo ! j
374 
375  if (ncontact.gt.0) then
376  isu= indexcu(i-1) + 1
377  ieu= indexcu(i)
378  do j= ieu, isu, -1
379  k= itemcu(j)
380  do idof = 1, ndof
381  x(idof) = zp(ndof*(k-1)+idof)
382  end do
383  do idof = 1, ndof
384  do jdof = 1, ndof
385  sw(idof) = sw(idof) + cau(ndof2*(j-1)+ndof*(idof-1)+jdof)*x(jdof)
386  end do
387  end do
388  enddo ! j
389  endif
390 
391  x = sw
392  do idof = 2, ndof
393  do k = 1,idof-1
394  x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+k)*x(k)
395  end do
396  end do
397  do idof = ndof, 1, -1
398  do k = ndof, idof+1, -1
399  x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+k)*x(k)
400  end do
401  x(idof) = alu(ndof2*(i-1)+(ndof+1)*(idof-1)+1)*x(idof)
402  end do
403  iold = perm(i)
404  do idof = 1, ndof
405  zp(ndof*(iold-1)+idof) = zp(ndof*(iold-1)+idof) - x(idof)
406  end do
407  enddo ! i
408  enddo ! blockIndex
409  !$omp end do
410  enddo ! ic
411  !$omp end parallel
412 
413  !OCL END_CACHE_SUBSECTOR
414  !OCL END_CACHE_SECTOR_SIZE
415 
416  !call stop_collection("loopInPrecond33")
417 
418  end subroutine hecmw_precond_ssor_nn_apply
419 
420  subroutine hecmw_precond_ssor_nn_clear(hecMAT)
421  implicit none
422  type(hecmwst_matrix), intent(inout) :: hecmat
423  integer(kind=kint ) :: nthreads = 1
424  !$ nthreads = omp_get_max_threads()
425  if (associated(colorindex)) deallocate(colorindex)
426  if (associated(perm)) deallocate(perm)
427  if (associated(iperm)) deallocate(iperm)
428  if (associated(alu)) deallocate(alu)
429  if (nthreads >= 1) then
430  if (associated(d)) deallocate(d)
431  if (associated(al)) deallocate(al)
432  if (associated(au)) deallocate(au)
433  if (associated(indexl)) deallocate(indexl)
434  if (associated(indexu)) deallocate(indexu)
435  if (associated(iteml)) deallocate(iteml)
436  if (associated(itemu)) deallocate(itemu)
437  if (ncontact.ne.0) then
438  if (associated(cal)) deallocate(cal)
439  if (associated(cau)) deallocate(cau)
440  if (associated(indexcl)) deallocate(indexcl)
441  if (associated(indexcu)) deallocate(indexcu)
442  if (associated(itemcl)) deallocate(itemcl)
443  if (associated(itemcu)) deallocate(itemcu)
444  end if
445  end if
446  nullify(colorindex)
447  nullify(perm)
448  nullify(iperm)
449  nullify(alu)
450  nullify(d)
451  nullify(al)
452  nullify(au)
453  nullify(indexl)
454  nullify(indexu)
455  nullify(iteml)
456  nullify(itemu)
457  if (ncontact.ne.0) then
458  nullify(cal)
459  nullify(cau)
460  nullify(indexcl)
461  nullify(indexcu)
462  nullify(itemcl)
463  nullify(itemcu)
464  call hecmw_cmat_lu_free( hecmat )
465  endif
466  ncontact = 0
467  initialized = .false.
468  end subroutine hecmw_precond_ssor_nn_clear
469 
470  subroutine write_debug_info
471  implicit none
472  integer(kind=kint) :: my_rank, ic, in
473  my_rank = hecmw_comm_get_rank()
474  !--------------------> debug: shizawa
475  if (my_rank.eq.0) then
476  write(*,*) 'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
477  endif
478  write(19000+my_rank,'(a)') '#NCOLORTot'
479  write(19000+my_rank,*) ncolor
480  write(19000+my_rank,'(a)') '#ic COLORindex(ic-1)+1 COLORindex(ic)'
481  do ic=1,ncolor
482  write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
483  enddo ! ic
484  write(29000+my_rank,'(a)') '#n_node'
485  write(29000+my_rank,*) n
486  write(29000+my_rank,'(a)') '#in OLDtoNEW(in) NEWtoOLD(in)'
487  do in=1,n
488  write(29000+my_rank,*) in, iperm(in), perm(in)
489  if (perm(iperm(in)) .ne. in) then
490  write(29000+my_rank,*) '** WARNING **: NEWtoOLD and OLDtoNEW: ',in
491  endif
492  enddo
493  end subroutine write_debug_info
494 
495  subroutine check_ordering
496  implicit none
497  integer(kind=kint) :: ic, i, j, k
498  integer(kind=kint), allocatable :: iicolor(:)
499  ! check color dependence of neighbouring nodes
500  if (ncolor.gt.1) then
501  allocate(iicolor(n))
502  do ic=1,ncolor
503  do i= colorindex(ic-1)+1, colorindex(ic)
504  iicolor(i) = ic
505  enddo ! i
506  enddo ! ic
507  ! FORWARD: L-part
508  do ic=1,ncolor
509  do i= colorindex(ic-1)+1, colorindex(ic)
510  do j= indexl(i-1)+1, indexl(i)
511  k= iteml(j)
512  if (iicolor(i).eq.iicolor(k)) then
513  write(*,*) .eq.'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
514  endif
515  enddo ! j
516  enddo ! i
517  enddo ! ic
518  ! BACKWARD: U-part
519  do ic=ncolor, 1, -1
520  do i= colorindex(ic), colorindex(ic-1)+1, -1
521  do j= indexu(i-1)+1, indexu(i)
522  k= itemu(j)
523  if (iicolor(i).eq.iicolor(k)) then
524  write(*,*) .eq.'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
525  endif
526  enddo ! j
527  enddo ! i
528  enddo ! ic
529  deallocate(iicolor)
530  endif ! if (NColor.gt.1)
531  !--------------------< debug: shizawa
532  end subroutine check_ordering
533 
534 end module hecmw_precond_ssor_nn
subroutine, public hecmw_cmat_lu(hecMAT)
subroutine, public hecmw_cmat_lu_free(hecMAT)
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecMAT)
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecMAT)
subroutine, public hecmw_matrix_reorder_values(N, NDOF, perm, iperm, indexL, indexU, itemL, itemU, AL, AU, D, indexLp, indexUp, itemLp, itemUp, ALp, AUp, Dp)
subroutine, public hecmw_matrix_reorder_renum_item(N, perm, indexXp, itemXp)
subroutine, public hecmw_matrix_reorder_profile(N, perm, iperm, indexL, indexU, itemL, itemU, indexLp, indexUp, itemLp, itemUp)
subroutine, public hecmw_precond_ssor_nn_setup(hecMAT)
subroutine, public hecmw_precond_ssor_nn_clear(hecMAT)
subroutine, public hecmw_precond_ssor_nn_apply(ZP, NDOF)
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
integer(kind=kint) function hecmw_comm_get_rank()
subroutine, public hecmw_matrix_ordering_rcm(N, indexL, itemL, indexU, itemU, perm, iperm)
subroutine, public hecmw_matrix_ordering_mc(N, indexL, itemL, indexU, itemU, perm_cur, ncolor_in, ncolor_out, COLORindex, perm, iperm)