26 integer(kind=kint) :: N
27 real(kind=
kreal),
pointer :: d(:) => null()
28 real(kind=
kreal),
pointer :: al(:) => null()
29 real(kind=
kreal),
pointer :: au(:) => null()
30 integer(kind=kint),
pointer :: indexL(:) => null()
31 integer(kind=kint),
pointer :: indexU(:) => null()
32 integer(kind=kint),
pointer :: itemL(:) => null()
33 integer(kind=kint),
pointer :: itemU(:) => null()
34 real(kind=
kreal),
pointer :: alu(:) => null()
36 integer(kind=kint) :: NContact = 0
37 real(kind=
kreal),
pointer :: cal(:) => null()
38 real(kind=
kreal),
pointer :: cau(:) => null()
39 integer(kind=kint),
pointer :: indexCL(:) => null()
40 integer(kind=kint),
pointer :: indexCU(:) => null()
41 integer(kind=kint),
pointer :: itemCL(:) => null()
42 integer(kind=kint),
pointer :: itemCU(:) => null()
44 integer(kind=kint) :: NColor
45 integer(kind=kint),
pointer :: COLORindex(:) => null()
46 integer(kind=kint),
pointer :: perm(:) => null()
47 integer(kind=kint),
pointer :: iperm(:) => null()
49 logical,
save :: isFirst = .true.
51 logical,
save :: INITIALIZED = .false.
58 integer(kind=kint ) :: npl, npu, npcl, npcu
59 real (kind=
kreal),
allocatable :: cd(:)
60 integer(kind=kint ) :: ncolor_in
61 real (kind=
kreal) :: sigma_diag
62 real (kind=
kreal) :: alutmp(hecmat%NDOF,hecmat%NDOF), pw(hecmat%NDOF)
63 integer(kind=kint ) :: ii, i, j, k, ndof, ndof2
64 integer(kind=kint ) :: nthreads = 1
65 integer(kind=kint ),
allocatable :: perm_tmp(:)
72 if (hecmat%Iarray(98) == 1)
then
74 else if (hecmat%Iarray(97) == 1)
then
89 ncontact = hecmat%cmat%n_val
91 if (ncontact.gt.0)
then
95 if (nthreads == 1)
then
97 allocate(colorindex(0:1), perm(n), iperm(n))
105 allocate(colorindex(0:n), perm_tmp(n), perm(n), iperm(n))
107 hecmat%indexU, hecmat%itemU, perm_tmp, iperm)
110 hecmat%indexU, hecmat%itemU, perm_tmp, &
111 ncolor_in, ncolor, colorindex, perm, iperm)
118 npl = hecmat%indexL(n)
119 npu = hecmat%indexU(n)
120 allocate(indexl(0:n), indexu(0:n), iteml(npl), itemu(npu))
122 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
123 indexl, indexu, iteml, itemu)
128 allocate(d(ndof2*n), al(ndof2*npl), au(ndof2*npu))
130 hecmat%indexL, hecmat%indexU, hecmat%itemL, hecmat%itemU, &
131 hecmat%AL, hecmat%AU, hecmat%D, &
132 indexl, indexu, iteml, itemu, al, au, d)
138 if (ncontact.gt.0)
then
139 npcl = hecmat%indexCL(n)
140 npcu = hecmat%indexCU(n)
141 allocate(indexcl(0:n), indexcu(0:n), itemcl(npcl), itemcu(npcu))
143 hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
144 indexcl, indexcu, itemcl, itemcu)
146 allocate(cd(ndof2*n), cal(ndof2*npcl), cau(ndof2*npcu))
148 hecmat%indexCL, hecmat%indexCU, hecmat%itemCL, hecmat%itemCU, &
149 hecmat%CAL, hecmat%CAU, hecmat%D, &
150 indexcl, indexcu, itemcl, itemcu, cal, cau, cd)
157 allocate(alu(ndof2*n))
164 if (ncontact.gt.0)
then
165 do k= 1, hecmat%cmat%n_val
166 if (hecmat%cmat%pair(k)%i.ne.hecmat%cmat%pair(k)%j) cycle
167 ii = iperm( hecmat%cmat%pair(k)%i )
170 alu(ndof2*(ii-1)+ndof*(i-1)+j) = alu(ndof2*(ii-1)+ndof*(i-1)+j) + hecmat%cmat%pair(k)%val(i, j)
181 alutmp(i,j) = alu(ndof2*(ii-1)+(i-1)*ndof+j)
182 if (i==j) alutmp(i,j)=alutmp(i,j)*sigma_diag
186 alutmp(k,k)= 1.d0/alutmp(k,k)
188 alutmp(i,k)= alutmp(i,k) * alutmp(k,k)
190 pw(j)= alutmp(i,j) - alutmp(i,k)*alutmp(k,j)
199 alu(ndof2*(ii-1)+(i-1)*ndof+j)= alutmp(i,j)
209 hecmat%Iarray(98) = 0
210 hecmat%Iarray(97) = 0
219 real(kind=
kreal),
intent(inout) :: zp(:)
220 integer(kind=kint) :: ic, i, iold, j, isl, iel, isu, ieu, k, ndof, ndof2, idof,jdof
221 real(kind=
kreal) :: sw(ndof), x(ndof)
224 integer(kind=kint),
parameter :: numofblockperthread = 100
225 integer(kind=kint),
save :: numofthread = 1, numofblock
226 integer(kind=kint),
save,
allocatable :: ictoblockindex(:)
227 integer(kind=kint),
save,
allocatable :: blockindextocolorindex(:)
228 integer(kind=kint),
save :: sectorcachesize0, sectorcachesize1
229 integer(kind=kint) :: blockindex, elementcount, numofelement, ii
230 real(kind=
kreal) :: numofelementperblock
231 integer(kind=kint) :: my_rank
236 numofblock = numofthread * numofblockperthread
237 if (
allocated(ictoblockindex))
deallocate(ictoblockindex)
238 if (
allocated(blockindextocolorindex))
deallocate(blockindextocolorindex)
239 allocate (ictoblockindex(0:ncolor), &
240 blockindextocolorindex(0:numofblock + ncolor))
241 numofelement = n + indexl(n) + indexu(n)
242 numofelementperblock = dble(numofelement) / numofblock
245 ictoblockindex(0) = 0
246 blockindextocolorindex = -1
247 blockindextocolorindex(0) = 0
256 do i = colorindex(ic-1)+1, colorindex(ic)
257 elementcount = elementcount + 1
258 elementcount = elementcount + (indexl(i) - indexl(i-1))
259 elementcount = elementcount + (indexu(i) - indexu(i-1))
260 if (elementcount > ii * numofelementperblock &
261 .or. i == colorindex(ic))
then
263 blockindex = blockindex + 1
264 blockindextocolorindex(blockindex) = i
269 ictoblockindex(ic) = blockindex
271 numofblock = blockindex
274 sectorcachesize0, sectorcachesize1 )
294 do blockindex = ictoblockindex(ic-1)+1, ictoblockindex(ic)
295 do i = blockindextocolorindex(blockindex-1)+1, &
296 blockindextocolorindex(blockindex)
299 sw(idof) = zp(ndof*(iold-1)+idof)
306 x(idof) = zp(ndof*(k-1)+idof)
310 sw(idof) = sw(idof) - al(ndof2*(j-1)+ndof*(idof-1)+jdof)*x(jdof)
315 if (ncontact.ne.0)
then
321 x(idof) = zp(ndof*(k-1)+idof)
325 sw(idof) = sw(idof) - cal(ndof2*(j-1)+ndof*(idof-1)+jdof)*x(jdof)
334 x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+jdof )*x(jdof)
337 do idof = ndof, 1, -1
338 do jdof = ndof, idof+1, -1
339 x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+jdof)*x(jdof)
341 x(idof) = alu(ndof2*(i-1)+(ndof+1)*(idof-1)+1)*x(idof)
343 zp(ndof*(iold-1)+1:ndof*(iold-1)+ndof) = x(1:ndof)
353 do blockindex = ictoblockindex(ic), ictoblockindex(ic-1)+1, -1
354 do i = blockindextocolorindex(blockindex), &
355 blockindextocolorindex(blockindex-1)+1, -1
366 x(idof) = zp(ndof*(k-1)+idof)
370 sw(idof) = sw(idof) + au(ndof2*(j-1)+ndof*(idof-1)+jdof)*x(jdof)
375 if (ncontact.gt.0)
then
376 isu= indexcu(i-1) + 1
381 x(idof) = zp(ndof*(k-1)+idof)
385 sw(idof) = sw(idof) + cau(ndof2*(j-1)+ndof*(idof-1)+jdof)*x(jdof)
394 x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+k)*x(k)
397 do idof = ndof, 1, -1
398 do k = ndof, idof+1, -1
399 x(idof) = x(idof) - alu(ndof2*(i-1)+ndof*(idof-1)+k)*x(k)
401 x(idof) = alu(ndof2*(i-1)+(ndof+1)*(idof-1)+1)*x(idof)
405 zp(ndof*(iold-1)+idof) = zp(ndof*(iold-1)+idof) - x(idof)
423 integer(kind=kint ) :: nthreads = 1
425 if (
associated(colorindex))
deallocate(colorindex)
426 if (
associated(perm))
deallocate(perm)
427 if (
associated(iperm))
deallocate(iperm)
428 if (
associated(alu))
deallocate(alu)
429 if (nthreads >= 1)
then
430 if (
associated(d))
deallocate(d)
431 if (
associated(al))
deallocate(al)
432 if (
associated(au))
deallocate(au)
433 if (
associated(indexl))
deallocate(indexl)
434 if (
associated(indexu))
deallocate(indexu)
435 if (
associated(iteml))
deallocate(iteml)
436 if (
associated(itemu))
deallocate(itemu)
437 if (ncontact.ne.0)
then
438 if (
associated(cal))
deallocate(cal)
439 if (
associated(cau))
deallocate(cau)
440 if (
associated(indexcl))
deallocate(indexcl)
441 if (
associated(indexcu))
deallocate(indexcu)
442 if (
associated(itemcl))
deallocate(itemcl)
443 if (
associated(itemcu))
deallocate(itemcu)
457 if (ncontact.ne.0)
then
467 initialized = .false.
470 subroutine write_debug_info
472 integer(kind=kint) :: my_rank, ic, in
475 if (my_rank.eq.0)
then
476 write(*,*)
'DEBUG: Output fort.19000+myrank and fort.29000+myrank for coloring information'
478 write(19000+my_rank,
'(a)')
'#NCOLORTot'
479 write(19000+my_rank,*) ncolor
480 write(19000+my_rank,
'(a)')
'#ic COLORindex(ic-1)+1 COLORindex(ic)'
482 write(19000+my_rank,*) ic, colorindex(ic-1)+1,colorindex(ic)
484 write(29000+my_rank,
'(a)')
'#n_node'
485 write(29000+my_rank,*) n
486 write(29000+my_rank,
'(a)')
'#in OLDtoNEW(in) NEWtoOLD(in)'
488 write(29000+my_rank,*) in, iperm(in), perm(in)
489 if (perm(iperm(in)) .ne. in)
then
490 write(29000+my_rank,*)
'** WARNING **: NEWtoOLD and OLDtoNEW: ',in
493 end subroutine write_debug_info
495 subroutine check_ordering
497 integer(kind=kint) :: ic, i, j, k
498 integer(kind=kint),
allocatable :: iicolor(:)
500 if (ncolor.gt.1)
then
503 do i= colorindex(ic-1)+1, colorindex(ic)
509 do i= colorindex(ic-1)+1, colorindex(ic)
510 do j= indexl(i-1)+1, indexl(i)
512 if (iicolor(i).eq.iicolor(k))
then
513 write(*,*) .eq.
'** ERROR **: L-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
520 do i= colorindex(ic), colorindex(ic-1)+1, -1
521 do j= indexu(i-1)+1, indexu(i)
523 if (iicolor(i).eq.iicolor(k))
then
524 write(*,*) .eq.
'** ERROR **: U-part: iicolor(i)iicolor(k)',i,k,iicolor(i)
532 end subroutine check_ordering
real(kind=kreal) function, public hecmw_mat_get_sigma_diag(hecMAT)
integer(kind=kint) function, public hecmw_mat_get_ncolor_in(hecMAT)
subroutine, public hecmw_matrix_reorder_values(N, NDOF, perm, iperm, indexL, indexU, itemL, itemU, AL, AU, D, indexLp, indexUp, itemLp, itemUp, ALp, AUp, Dp)
subroutine, public hecmw_matrix_reorder_renum_item(N, perm, indexXp, itemXp)
subroutine, public hecmw_matrix_reorder_profile(N, perm, iperm, indexL, indexU, itemL, itemU, indexLp, indexUp, itemLp, itemUp)
subroutine, public hecmw_precond_ssor_nn_setup(hecMAT)
subroutine, public hecmw_precond_ssor_nn_clear(hecMAT)
subroutine, public hecmw_precond_ssor_nn_apply(ZP, NDOF)
subroutine, public hecmw_tuning_fx_calc_sector_cache(N, NDOF, sectorCacheSize0, sectorCacheSize1)
integer(kind=4), parameter kreal
integer(kind=kint) function hecmw_comm_get_rank()
subroutine, public hecmw_matrix_ordering_rcm(N, indexL, itemL, indexU, itemU, perm, iperm)
subroutine, public hecmw_matrix_ordering_mc(N, indexL, itemL, indexU, itemU, perm_cur, ncolor_in, ncolor_out, COLORindex, perm, iperm)