FrontISTR  5.2.0
Large-scale structural analysis program with finit element method
hecmw_local_matrix.f90
Go to the documentation of this file.
1 !-------------------------------------------------------------------------------
2 ! Copyright (c) 2019 FrontISTR Commons
3 ! This software is released under the MIT License, see LICENSE.txt
4 !-------------------------------------------------------------------------------
5 
7  use hecmw_util
9 
10  private
11  public :: hecmwst_local_matrix
12  public :: hecmw_localmat_write
13  public :: hecmw_localmat_blocking
14  public :: hecmw_localmat_free
15  public :: hecmw_localmat_mulvec
16  public :: hecmw_trimatmul_ttkt
17  public :: hecmw_trimatmul_ttkt_mpc
18  public :: hecmw_localmat_transpose
19  public :: hecmw_localmat_assemble
20  public :: hecmw_localmat_add
23  public :: hecmw_localmat_multmat
26 
27  type hecmwst_local_matrix
28  integer :: nr, nc, nnz, ndof
29  integer(kind=kint), pointer :: index(:)
30  integer(kind=kint), pointer :: item(:)
31  real(kind=kreal), pointer :: a(:)
32  end type hecmwst_local_matrix
33 
34  integer(kind=kint), parameter :: cNCOL_ITEM = 2
35  integer(kind=kint), parameter :: cLID = 1
36  integer(kind=kint), parameter :: cRANK = 2
37  integer(kind=kint), parameter :: cGID = 3
38 
39  integer(kind=kint), parameter :: DEBUG = 0
40  integer(kind=kint), parameter :: TIMER = 0
41 
42 contains
43 
44  subroutine hecmw_localmat_write(Tmat,iunit)
45  implicit none
46  type (hecmwst_local_matrix), intent(in) :: tmat
47  integer(kind=kint), intent(in) :: iunit
48  integer(kind=kint) :: nr, nc, nnz, ndof, ndof2, i, js, je, j, jj
49  nr=tmat%nr
50  nc=tmat%nc
51  nnz=tmat%nnz
52  ndof=tmat%ndof
53  ndof2=ndof*ndof
54  write(iunit,*) 'nr, nc, nnz, ndof', nr, nc, nnz, ndof
55  write(iunit,*) 'i, j, A'
56  do i=1,nr
57  js=tmat%index(i-1)+1
58  je=tmat%index(i)
59  do j=js,je
60  jj=tmat%item(j)
61  if (ndof==1) then
62  write(iunit,*) i, jj, tmat%A(j)
63  else
64  write(iunit,*) i, jj
65  write(iunit,*) tmat%A((j-1)*ndof2+1:j*ndof2)
66  endif
67  enddo
68  enddo
69  end subroutine hecmw_localmat_write
70 
71  subroutine hecmw_localmat_write_ij(Tmat,iunit)
72  implicit none
73  type (hecmwst_local_matrix), intent(in) :: tmat
74  integer(kind=kint), intent(in) :: iunit
75  integer(kind=kint) :: nr, nc, nnz, ndof, i, js, je, j, jj
76  nr=tmat%nr
77  nc=tmat%nc
78  nnz=tmat%nnz
79  ndof=tmat%ndof
80  write(iunit,*) 'nr, nc, nnz, ndof', nr, nc, nnz, ndof
81  write(iunit,*) 'i, j'
82  do i=1,nr
83  js=tmat%index(i-1)+1
84  je=tmat%index(i)
85  do j=js,je
86  jj=tmat%item(j)
87  write(iunit,*) i, jj
88  enddo
89  enddo
90  end subroutine hecmw_localmat_write_ij
91 
92  subroutine hecmw_localmat_blocking(Tmat, ndof, BTmat)
93  implicit none
94  type (hecmwst_local_matrix), intent(in) :: tmat
95  integer, intent(in) :: ndof
96  type (hecmwst_local_matrix), intent(out) :: btmat
97  integer, allocatable :: iw(:)
98  integer :: ndof2, i, icnt, idof, idx, ls, le, l, j, jb, k, lb0, jdof, ks, ke
99  ndof2=ndof*ndof
100 
101  if (mod(tmat%nr, ndof) /= 0 .or. mod(tmat%nc, ndof) /= 0) then
102  write(0,*) tmat%nr, tmat%nc, ndof
103  stop 'ERROR: blocking_Tmat failed'
104  endif
105  btmat%nr=tmat%nr/ndof
106  btmat%nc=tmat%nc/ndof
107  btmat%ndof=ndof
108 
109  allocate(iw(btmat%nc))
110  allocate(btmat%index(0:btmat%nr))
111 
112  btmat%index(0)=0
113  do i=1,btmat%nr
114  icnt=0
115  do idof=1,ndof
116  idx=(i-1)*ndof+idof
117  ls=tmat%index(idx-1)+1
118  le=tmat%index(idx)
119  lcol: do l=ls,le
120  j=tmat%item(l)
121  jb=(j-1)/ndof+1
122  do k=1,icnt
123  if (iw(k)==jb) cycle lcol
124  enddo
125  icnt=icnt+1
126  iw(icnt)=jb
127  enddo lcol
128  enddo
129  btmat%index(i)=btmat%index(i-1)+icnt
130  enddo
131 
132  btmat%nnz=btmat%index(btmat%nr)
133  allocate(btmat%item(btmat%nnz))
134  allocate(btmat%A(btmat%nnz*ndof2))
135  btmat%A=0.d0
136 
137  do i=1,btmat%nr
138  icnt=0
139  do idof=1,ndof
140  idx=(i-1)*ndof+idof
141  ls=tmat%index(idx-1)+1
142  le=tmat%index(idx)
143  lcol2: do l=ls,le
144  j=tmat%item(l)
145  jb=(j-1)/ndof+1
146  do k=1,icnt
147  if (iw(k)==jb) cycle lcol2
148  enddo
149  icnt=icnt+1
150  iw(icnt)=jb
151  enddo lcol2
152  enddo
153  ! if (icnt /= BTmat%index(i)-BTmat%index(i-1)) stop 'ERROR: blocking Tmat'
154  ! ! call qsort(iw, 1, icnt)
155  lb0=btmat%index(i-1)
156  do k=1,icnt
157  btmat%item(lb0+k)=iw(k)
158  enddo
159  do idof=1,ndof
160  idx=(i-1)*ndof+idof
161  ls=tmat%index(idx-1)+1
162  le=tmat%index(idx)
163  lcol3: do l=ls,le
164  j=tmat%item(l)
165  jb=(j-1)/ndof+1
166  jdof=mod((j-1), ndof)+1
167  ks=btmat%index(i-1)+1
168  ke=btmat%index(i)
169  do k=ks,ke
170  if (btmat%item(k)==jb) then
171  btmat%A((k-1)*ndof2+(idof-1)*ndof+jdof)=tmat%A(l)
172  cycle lcol3
173  endif
174  enddo
175  stop 'ERROR: something wrong in blocking Tmat'
176  enddo lcol3
177  enddo
178  enddo
179  end subroutine hecmw_localmat_blocking
180 
181  subroutine hecmw_localmat_free(Tmat)
182  implicit none
183  type (hecmwst_local_matrix), intent(inout) :: tmat
184  deallocate(tmat%index)
185  if (associated(tmat%item)) deallocate(tmat%item)
186  if (associated(tmat%A)) deallocate(tmat%A)
187  tmat%nr=0
188  tmat%nc=0
189  tmat%nnz=0
190  tmat%ndof=0
191  end subroutine hecmw_localmat_free
192 
193  subroutine hecmw_trimatmul_ttkt(hecMESH, BTtmat, hecMAT, BTmat, &
194  iwS, num_lagrange, hecTKT)
196  implicit none
197  type (hecmwst_local_mesh), intent(inout) :: hecmesh
198  type (hecmwst_local_matrix), intent(inout) :: bttmat, btmat
199  type (hecmwst_matrix), intent(in) :: hecmat
200  integer(kind=kint), intent(in) :: iws(:)
201  integer(kind=kint), intent(in) :: num_lagrange
202  type (hecmwst_matrix), intent(inout) :: hectkt
203  if (hecmesh%n_neighbor_pe == 0) then
204  call hecmw_trimatmul_ttkt_serial(hecmesh, bttmat, hecmat, btmat, &
205  iws, num_lagrange, hectkt)
206  else
207  call hecmw_trimatmul_ttkt_parallel(hecmesh, bttmat, hecmat, btmat, &
208  iws, num_lagrange, hectkt)
209  endif
210  end subroutine hecmw_trimatmul_ttkt
211 
212  subroutine hecmw_trimatmul_ttkt_serial(hecMESH, BTtmat, hecMAT, BTmat, &
213  iwS, num_lagrange, hecTKT)
215  implicit none
216  type (hecmwST_local_mesh), intent(in) :: hecMESH
217  type (hecmwST_local_matrix), intent(in) :: BTtmat, BTmat
218  type (hecmwST_matrix), intent(in) :: hecMAT
219  integer(kind=kint), intent(in) :: iwS(:)
220  integer(kind=kint), intent(in) :: num_lagrange
221  type (hecmwST_matrix), intent(inout) :: hecTKT
222  type (hecmwST_local_matrix) :: BTtKT
223  real(kind=kreal) :: num
224 
225  ! perform three matrices multiplication for elimination
226  call trimatmul_ttkt(bttmat, hecmat, btmat, bttkt)
227  !write(700+hecmw_comm_get_rank(),*) 'DEBUG: BTtKT(MPC)'
228  !call hecmw_localmat_write(BTtKT, 700+hecmw_comm_get_rank())
229 
230  ! place small numbers where the DOF is eliminated
231  !num = hecmw_mat_diag_max(hecMAT, hecMESH) * 1.0d-10
232  num = 1.d0
233  call place_num_on_diag(bttkt, iws, num_lagrange, num)
234  !write(700+hecmw_comm_get_rank(),*) 'DEBUG: BTtKT(MPC)'
235  !call hecmw_localmat_write(BTtKT, 700+hecmw_comm_get_rank())
236 
237  ! make_new HECMW matrix
238  call make_new_hecmat(hecmat, bttkt, hectkt)
239  call hecmw_localmat_free(bttkt)
240  end subroutine hecmw_trimatmul_ttkt_serial
241 
242  subroutine hecmw_trimatmul_ttkt_parallel(hecMESH, BTtmat, hecMAT, BTmat, &
243  iwS, num_lagrange, hecTKT)
245  implicit none
246  type (hecmwST_local_mesh), intent(inout) :: hecMESH
247  type (hecmwST_local_matrix), intent(inout) :: BTtmat, BTmat
248  type (hecmwST_matrix), intent(in) :: hecMAT
249  integer(kind=kint), intent(in) :: iwS(:)
250  integer(kind=kint), intent(in) :: num_lagrange
251  type (hecmwST_matrix), intent(inout) :: hecTKT
252  type (hecmwST_local_matrix) :: BKmat, BTtKmat, BTtKTmat
253  real(kind=kreal) :: num
254  real(kind=kreal) :: t0, t1
255 
256  ! perform three matrices multiplication for elimination
257  t0 = hecmw_wtime()
258  call hecmw_localmat_init_with_hecmat(bkmat, hecmat)
259  if (debug >= 3) then
260  write(700+hecmw_comm_get_rank(),*) 'BKmat (hecMAT)'
261  if (debug == 3) then
262  call hecmw_localmat_write_ij(bkmat, 700+hecmw_comm_get_rank())
263  else
264  call hecmw_localmat_write(bkmat, 700+hecmw_comm_get_rank())
265  endif
266  endif
267  t1 = hecmw_wtime()
268  if (timer >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (1) : ",t1-t0
269 
270  t0 = hecmw_wtime()
271  call hecmw_localmat_multmat(bttmat, bkmat, hecmesh, bttkmat)
272  if (debug >= 2) write(0,*) ' DEBUG2: multiply Tt and K done'
273  if (debug >= 3) then
274  write(700+hecmw_comm_get_rank(),*) 'BTtKmat'
275  if (debug == 3) then
276  call hecmw_localmat_write_ij(bttkmat, 700+hecmw_comm_get_rank())
277  else
278  call hecmw_localmat_write(bttkmat, 700+hecmw_comm_get_rank())
279  endif
280  endif
281  call hecmw_localmat_free(bkmat)
282  t1 = hecmw_wtime()
283  if (timer >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (2) : ",t1-t0
284 
285  t0 = hecmw_wtime()
286  call hecmw_localmat_multmat(bttkmat, btmat, hecmesh, bttktmat)
287  if (debug >= 2) write(0,*) ' DEBUG2: multiply TtK and T done'
288  if (debug >= 3) then
289  write(700+hecmw_comm_get_rank(),*) 'BTtKTmat'
290  if (debug == 3) then
291  call hecmw_localmat_write_ij(bttktmat, 700+hecmw_comm_get_rank())
292  else
293  call hecmw_localmat_write(bttktmat, 700+hecmw_comm_get_rank())
294  endif
295  endif
296  call hecmw_localmat_free(bttkmat)
297  t1 = hecmw_wtime()
298  if (timer >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (3) : ",t1-t0
299 
300  t0 = hecmw_wtime()
301  ! place small numbers where the DOF is eliminated
302  !num = hecmw_mat_diag_max(hecMAT, hecMESH) * 1.0d-10
303  num = 1.d0
304  call place_num_on_diag(bttktmat, iws, num_lagrange, num)
305  if (debug >= 3) then
306  write(700+hecmw_comm_get_rank(),*) 'num_lagrange =', num_lagrange
307  write(700+hecmw_comm_get_rank(),*) 'iwS(1:num_lagrange)'
308  write(700+hecmw_comm_get_rank(),*) iws(1:num_lagrange)
309  write(700+hecmw_comm_get_rank(),*) 'BTtKTmat (place 1.0 on slave diag)'
310  if (debug == 3) then
311  call hecmw_localmat_write_ij(bttktmat, 700+hecmw_comm_get_rank())
312  else
313  call hecmw_localmat_write(bttktmat, 700+hecmw_comm_get_rank())
314  endif
315  endif
316  t1 = hecmw_wtime()
317  if (timer >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (4) : ",t1-t0
318 
319  t0 = hecmw_wtime()
320  ! make_new HECMW matrix
321  call make_new_hecmat(hecmat, bttktmat, hectkt)
322  call hecmw_localmat_free(bttktmat)
323  t1 = hecmw_wtime()
324  if (timer >= 1) write(0, '(A,f10.4)') "#### hecmw_trimatmul_TtKT_parallel (5) : ",t1-t0
325  end subroutine hecmw_trimatmul_ttkt_parallel
326 
327  subroutine trimatmul_ttkt(BTtmat, hecMAT, BTmat, BTtKT)
328  implicit none
329  type (hecmwST_local_matrix), intent(in) :: BTtmat, BTmat
330  type (hecmwST_matrix), intent(in) :: hecMAT
331  type (hecmwST_local_matrix), intent(out) :: BTtKT
332  integer :: nr, nc, ndof, ndof2, i, icnt, js, je, j, jj, ks, ke, k, kk
333  integer :: ls, le, l, ll, m, ms, me, mm
334  integer, allocatable :: iw(:)
335  real(kind=kreal), pointer :: ttp(:), kp(:), tp(:), ttktp(:)
336  ! real(kind=kreal) :: tsym_s, tsym_e, tnum_s, tnum_e
337 
338  nr=bttmat%nr
339  nc=btmat%nc
340  ndof=bttmat%ndof
341  ndof2=ndof*ndof
342 
343  bttkt%nr=nr
344  bttkt%nc=nc
345  bttkt%ndof=ndof
346  allocate(bttkt%index(0:nr))
347 
348  ! tsym_s = hecmw_wtime()
349 
350  !$omp parallel default(none), &
351  !$omp& private(iw,i,icnt,js,je,j,jj,ks,ke,k,kk,ls,le,l,ll,m), &
352  !$omp& shared(nr,nc,BTtmat,hecMAT,BTmat,BTtKT)
353  allocate(iw(nc))
354  !$omp do
355  do i=1,nr
356  icnt=0
357  js=bttmat%index(i-1)+1
358  je=bttmat%index(i)
359  do j=js,je
360  jj=bttmat%item(j)
361  ! lower
362  ks=hecmat%indexL(jj-1)+1
363  ke=hecmat%indexL(jj)
364  do k=ks,ke
365  kk=hecmat%itemL(k)
366  ls=btmat%index(kk-1)+1
367  le=btmat%index(kk)
368  ll1: do l=ls,le
369  ll=btmat%item(l)
370  do m=1,icnt
371  if (iw(m)==ll) cycle ll1
372  enddo
373  icnt=icnt+1
374  iw(icnt)=ll
375  !if (i==1) write(0,*) 'l', icnt, jj, kk, ll
376  enddo ll1
377  enddo
378  ! diagonal
379  ls=btmat%index(jj-1)+1
380  le=btmat%index(jj)
381  ll2: do l=ls,le
382  ll=btmat%item(l)
383  do m=1,icnt
384  if (iw(m)==ll) cycle ll2
385  enddo
386  icnt=icnt+1
387  iw(icnt)=ll
388  !if (i==1) write(0,*) 'd', icnt, jj, kk, ll
389  enddo ll2
390  ! upper
391  ks=hecmat%indexU(jj-1)+1
392  ke=hecmat%indexU(jj)
393  do k=ks,ke
394  kk=hecmat%itemU(k)
395  ls=btmat%index(kk-1)+1
396  le=btmat%index(kk)
397  ll3: do l=ls,le
398  ll=btmat%item(l)
399  do m=1,icnt
400  if (iw(m)==ll) cycle ll3
401  enddo
402  icnt=icnt+1
403  iw(icnt)=ll
404  !if (i==1) write(0,*) 'u', icnt, jj, kk, ll
405  enddo ll3
406  enddo
407  enddo
408  if (icnt == 0) icnt=1
409  !if (i==1) write(0,*) iw(1:icnt)
410  bttkt%index(i)=icnt
411  enddo
412  !$omp end do
413  deallocate(iw)
414  !$omp end parallel
415 
416  ! tsym_e = hecmw_wtime()
417  ! write(0,*) 'tsym:',tsym_e-tsym_s
418 
419  bttkt%index(0)=0
420  do i=1,nr
421  bttkt%index(i)=bttkt%index(i-1)+bttkt%index(i)
422  enddo
423  !write(0,*) BTtKT%index(1:n)-BTtKT%index(0:n-1)
424 
425  bttkt%nnz=bttkt%index(nr)
426  allocate(bttkt%item(bttkt%nnz))
427  allocate(bttkt%A(bttkt%nnz*ndof2))
428  bttkt%item=0
429  bttkt%A=0.d0
430 
431  ! tnum_s = hecmw_wtime()
432 
433  !$omp parallel default(none), &
434  !$omp& private(i,icnt,js,je,j,jj,ks,ke,k,kk,ls,le,l,ll,m, &
435  !$omp& ms,me,mm,Ttp,Kp,Tp,TtKTp), &
436  !$omp& shared(nr,nc,BTtmat,hecMAT,BTmat,BTtKT,ndof,ndof2)
437  !$omp do
438  do i=1,nr
439  icnt=0
440  ms=bttkt%index(i-1)+1
441  !me=BTtKT%index(i)
442  js=bttmat%index(i-1)+1
443  je=bttmat%index(i)
444  do j=js,je
445  jj=bttmat%item(j)
446  ttp=>bttmat%A((j-1)*ndof2+1:j*ndof2)
447  ! lower
448  ks=hecmat%indexL(jj-1)+1
449  ke=hecmat%indexL(jj)
450  do k=ks,ke
451  kk=hecmat%itemL(k)
452  kp=>hecmat%AL((k-1)*ndof2+1:k*ndof2)
453  ls=btmat%index(kk-1)+1
454  le=btmat%index(kk)
455  do l=ls,le
456  ll=btmat%item(l)
457  tp=>btmat%A((l-1)*ndof2+1:l*ndof2)
458  me=ms-1+icnt
459  mm=-1
460  do m=ms,me
461  if (bttkt%item(m)==ll) mm=m
462  enddo
463  if (mm<0) then
464  icnt=icnt+1
465  mm=me+1
466  bttkt%item(mm)=ll
467  !if (i==1) write(0,*) 'l', mm, jj, kk, ll
468  endif
469  ttktp=>bttkt%A((mm-1)*ndof2+1:mm*ndof2)
470  call blk_trimatmul_add(ndof, ttp, kp, tp, ttktp)
471  enddo
472  enddo
473  ! diagonal
474  kp=>hecmat%D((jj-1)*ndof2+1:jj*ndof2)
475  ls=btmat%index(jj-1)+1
476  le=btmat%index(jj)
477  do l=ls,le
478  ll=btmat%item(l)
479  tp=>btmat%A((l-1)*ndof2+1:l*ndof2)
480  me=ms-1+icnt
481  mm=-1
482  do m=ms,me
483  if (bttkt%item(m)==ll) mm=m
484  enddo
485  if (mm<0) then
486  icnt=icnt+1
487  mm=me+1
488  bttkt%item(mm)=ll
489  !if (i==1) write(0,*) 'd', mm, jj, kk, ll
490  endif
491  ttktp=>bttkt%A((mm-1)*ndof2+1:mm*ndof2)
492  call blk_trimatmul_add(ndof, ttp, kp, tp, ttktp)
493  enddo
494  ! upper
495  ks=hecmat%indexU(jj-1)+1
496  ke=hecmat%indexU(jj)
497  do k=ks,ke
498  kk=hecmat%itemU(k)
499  kp=>hecmat%AU((k-1)*ndof2+1:k*ndof2)
500  ls=btmat%index(kk-1)+1
501  le=btmat%index(kk)
502  do l=ls,le
503  ll=btmat%item(l)
504  tp=>btmat%A((l-1)*ndof2+1:l*ndof2)
505  me=ms-1+icnt
506  mm=-1
507  do m=ms,me
508  if (bttkt%item(m)==ll) mm=m
509  enddo
510  if (mm<0) then
511  icnt=icnt+1
512  mm=me+1
513  bttkt%item(mm)=ll
514  !if (i==1) write(0,*) 'u', mm, jj, kk, ll
515  endif
516  ttktp=>bttkt%A((mm-1)*ndof2+1:mm*ndof2)
517  call blk_trimatmul_add(ndof, ttp, kp, tp, ttktp)
518  enddo
519  enddo
520  enddo
521  if (icnt == 0) then
522  icnt=1
523  bttkt%item(ms)=i
524  endif
525  ! error check!
526  !write(0,*) BTtKT%item(ms:ms-1+icnt)
527  !write(0,*) BTtKT%index(i)-BTtKT%index(i-1), icnt
528  if (ms-1+icnt /= bttkt%index(i)) stop 'ERROR: trimatmul'
529  enddo
530  !$omp end do
531  !$omp end parallel
532 
533  ! tnum_e = hecmw_wtime()
534  ! write(0,*) 'tnum:',tnum_e-tnum_s
535  end subroutine trimatmul_ttkt
536 
537  subroutine blk_trimatmul_add(ndof, A, B, C, ABC)
538  implicit none
539  integer, intent(in) :: ndof
540  real(kind=kreal), intent(in) :: a(:), b(:), c(:)
541  real(kind=kreal), intent(inout) :: abc(:)
542  real(kind=kreal), allocatable :: ab(:)
543  integer :: ndof2, i, j, k, i0, j0, ij, ik, jk
544 
545  ndof2=ndof*ndof
546  allocate(ab(ndof2))
547  ab=0.d0
548 
549  do i=1,ndof
550  i0=(i-1)*ndof
551  do j=1,ndof
552  ij=i0+j
553  j0=(j-1)*ndof
554  do k=1,ndof
555  ik=i0+k
556  jk=j0+k
557  ab(ik)=ab(ik)+a(ij)*b(jk)
558  enddo
559  enddo
560  enddo
561 
562  do i=1,ndof
563  i0=(i-1)*ndof
564  do j=1,ndof
565  ij=i0+j
566  j0=(j-1)*ndof
567  do k=1,ndof
568  ik=i0+k
569  jk=j0+k
570  abc(ik)=abc(ik)+ab(ij)*c(jk)
571  enddo
572  enddo
573  enddo
574 
575  deallocate(ab)
576  end subroutine blk_trimatmul_add
577 
578  subroutine place_num_on_diag(BTtKT, iwS, num_lagrange, num)
579  implicit none
580  type (hecmwST_local_matrix), intent(inout) :: BTtKT
581  integer(kind=kint), intent(in) :: iwS(:)
582  integer(kind=kint), intent(in) :: num_lagrange
583  real(kind=kreal), intent(in) :: num
584  integer(kind=kint) :: ndof, ndof2, ilag, i, idof, js, je, j, jj
585  integer(kind=kint) :: nmissing, k, ks, ke
586  integer(kind=kint), allocatable :: missing(:), cnt(:)
587  integer(kind=kint), pointer :: index(:), item(:)
588  real(kind=kreal), pointer :: a(:)
589 
590  ndof=bttkt%ndof
591  ndof2=ndof*ndof
592 
593  ! check if there are places
594  allocate(missing(num_lagrange))
595  nmissing = 0
596  outer1: do ilag=1,num_lagrange
597  i=(iws(ilag)-1)/ndof+1
598  idof=mod(iws(ilag)-1, ndof)+1
599  js=bttkt%index(i-1)+1
600  je=bttkt%index(i)
601  do j=js,je
602  jj=bttkt%item(j)
603  if (jj==i) cycle outer1 ! found place
604  enddo
605  ! not found
606  do k=1,nmissing
607  if (missing(k) == i) cycle outer1 ! already marked as missing
608  enddo
609  nmissing = nmissing + 1
610  missing(nmissing) = i
611  enddo outer1
612 
613  ! if not, reallocate
614  if (nmissing > 0) then
615  allocate(cnt(bttkt%nr))
616  allocate(index(0:bttkt%nr))
617  do i=1,bttkt%nr
618  cnt(i) = bttkt%index(i) - bttkt%index(i-1)
619  enddo
620  do i=1,nmissing
621  cnt(missing(i)) = cnt(missing(i)) + 1
622  enddo
623  call make_index(bttkt%nr, cnt, index)
624  allocate(item(bttkt%nnz + nmissing))
625  allocate(a(ndof2 * (bttkt%nnz + nmissing)))
626  do i=1,bttkt%nr
627  ks=index(i-1)+1
628  js=bttkt%index(i-1)+1
629  je=bttkt%index(i)
630  item(ks:ks+(je-js))=bttkt%item(js:je)
631  a(ndof2*(ks-1)+1:ndof2*(ks+(je-js)))=bttkt%A(ndof2*(js-1)+1:ndof2*je)
632  enddo
633  do i=1,nmissing
634  ke=index(missing(i))
635  item(ke)=missing(i)
636  a(ndof2*(ke-1)+1:ndof2*ke)=0.d0
637  enddo
638  deallocate(bttkt%index)
639  deallocate(bttkt%item)
640  deallocate(bttkt%A)
641  bttkt%index => index
642  bttkt%item => item
643  bttkt%A => a
644  bttkt%nnz = index(bttkt%nr)
645  deallocate(cnt)
646  endif
647  deallocate(missing)
648 
649  ! place num
650  outer: do ilag=1,num_lagrange
651  i=(iws(ilag)-1)/ndof+1
652  idof=mod(iws(ilag)-1, ndof)+1
653  js=bttkt%index(i-1)+1
654  je=bttkt%index(i)
655  do j=js,je
656  jj=bttkt%item(j)
657  if (jj==i) then
658  !write(0,*) ilag, i, idof
659  bttkt%A((j-1)*ndof2+(idof-1)*ndof+idof)=num
660  cycle outer
661  endif
662  enddo
663  enddo outer
664  end subroutine place_num_on_diag
665 
666  subroutine replace_hecmat(hecMAT, BTtKT)
667  implicit none
668  type (hecmwST_matrix), intent(inout) :: hecMAT
669  type (hecmwST_local_matrix), intent(in) :: BTtKT
670  integer :: nr, nc, ndof, ndof2, i, nl, nu, js, je, j, jj
671  integer :: ksl, ksu, k
672 
673  nr=bttkt%nr
674  nc=bttkt%nc
675  ndof=hecmat%NDOF
676  ndof2=ndof*ndof
677 
678  ! free old hecMAT
679  if (associated(hecmat%AL)) deallocate(hecmat%AL)
680  if (associated(hecmat%AU)) deallocate(hecmat%AU)
681  if (associated(hecmat%itemL)) deallocate(hecmat%itemL)
682  if (associated(hecmat%itemU)) deallocate(hecmat%itemU)
683  hecmat%indexL=0
684  hecmat%indexU=0
685 
686  ! count NPL, NPU
687  !$omp parallel default(none),private(i,nl,nu,js,je,j,jj), &
688  !$omp& shared(nr,BTtKT,hecMAT)
689  !$omp do
690  do i=1,nr
691  nl=0
692  nu=0
693  js=bttkt%index(i-1)+1
694  je=bttkt%index(i)
695  do j=js,je
696  jj=bttkt%item(j)
697  if (jj < i) then
698  nl=nl+1
699  elseif (i < jj) then
700  nu=nu+1
701  else
702  ! diagonal
703  endif
704  enddo
705  hecmat%indexL(i)=nl
706  hecmat%indexU(i)=nu
707  enddo
708  !$omp end do
709  !$omp end parallel
710 
711  hecmat%indexL(0)=0
712  hecmat%indexU(0)=0
713  do i=1,nc
714  hecmat%indexL(i)=hecmat%indexL(i-1)+hecmat%indexL(i)
715  hecmat%indexU(i)=hecmat%indexU(i-1)+hecmat%indexU(i)
716  enddo
717  hecmat%NPL=hecmat%indexL(nc)
718  hecmat%NPU=hecmat%indexU(nc)
719 
720  ! allocate new hecMAT
721  allocate(hecmat%itemL(hecmat%NPL), hecmat%itemU(hecmat%NPU))
722  allocate(hecmat%AL(hecmat%NPL*ndof2), hecmat%AU(hecmat%NPU*ndof2))
723  hecmat%itemL=0
724  hecmat%itemU=0
725  hecmat%D=0.d0
726  hecmat%AL=0.d0
727  hecmat%AU=0.d0
728 
729  ! copy from BTtKT to hecMAT
730  !$omp parallel default(none),private(i,nl,nu,js,je,ksl,ksu,j,jj,k), &
731  !$omp& shared(nr,BTtKT,hecMAT,ndof2)
732  !$omp do
733  do i=1,nr
734  nl=0
735  nu=0
736  js=bttkt%index(i-1)+1
737  je=bttkt%index(i)
738  ksl=hecmat%indexL(i-1)+1
739  ksu=hecmat%indexU(i-1)+1
740  do j=js,je
741  jj=bttkt%item(j)
742  if (jj < i) then
743  k=ksl+nl
744  hecmat%itemL(k)=jj
745  hecmat%AL((k-1)*ndof2+1:k*ndof2)=bttkt%A((j-1)*ndof2+1:j*ndof2)
746  nl=nl+1
747  elseif (i < jj) then
748  k=ksu+nu
749  hecmat%itemU(k)=jj
750  hecmat%AU((k-1)*ndof2+1:k*ndof2)=bttkt%A((j-1)*ndof2+1:j*ndof2)
751  nu=nu+1
752  else
753  hecmat%D((i-1)*ndof2+1:i*ndof2)=bttkt%A((j-1)*ndof2+1:j*ndof2)
754  endif
755  enddo
756  ! if (ksl+nl /= hecMAT%indexL(i)+1) stop 'ERROR: indexL'
757  ! if (ksu+nu /= hecMAT%indexU(i)+1) stop 'ERROR: indexU'
758  enddo
759  !$omp end do
760  !$omp end parallel
761 
762  ! do i=1,hecMAT%NPL
763  ! if (hecMAT%itemL(i) <= 0) stop 'ERROR: negative itemL'
764  ! if (hecMAT%itemL(i) > nc) stop 'ERROR: too big itemL'
765  ! enddo
766  ! do i=1,hecMAT%NPU
767  ! if (hecMAT%itemU(i) <= 0) stop 'ERROR: negative itemU'
768  ! if (hecMAT%itemU(i) > nc) stop 'ERROR: too big itemU'
769  ! enddo
770  end subroutine replace_hecmat
771 
772  subroutine make_new_hecmat(hecMAT, BTtKT, hecTKT)
773  implicit none
774  type(hecmwst_matrix), intent(in) :: hecMAT
775  type(hecmwst_local_matrix), intent(in) :: BTtKT
776  type(hecmwst_matrix), intent(inout) :: hecTKT
777  integer(kind=kint) :: nr, nc, ndof, ndof2
778 
779  nr=bttkt%nr
780  nc=bttkt%nc
781  ndof=bttkt%ndof
782  ndof2=ndof*ndof
783 
784  !write(0,*) 'DEBUG: nr, nc =',nr,nc
785 
786  ! if (nr /= nc) then
787  ! stop 'ERROR: nr /= nc'
788  ! endif
789  hectkt%N =hecmat%N
790  hectkt%NP=nc
791  hectkt%NDOF=ndof
792 
793  if (associated(hectkt%D)) deallocate(hectkt%D)
794  allocate(hectkt%D(nc*ndof2))
795 
796  if (associated(hectkt%indexL)) deallocate(hectkt%indexL)
797  if (associated(hectkt%indexU)) deallocate(hectkt%indexU)
798  allocate(hectkt%indexL(0:nc))
799  allocate(hectkt%indexU(0:nc))
800 
801  hectkt%Iarray=hecmat%Iarray
802  hectkt%Rarray=hecmat%Rarray
803 
804  call replace_hecmat(hectkt, bttkt)
805  end subroutine make_new_hecmat
806 
807  subroutine hecmw_localmat_mulvec(BTmat, V, TV)
808  implicit none
809  type (hecmwst_local_matrix), intent(in) :: btmat
810  real(kind=kreal), intent(in), target :: v(:)
811  real(kind=kreal), intent(out), target :: tv(:)
812  real(kind=kreal), pointer :: tvp(:), tp(:), vp(:)
813  integer :: nr, ndof, ndof2, i, js, je, j, jj, k, kl0, l
814  !!$ real(kind=kreal) :: vnorm
815 
816  nr=btmat%nr
817  ndof=btmat%ndof
818  ndof2=ndof*ndof
819 
820  tv=0.d0
821 
822  !!$ vnorm=0.d0
823  !!$ do i=1,nr*ndof
824  !!$ vnorm=vnorm+V(i)**2
825  !!$ enddo
826  !!$ write(0,*) 'vnorm:', sqrt(vnorm)
827 
828  !$omp parallel default(none),private(i,TVp,js,je,j,jj,Tp,Vp,k,kl0,l), &
829  !$omp& shared(nr,TV,ndof,BTmat,ndof2,V)
830  !$omp do
831  do i=1,nr
832  tvp=>tv((i-1)*ndof+1:i*ndof)
833  js=btmat%index(i-1)+1
834  je=btmat%index(i)
835  do j=js,je
836  jj=btmat%item(j)
837  tp=>btmat%A((j-1)*ndof2+1:j*ndof2)
838  vp=>v((jj-1)*ndof+1:jj*ndof)
839  do k=1,ndof
840  kl0=(k-1)*ndof
841  do l=1,ndof
842  tvp(k)=tvp(k)+tp(kl0+l)*vp(l)
843  enddo
844  enddo
845  enddo
846  enddo
847  !$omp end do
848  !$omp end parallel
849  end subroutine hecmw_localmat_mulvec
850 
851  subroutine hecmw_trimatmul_ttkt_mpc(hecMESH, hecMAT, hecTKT)
852  implicit none
853  type (hecmwst_local_mesh), intent(inout) :: hecmesh
854  type (hecmwst_matrix), intent(in) :: hecmat
855  type (hecmwst_matrix), intent(inout) :: hectkt
856  type (hecmwst_local_matrix) :: btmat, bttmat
857  integer(kind=kint), allocatable :: iws(:)
858  integer(kind=kint) :: ndof, n_mpc, i_mpc
859  integer(kind=kint) :: i, j, k, kk, ilag
860  real(kind=kreal) :: t0, t1
861  t0 = hecmw_wtime()
862  ndof=hecmat%NDOF
863  n_mpc=0
864  outer: do i=1,hecmesh%mpc%n_mpc
865  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
866  if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer
867  enddo
868  n_mpc=n_mpc+1
869  enddo outer
870  allocate(iws(n_mpc))
871  i_mpc=0
872  outer2: do i=1,hecmesh%mpc%n_mpc
873  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
874  if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer2
875  enddo
876  i_mpc=i_mpc+1
877  k=hecmesh%mpc%mpc_index(i-1)+1
878  kk=ndof*(hecmesh%mpc%mpc_item(k)-1)+hecmesh%mpc%mpc_dof(k)
879  iws(i_mpc)=kk
880  enddo outer2
881  if (debug >= 2) then
882  write(700+hecmw_comm_get_rank(),*) 'DEBUG: n_mpc, slaves',n_mpc,iws(1:n_mpc)
883  endif
884  t1 = hecmw_wtime()
885  if (timer >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (1) : ",t1-t0
886  t0 = hecmw_wtime()
887  call make_btmat_mpc(hecmesh, ndof, btmat)
888  if (debug >= 3) then
889  write(700+hecmw_comm_get_rank(),*) 'DEBUG: BTmat(MPC)'
890  if (debug == 3) then
891  call hecmw_localmat_write_ij(btmat,700+hecmw_comm_get_rank())
892  else
893  call hecmw_localmat_write(btmat,700+hecmw_comm_get_rank())
894  endif
895  endif
896  t1 = hecmw_wtime()
897  if (timer >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (2) : ",t1-t0
898  t0 = hecmw_wtime()
899  ! call make_BTtmat_mpc(hecMESH, ndof, BTtmat)
900  call hecmw_localmat_transpose(btmat, bttmat)
901  ! if (hecmw_localmat_equal(BTtmat, BTtmat2) == 0) then
902  ! write(0,*) 'ERROR: BTtmat2 is incorrect!!!'
903  ! else
904  ! write(0,*) 'DEBUG: BTtmat2 is correct'
905  ! endif
906  if (debug >= 3) then
907  write(700+hecmw_comm_get_rank(),*) 'DEBUG: BTtmat(MPC)'
908  if (debug == 3) then
909  call hecmw_localmat_write_ij(bttmat,700+hecmw_comm_get_rank())
910  else
911  call hecmw_localmat_write(bttmat,700+hecmw_comm_get_rank())
912  endif
913  endif
914  t1 = hecmw_wtime()
915  if (timer >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (3) : ",t1-t0
916  t0 = hecmw_wtime()
917  call hecmw_trimatmul_ttkt(hecmesh, bttmat, hecmat, btmat, iws, n_mpc, hectkt)
918  t1 = hecmw_wtime()
919  if (timer >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (4) : ",t1-t0
920  t0 = hecmw_wtime()
921 
922  if (associated(hectkt%B)) deallocate(hectkt%B)
923  if (associated(hectkt%X)) deallocate(hectkt%X)
924  allocate(hectkt%B(ndof*hectkt%NP))
925  allocate(hectkt%X(ndof*hectkt%NP))
926  do i=1, size(hecmat%B)
927  hectkt%B(i) = hecmat%B(i)
928  enddo
929  do i=1, size(hecmat%X)
930  hectkt%X(i) = hecmat%X(i)
931  enddo
932  do ilag=1,n_mpc
933  hectkt%X(iws(ilag)) = 0.d0
934  enddo
935 
936  call hecmw_localmat_free(btmat)
937  call hecmw_localmat_free(bttmat)
938  ! call hecmw_localmat_free(BTtmat2)
939  deallocate(iws)
940  t1 = hecmw_wtime()
941  if (timer >= 1) write(0, '(A,f10.4)') "### hecmw_trimatmul_TtKT_mpc (5) : ",t1-t0
942  end subroutine hecmw_trimatmul_ttkt_mpc
943 
944  subroutine make_btmat_mpc(hecMESH, ndof, BTmat)
945  implicit none
946  type (hecmwst_local_mesh), intent(in) :: hecmesh
947  integer(kind=kint), intent(in) :: ndof
948  type (hecmwst_local_matrix), intent(out) :: btmat
949  type (hecmwst_local_matrix) :: tmat
950  integer(kind=kint) :: n_mpc
951  integer(kind=kint) :: i,j,k,js,jj,kk
952  n_mpc=0
953  outer: do i=1,hecmesh%mpc%n_mpc
954  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
955  if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer
956  enddo
957  n_mpc=n_mpc+1
958  enddo outer
959  tmat%nr=hecmesh%n_node*ndof
960  tmat%nc=tmat%nr
961  tmat%ndof=1
962  allocate(tmat%index(0:tmat%nr))
963  ! count nonzero in each row
964  tmat%index(1:tmat%nr)=1
965  outer2: do i=1,hecmesh%mpc%n_mpc
966  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
967  if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer2
968  enddo
969  k=hecmesh%mpc%mpc_index(i-1)+1
970  kk=ndof*(hecmesh%mpc%mpc_item(k)-1)+hecmesh%mpc%mpc_dof(k)
971  tmat%index(kk)=hecmesh%mpc%mpc_index(i)-hecmesh%mpc%mpc_index(i-1)-1
972  enddo outer2
973  ! index
974  tmat%index(0)=0
975  do i=1,tmat%nr
976  tmat%index(i)=tmat%index(i-1)+tmat%index(i)
977  enddo
978  tmat%nnz=tmat%index(tmat%nr)
979  allocate(tmat%item(tmat%nnz), tmat%A(tmat%nnz))
980  ! diag
981  do i=1,tmat%nr
982  js=tmat%index(i-1)+1
983  tmat%item(js)=i
984  tmat%A(js)=1.d0
985  enddo
986  ! others
987  outer3: do i=1,hecmesh%mpc%n_mpc
988  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
989  if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer3
990  enddo
991  k=hecmesh%mpc%mpc_index(i-1)+1
992  kk=ndof*(hecmesh%mpc%mpc_item(k)-1)+hecmesh%mpc%mpc_dof(k)
993  js=tmat%index(kk-1)+1
994  do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
995  jj = ndof * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
996  tmat%item(js)=jj
997  tmat%A(js)=-hecmesh%mpc%mpc_val(j)
998  js=js+1
999  enddo
1000  enddo outer3
1001  !write(700+hecmw_comm_get_rank(),*) 'DEBUG: Tmat(MPC)'
1002  !call hecmw_localmat_write(Tmat,700+hecmw_comm_get_rank())
1003  call hecmw_localmat_blocking(tmat, ndof, btmat)
1004  call hecmw_localmat_free(tmat)
1005  end subroutine make_btmat_mpc
1006 
1007  subroutine make_bttmat_mpc(hecMESH, ndof, BTtmat)
1008  implicit none
1009  type (hecmwst_local_mesh), intent(in) :: hecmesh
1010  integer(kind=kint), intent(in) :: ndof
1011  type (hecmwst_local_matrix), intent(out) :: bttmat
1012  type (hecmwst_local_matrix) :: ttmat
1013  integer(kind=kint) :: n_mpc
1014  integer(kind=kint) :: i,j,k,js,je,jj,kk
1015  integer(kind=kint), allocatable :: iw(:)
1016  n_mpc=0
1017  outer: do i=1,hecmesh%mpc%n_mpc
1018  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
1019  if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer
1020  enddo
1021  n_mpc=n_mpc+1
1022  enddo outer
1023  ttmat%nr=hecmesh%n_node*ndof
1024  ttmat%nc=ttmat%nr
1025  ttmat%ndof=1
1026  allocate(ttmat%index(0:ttmat%nr))
1027  ! count nonzero in each row
1028  ttmat%index(1:ttmat%nr)=1
1029  outer2: do i=1,hecmesh%mpc%n_mpc
1030  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
1031  if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer2
1032  enddo
1033  k=hecmesh%mpc%mpc_index(i-1)+1
1034  kk=ndof*(hecmesh%mpc%mpc_item(k)-1)+hecmesh%mpc%mpc_dof(k)
1035  ttmat%index(kk)=0
1036  do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
1037  jj = ndof * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
1038  ttmat%index(jj)=ttmat%index(jj)+1
1039  enddo
1040  enddo outer2
1041  ! index
1042  ttmat%index(0)=0
1043  do i=1,ttmat%nr
1044  ttmat%index(i)=ttmat%index(i-1)+ttmat%index(i)
1045  enddo
1046  ttmat%nnz=ttmat%index(ttmat%nr)
1047  allocate(ttmat%item(ttmat%nnz), ttmat%A(ttmat%nnz))
1048  ! diag
1049  do i=1,ttmat%nr
1050  js=ttmat%index(i-1)+1
1051  je=ttmat%index(i)
1052  if (js <= je) then
1053  ttmat%item(js)=i
1054  ttmat%A(js)=1.d0
1055  endif
1056  enddo
1057  ! others
1058  allocate(iw(ttmat%nr))
1059  iw(:)=1
1060  outer3: do i=1,hecmesh%mpc%n_mpc
1061  do j= hecmesh%mpc%mpc_index(i-1) + 1, hecmesh%mpc%mpc_index(i)
1062  if (hecmesh%mpc%mpc_dof(j) > ndof) cycle outer3
1063  enddo
1064  k=hecmesh%mpc%mpc_index(i-1)+1
1065  kk=ndof*(hecmesh%mpc%mpc_item(k)-1)+hecmesh%mpc%mpc_dof(k)
1066  do j= hecmesh%mpc%mpc_index(i-1) + 2, hecmesh%mpc%mpc_index(i)
1067  jj = ndof * (hecmesh%mpc%mpc_item(j) - 1) + hecmesh%mpc%mpc_dof(j)
1068  js=ttmat%index(jj-1)+1+iw(jj)
1069  ttmat%item(js)=kk
1070  ttmat%A(js)=-hecmesh%mpc%mpc_val(j)
1071  iw(jj)=iw(jj)+1
1072  enddo
1073  enddo outer3
1074  deallocate(iw)
1075  !write(700+hecmw_comm_get_rank(),*) 'DEBUG: Ttmat(MPC)'
1076  !call hecmw_localmat_write(Ttmat,700+hecmw_comm_get_rank())
1077  call hecmw_localmat_blocking(ttmat, ndof, bttmat)
1078  call hecmw_localmat_free(ttmat)
1079  end subroutine make_bttmat_mpc
1080 
1081  subroutine hecmw_localmat_transpose(Tmat, Ttmat)
1082  implicit none
1083  type (hecmwst_local_matrix), intent(in) :: tmat
1084  type (hecmwst_local_matrix), intent(out) :: ttmat
1085  integer(kind=kint), allocatable :: iw(:)
1086  integer(kind=kint) :: i, j, jj, ndof, ndof2, k, idof, jdof
1087  allocate(iw(tmat%nc))
1088  iw = 0
1089  do i = 1, tmat%nr
1090  do j = tmat%index(i-1)+1, tmat%index(i)
1091  jj = tmat%item(j)
1092  iw(jj) = iw(jj) + 1
1093  enddo
1094  enddo
1095  ttmat%nr = tmat%nc
1096  ttmat%nc = tmat%nr
1097  ttmat%nnz = tmat%nnz
1098  ttmat%ndof = tmat%ndof
1099  ndof = tmat%ndof
1100  ndof2 = ndof * ndof
1101  allocate(ttmat%index(0:ttmat%nr))
1102  allocate(ttmat%item(ttmat%nnz))
1103  allocate(ttmat%A(ttmat%nnz*ndof2))
1104  ttmat%index(0) = 0
1105  do i = 1, ttmat%nr
1106  ttmat%index(i) = ttmat%index(i-1) + iw(i)
1107  iw(i) = ttmat%index(i-1) + 1
1108  enddo
1109  do i = 1, tmat%nr
1110  do j = tmat%index(i-1)+1, tmat%index(i)
1111  jj = tmat%item(j)
1112  k = iw(jj)
1113  ttmat%item( k ) = i
1114  do idof = 1, ndof
1115  do jdof = 1, ndof
1116  ttmat%A((k-1)*ndof2+(idof-1)*ndof+jdof) = &
1117  tmat%A((j-1)*ndof2+(jdof-1)*ndof+idof)
1118  enddo
1119  enddo
1120  iw(jj) = k + 1
1121  enddo
1122  enddo
1123  end subroutine hecmw_localmat_transpose
1124 
1125  function hecmw_localmat_equal(Tmat1, Tmat2)
1126  implicit none
1127  type (hecmwst_local_matrix), intent(in) :: tmat1, tmat2
1128  integer(kind=kint) :: hecmw_localmat_equal
1129  integer(kind=kint) :: i, j, k0, k, ndof, ndof2
1130  hecmw_localmat_equal = 0
1131  if (tmat1%nr /= tmat2%nr) return
1132  if (tmat1%nc /= tmat2%nc) return
1133  if (tmat1%nnz /= tmat2%nnz) return
1134  if (tmat1%ndof /= tmat2%ndof) return
1135  ndof = tmat1%ndof
1136  ndof2 = ndof * ndof
1137  do i = 1, tmat1%nr
1138  if (tmat1%index(i) /= tmat2%index(i)) return
1139  do j = tmat1%index(i-1)+1, tmat1%index(i)
1140  if (tmat1%item(j) /= tmat2%item(j)) return
1141  k0 = (j-1)*ndof2
1142  do k = 1, ndof2
1143  if (tmat1%A(k0+k) /= tmat2%A(k0+k)) return
1144  enddo
1145  enddo
1146  enddo
1147  hecmw_localmat_equal = 1
1148  end function hecmw_localmat_equal
1149 
1150 !!!
1151 !!! Subroutines for parallel contact analysis with iterative linear solver
1152 !!!
1153 
1154  subroutine hecmw_localmat_assemble(BTmat, hecMESH, hecMESHnew)
1155  implicit none
1156  type (hecmwst_local_matrix), intent(inout) :: btmat
1157  type (hecmwst_local_mesh), intent(in) :: hecmesh
1158  type (hecmwst_local_mesh), intent(inout) :: hecmeshnew
1159  integer(kind=kint) :: nn_int, np, ndof, ndof2, nr_ext, nnz_ext
1160  integer(kind=kint), allocatable :: exp_rows_index(:), exp_cols_index(:)
1161  integer(kind=kint), allocatable :: exp_rows_item(:,:), exp_cols_item(:,:)
1162  type (hecmwst_local_matrix), allocatable :: bt_ext(:)
1163  type (hecmwst_local_matrix) :: bt_int
1164  type (hecmwst_local_matrix) :: btnew
1165  ! some checks
1166  if (debug >= 1) write(0,*) 'DEBUG: nr,nc,nnz,ndof',btmat%nr,btmat%nc,btmat%nnz,btmat%ndof
1167  if (btmat%nr /= hecmesh%n_node) stop 'ERROR: invalid size in hecmw_localmat_assemble'
1168  !
1169  nn_int = hecmesh%nn_internal
1170  np = hecmesh%n_node
1171  ndof = btmat%ndof
1172  ndof2 = ndof*ndof
1173  !
1174  nr_ext = np - nn_int
1175  nnz_ext = btmat%index(np) - btmat%index(nn_int)
1176  !
1177  call prepare_bt_ext(btmat, hecmesh, exp_rows_index, exp_rows_item, bt_ext)
1178  if (debug >= 1) write(0,*) 'DEBUG: prepare_BT_ext done'
1179  !
1180  call prepare_column_info(hecmesh, bt_ext, exp_cols_index, exp_cols_item)
1181  if (debug >= 1) write(0,*) 'DEBUG: prepare_column info done'
1182  !
1183  call send_bt_ext_and_recv_bt_int(hecmesh, exp_rows_index, exp_rows_item, bt_ext, &
1184  exp_cols_index, exp_cols_item, bt_int, hecmeshnew)
1185  if (debug >= 1) write(0,*) 'DEBUG: send BT_ext and recv BT_int done'
1186  !
1187  !write(0,*) 'BTmat%ndof,BT_int%ndof',BTmat%ndof,BT_int%ndof
1188  call hecmw_localmat_add(btmat, bt_int, btnew)
1189  if (debug >= 1) write(0,*) 'DEBUG: localmat_add done'
1190  !
1191  call hecmw_localmat_free(btmat)
1192  call hecmw_localmat_free(bt_int)
1193  !
1194  btmat%nr = btnew%nr
1195  btmat%nc = btnew%nc
1196  btmat%nnz = btnew%nnz
1197  btmat%ndof = btnew%ndof
1198  btmat%index => btnew%index
1199  btmat%item => btnew%item
1200  btmat%A => btnew%A
1201  !
1202  ! hecMESH%n_node = hecMESHnew%n_node
1203  ! hecMESH%n_neighbor_pe = hecMESHnew%n_neighbor_pe
1204  ! deallocate(hecMESH%neighbor_pe)
1205  ! deallocate(hecMESH%import_index)
1206  ! deallocate(hecMESH%export_index)
1207  ! deallocate(hecMESH%import_item)
1208  ! deallocate(hecMESH%export_item)
1209  ! deallocate(hecMESH%node_ID)
1210  ! deallocate(hecMESH%global_node_ID)
1211  ! hecMESH%neighbor_pe => hecMESHnew%neighbor_pe
1212  ! hecMESH%import_index => hecMESHnew%import_index
1213  ! hecMESH%export_index => hecMESHnew%export_index
1214  ! hecMESH%import_item => hecMESHnew%import_item
1215  ! hecMESH%export_item => hecMESHnew%export_item
1216  ! hecMESH%node_ID => hecMESHnew%node_ID
1217  ! hecMESH%global_node_ID => hecMESHnew%global_node_ID
1218  !
1219  if (debug >= 1) write(0,*) 'DEBUG: update BTmat and hecMESH done'
1220  end subroutine hecmw_localmat_assemble
1221 
1222  subroutine prepare_bt_ext(BTmat, hecMESH, exp_rows_index, exp_rows_item, BT_ext)
1223  implicit none
1224  type (hecmwst_local_matrix), intent(in) :: btmat
1225  type (hecmwst_local_mesh), intent(in) :: hecmesh
1226  integer(kind=kint), allocatable, intent(out) :: exp_rows_index(:)
1227  integer(kind=kint), allocatable, intent(out) :: exp_rows_item(:,:)
1228  type (hecmwst_local_matrix), allocatable, intent(out) :: bt_ext(:)
1229  integer(kind=kint), allocatable :: incl_nz(:), exp_cols_per_row(:), exp_rows_per_rank(:)
1230  integer(kind=kint) :: nn_int
1231  nn_int = hecmesh%nn_internal
1232  !
1233  call check_external_nz_blocks(btmat, nn_int, incl_nz)
1234  !
1235  call count_ext_rows_with_nz(btmat, nn_int, incl_nz, exp_cols_per_row)
1236  !
1237  call count_exp_rows_per_rank(hecmesh, exp_cols_per_row, exp_rows_per_rank)
1238  !
1239  allocate(exp_rows_index(0:hecmesh%n_neighbor_pe))
1240  call make_index(hecmesh%n_neighbor_pe, exp_rows_per_rank, exp_rows_index)
1241  !write(0,*) 'exp_rows_index',exp_rows_index(:)
1242  !
1243  deallocate(exp_rows_per_rank)
1244  !
1245  call make_exp_rows_item(hecmesh, exp_cols_per_row, exp_rows_index, exp_rows_item)
1246  !
1247  deallocate(exp_cols_per_row)
1248  !
1249  allocate(bt_ext(hecmesh%n_neighbor_pe))
1250  call extract_bt_ext(hecmesh, btmat, incl_nz, exp_rows_index, exp_rows_item, bt_ext)
1251  !
1252  deallocate(incl_nz)
1253  end subroutine prepare_bt_ext
1254 
1255  subroutine check_external_nz_blocks(BTmat, nn_internal, incl_nz)
1256  implicit none
1257  type (hecmwst_local_matrix), intent(in) :: btmat
1258  integer(kind=kint), intent(in) :: nn_internal
1259  integer(kind=kint), allocatable, intent(out) :: incl_nz(:)
1260  integer(kind=kint) :: ndof2, i0, nnz_ext, i, k, nnz_blk
1261  if (nn_internal > btmat%nr) stop 'ERROR: invalid nn_internal'
1262  ndof2 = btmat%ndof ** 2
1263  i0 = btmat%index(nn_internal)
1264  nnz_ext = btmat%index(btmat%nr) - i0
1265  allocate(incl_nz(nnz_ext))
1266  nnz_blk = 0
1267  do i = 1, nnz_ext
1268  incl_nz(i) = 0
1269  do k = 1, ndof2
1270  if (btmat%A(ndof2*(i0+i-1)+k) /= 0.0d0) then
1271  incl_nz(i) = 1
1272  nnz_blk = nnz_blk + 1
1273  exit
1274  endif
1275  enddo
1276  enddo
1277  if (debug >= 1) write(0,*) 'DEBUG: nnz_blk',nnz_blk
1278  end subroutine check_external_nz_blocks
1279 
1280  subroutine count_ext_rows_with_nz(BTmat, nn_internal, incl_nz, exp_cols_per_row)
1281  implicit none
1282  type (hecmwst_local_matrix), intent(in) :: btmat
1283  integer(kind=kint), intent(in) :: nn_internal
1284  integer(kind=kint), intent(in) :: incl_nz(:)
1285  integer(kind=kint), allocatable, intent(out) :: exp_cols_per_row(:)
1286  integer(kind=kint) :: nr_ext, nnz_int, i, irow, js, je, j, jcol
1287  nr_ext = btmat%nr - nn_internal
1288  nnz_int = btmat%index(nn_internal)
1289  allocate(exp_cols_per_row(nr_ext))
1290  exp_cols_per_row(:) = 0
1291  do i = 1, nr_ext
1292  irow = nn_internal+i
1293  js = btmat%index(irow-1)+1
1294  je = btmat%index(irow)
1295  do j = js, je
1296  jcol = btmat%item(j)
1297  if (incl_nz(j-nnz_int) == 1) exp_cols_per_row(i) = exp_cols_per_row(i) + 1
1298  enddo
1299  enddo
1300  !write(0,*) 'exp_cols_per_row',exp_cols_per_row(:)
1301  end subroutine count_ext_rows_with_nz
1302 
1303  subroutine count_exp_rows_per_rank(hecMESH, exp_cols_per_row, exp_rows_per_rank)
1304  implicit none
1305  type (hecmwst_local_mesh), intent(in) :: hecmesh
1306  integer(kind=kint), intent(in) :: exp_cols_per_row(:)
1307  integer(kind=kint), allocatable, intent(out) :: exp_rows_per_rank(:)
1308  integer(kind=kint) :: nn_int, np, nr_ext, i, irow, exp_rank, idom
1309  allocate(exp_rows_per_rank(hecmesh%n_neighbor_pe))
1310  exp_rows_per_rank(1:hecmesh%n_neighbor_pe) = 0
1311  nn_int = hecmesh%nn_internal
1312  np = hecmesh%n_node
1313  nr_ext = np - nn_int
1314  do i = 1, nr_ext
1315  if (exp_cols_per_row(i) > 0) then
1316  irow = nn_int + i
1317  exp_rank = hecmesh%node_ID(2*irow)
1318  call rank_to_idom(hecmesh, exp_rank, idom)
1319  exp_rows_per_rank(idom) = exp_rows_per_rank(idom) + 1
1320  endif
1321  enddo
1322  !write(0,*) 'exp_rows_per_rank',exp_rows_per_rank(:)
1323  end subroutine count_exp_rows_per_rank
1324 
1325  subroutine rank_to_idom(hecMESH, rank, idom)
1326  implicit none
1327  type (hecmwst_local_mesh), intent(in) :: hecmesh
1328  integer(kind=kint), intent(in) :: rank
1329  integer(kind=kint), intent(out) :: idom
1330  integer(kind=kint) :: i
1331  do i = 1, hecmesh%n_neighbor_pe
1332  if (hecmesh%neighbor_pe(i) == rank) then
1333  idom = i
1334  return
1335  endif
1336  enddo
1337  stop 'ERROR: exp_rank not found in neighbor_pe'
1338  end subroutine rank_to_idom
1339 
1340  subroutine make_index(len, cnt, index)
1341  implicit none
1342  integer(kind=kint), intent(in) :: len
1343  integer(kind=kint), intent(in) :: cnt(len)
1344  integer(kind=kint), intent(out) :: index(0:)
1345  integer(kind=kint) :: i
1346  ! write(0,*) 'make_index: len',len
1347  index(0) = 0
1348  do i = 1,len
1349  index(i) = index(i-1) + cnt(i)
1350  enddo
1351  end subroutine make_index
1352 
1353  subroutine make_exp_rows_item(hecMESH, exp_cols_per_row, exp_rows_index, exp_rows_item)
1354  implicit none
1355  type (hecmwst_local_mesh), intent(in) :: hecmesh
1356  integer(kind=kint), intent(in) :: exp_cols_per_row(:)
1357  integer(kind=kint), allocatable, intent(in) :: exp_rows_index(:)
1358  integer(kind=kint), allocatable, intent(out) :: exp_rows_item(:,:)
1359  integer(kind=kint), allocatable :: cnt(:)
1360  integer(kind=kint) :: nn_int, np, nr_ext, i, irow, exp_rank, idom, idx
1361  allocate(exp_rows_item(2,exp_rows_index(hecmesh%n_neighbor_pe)))
1362  allocate(cnt(hecmesh%n_neighbor_pe))
1363  cnt(:) = 0
1364  nn_int = hecmesh%nn_internal
1365  np = hecmesh%n_node
1366  nr_ext = np - nn_int
1367  do i = 1, nr_ext
1368  if (exp_cols_per_row(i) > 0) then
1369  irow = nn_int + i
1370  exp_rank = hecmesh%node_ID(2*irow)
1371  call rank_to_idom(hecmesh, exp_rank, idom)
1372  cnt(idom) = cnt(idom) + 1
1373  idx = exp_rows_index(idom-1) + cnt(idom)
1374  exp_rows_item(1,idx) = irow
1375  exp_rows_item(2,idx) = exp_cols_per_row(i)
1376  endif
1377  enddo
1378  !write(0,*) 'cnt',cnt(:)
1379  do idom = 1, hecmesh%n_neighbor_pe
1380  if (cnt(idom) /= exp_rows_index(idom)-exp_rows_index(idom-1)) stop 'ERROR: make exp_rows_item'
1381  enddo
1382  !write(0,*) 'exp_rows_item(1,:)',exp_rows_item(1,:)
1383  !write(0,*) 'exp_rows_item(2,:)',exp_rows_item(2,:)
1384  end subroutine make_exp_rows_item
1385 
1386  subroutine extract_bt_ext(hecMESH, BTmat, incl_nz, exp_rows_index, exp_rows_item, BT_ext)
1387  implicit none
1388  type (hecmwst_local_mesh), intent(in) :: hecmesh
1389  type (hecmwst_local_matrix), intent(in) :: btmat
1390  integer(kind=kint), intent(in) :: incl_nz(:)
1391  integer(kind=kint), allocatable, intent(in) :: exp_rows_index(:)
1392  integer(kind=kint), intent(in) :: exp_rows_item(:,:)
1393  type (hecmwst_local_matrix), allocatable, intent(out) :: bt_ext(:)
1394  integer(kind=kint) :: ndof, ndof2, nn_int, nnz_int, idom, j, idx, ncol, cnt, jrow, ks, ke, k, kcol
1395  allocate(bt_ext(hecmesh%n_neighbor_pe))
1396  ndof = btmat%ndof
1397  ndof2 = ndof * ndof
1398  nn_int = hecmesh%nn_internal
1399  nnz_int = btmat%index(nn_int)
1400  do idom = 1, hecmesh%n_neighbor_pe
1401  bt_ext(idom)%nr = exp_rows_index(idom) - exp_rows_index(idom-1)
1402  bt_ext(idom)%nc = btmat%nc
1403  bt_ext(idom)%nnz = 0
1404  bt_ext(idom)%ndof = ndof
1405  allocate(bt_ext(idom)%index(0:bt_ext(idom)%nr))
1406  bt_ext(idom)%index(0) = 0
1407  do j = 1, bt_ext(idom)%nr
1408  idx = exp_rows_index(idom-1) + j
1409  ncol = exp_rows_item(2,idx)
1410  bt_ext(idom)%index(j) = bt_ext(idom)%index(j-1) + ncol
1411  enddo
1412  bt_ext(idom)%nnz = bt_ext(idom)%index(bt_ext(idom)%nr)
1413  if (debug >= 1) write(0,*) 'DEBUG: idom,nr,nc,nnz,ndof', &
1414  idom,bt_ext(idom)%nr,bt_ext(idom)%nc,bt_ext(idom)%nnz,bt_ext(idom)%ndof
1415  allocate(bt_ext(idom)%item(bt_ext(idom)%nnz))
1416  allocate(bt_ext(idom)%A(bt_ext(idom)%nnz * ndof2))
1417  cnt = 0
1418  do j = 1, bt_ext(idom)%nr
1419  idx = exp_rows_index(idom-1) + j
1420  jrow = exp_rows_item(1,idx)
1421  if (jrow < 1 .or. btmat%nr < jrow) stop 'ERROR: extract BT_ext: jrow'
1422  ks = btmat%index(jrow-1)+1
1423  ke = btmat%index(jrow)
1424  do k = ks, ke
1425  kcol = btmat%item(k)
1426  if (incl_nz(k-nnz_int) == 0) cycle
1427  cnt = cnt + 1
1428  bt_ext(idom)%item(cnt) = kcol
1429  bt_ext(idom)%A(ndof2*(cnt-1)+1:ndof2*cnt) = btmat%A(ndof2*(k-1)+1:ndof2*k)
1430  enddo
1431  if (cnt /= bt_ext(idom)%index(j)) stop 'ERROR: extract BT_ext'
1432  enddo
1433  ! if (DEBUG >= 3) write(100+hecMESH%my_rank,*) 'BT_ext(',idom,')'
1434  ! call hecmw_localmat_write(BT_ext(idom), 100+hecMESH%my_rank)
1435  enddo
1436  end subroutine extract_bt_ext
1437 
1438  subroutine prepare_column_info(hecMESH, BT_ext, exp_cols_index, exp_cols_item)
1439  implicit none
1440  type (hecmwst_local_mesh), intent(in) :: hecmesh
1441  type (hecmwst_local_matrix), intent(in) :: bt_ext(:)
1442  integer(kind=kint), allocatable, intent(out) :: exp_cols_index(:)
1443  integer(kind=kint), allocatable, intent(out) :: exp_cols_item(:,:)
1444  !
1445  call make_exp_cols_index(hecmesh%n_neighbor_pe, bt_ext, exp_cols_index)
1446  if (debug >= 2) write(0,*) ' DEBUG2: make exp_cols_index done'
1447  if (debug >= 3) write(0,*) ' DEBUG3: exp_cols_index', exp_cols_index(0:hecmesh%n_neighbor_pe)
1448  !
1449  ! (col ID, rank, global ID)
1450  !
1451  call make_exp_cols_item(hecmesh, bt_ext, exp_cols_index, exp_cols_item)
1452  if (debug >= 2) write(0,*) ' DEBUG2: make exp_cols_item done'
1453  ! if (DEBUG >= 3) write(0,*) ' DEBUG3: exp_cols_item', exp_cols_item(1:cNCOL_ITEM,1:exp_cols_index(hecMESH%n_neighbor_pe))
1454  end subroutine prepare_column_info
1455 
1456  subroutine make_exp_cols_index(nnb, BT_ext, exp_cols_index)
1457  implicit none
1458  integer(kind=kint), intent(in) :: nnb
1459  type (hecmwst_local_matrix), intent(in) :: bt_ext(:)
1460  integer(kind=kint), allocatable, intent(out) :: exp_cols_index(:)
1461  integer(kind=kint) :: idom
1462  allocate(exp_cols_index(0:nnb))
1463  exp_cols_index(0) = 0
1464  do idom = 1, nnb
1465  exp_cols_index(idom) = exp_cols_index(idom-1) + bt_ext(idom)%nnz
1466  enddo
1467  end subroutine make_exp_cols_index
1468 
1469  subroutine make_exp_cols_item(hecMESH, BT_ext, exp_cols_index, exp_cols_item)
1470  implicit none
1471  type (hecmwst_local_mesh), intent(in) :: hecmesh
1472  type (hecmwst_local_matrix), intent(in) :: bt_ext(:)
1473  integer(kind=kint), allocatable, intent(in) :: exp_cols_index(:)
1474  integer(kind=kint), allocatable, intent(out) :: exp_cols_item(:,:)
1475  integer(kind=kint) :: cnt, idom, j, jcol
1476  allocate(exp_cols_item(cncol_item,exp_cols_index(hecmesh%n_neighbor_pe)))
1477  cnt = 0
1478  do idom = 1, hecmesh%n_neighbor_pe
1479  do j = 1, bt_ext(idom)%nnz
1480  cnt = cnt + 1
1481  jcol = bt_ext(idom)%item(j)
1482  ! if (DEBUG >= 3) write(0,*) ' DEBUG3: idom,j,cnt,jcol,nn_internal,n_node',&
1483  ! idom,j,cnt,jcol,hecMESH%nn_internal,hecMESH%n_node
1484  ! if (DEBUG >= 3) write(0,*) ' DEBUG3: size of exp_cols_item',size(exp_cols_item)
1485  ! if (DEBUG >= 3) write(0,*) ' DEBUG3: size of node_ID',size(hecMESH%node_ID)
1486  ! if (DEBUG >= 3) write(0,*) ' DEBUG3: size of global_node_ID',size(hecMESH%global_node_ID)
1487  exp_cols_item(clid,cnt) = hecmesh%node_ID(2*jcol-1)
1488  exp_cols_item(crank,cnt) = hecmesh%node_ID(2*jcol)
1489  if (cncol_item >= 3) exp_cols_item(cgid,cnt) = hecmesh%global_node_ID(jcol)
1490  ! if (DEBUG >= 3) write(0,*) ' DEBUG3: lid,rank(,gid)',exp_cols_item(1:cNCOL_ITEM,cnt)
1491  enddo
1492  if (cnt /= exp_cols_index(idom)) stop 'ERROR: make exp_cols_item'
1493  enddo
1494  end subroutine make_exp_cols_item
1495 
1496  subroutine send_bt_ext_and_recv_bt_int(hecMESH, exp_rows_index, exp_rows_item, BT_ext, &
1497  exp_cols_index, exp_cols_item, BT_int, hecMESHnew)
1498  implicit none
1499  type (hecmwst_local_mesh), intent(in) :: hecmesh
1500  integer(kind=kint), allocatable, intent(inout) :: exp_rows_index(:), exp_cols_index(:)
1501  integer(kind=kint), allocatable, intent(inout) :: exp_rows_item(:,:), exp_cols_item(:,:)
1502  type (hecmwst_local_matrix), allocatable, intent(inout) :: bt_ext(:)
1503  type (hecmwst_local_matrix), intent(out) :: bt_int
1504  type (hecmwst_local_mesh), intent(inout) :: hecmeshnew
1505  integer(kind=kint), allocatable :: imp_rows_index(:), imp_cols_index(:)
1506  integer(kind=kint), allocatable :: imp_rows_item(:,:), imp_cols_item(:,:)
1507  real(kind=kreal), allocatable :: imp_vals_item(:)
1508  integer(kind=kint), allocatable :: map(:), add_nodes(:,:)
1509  integer(kind=kint) :: ndof, ndof2, idom, n_add_node, i0
1510  if (hecmesh%n_neighbor_pe == 0) return
1511  ndof = bt_ext(1)%ndof
1512  ndof2 = ndof*ndof
1513  !
1514  call convert_rowid_to_remote_localid(hecmesh, exp_rows_index(hecmesh%n_neighbor_pe), exp_rows_item)
1515  if (debug >= 2) write(0,*) ' DEBUG2: convert rowID to remote localID done'
1516  !
1517  call send_recv_bt_ext_nr_nnz(hecmesh, bt_ext, imp_rows_index, imp_cols_index)
1518  if (debug >= 2) write(0,*) ' DEBUG2: send recv BT_ext nr and nnz done'
1519  !
1520  call send_recv_bt_ext_contents(hecmesh, bt_ext, &
1521  exp_rows_index, exp_cols_index, exp_rows_item, exp_cols_item, &
1522  imp_rows_index, imp_cols_index, &
1523  imp_rows_item, imp_cols_item, imp_vals_item)
1524  if (debug >= 2) write(0,*) ' DEBUG2: send recv BT_ext contents done'
1525  !
1526  do idom = 1, hecmesh%n_neighbor_pe
1527  call hecmw_localmat_free(bt_ext(idom))
1528  enddo
1529  deallocate(bt_ext)
1530  !
1531  call allocate_bt_int(hecmesh, ndof, imp_rows_index, imp_rows_item, bt_int)
1532  if (debug >= 2) write(0,*) ' DEBUG2: allocate BT_int done'
1533  !
1534  ! call copy_mesh(hecMESH, hecMESHnew)
1535  ! if (DEBUG >= 2) write(0,*) ' DEBUG2: copy mesh done'
1536  !
1537  call map_imported_cols(hecmeshnew, imp_cols_index(hecmesh%n_neighbor_pe), &
1538  imp_cols_item, n_add_node, add_nodes, map, i0)
1539  if (debug >= 2) write(0,*) ' DEBUG2: map imported cols done'
1540  !
1541  call update_comm_table(hecmeshnew, n_add_node, add_nodes, i0)
1542  if (debug >= 2) write(0,*) ' DEBUG2: update comm_table done'
1543  !
1544  bt_int%nc = hecmeshnew%n_node
1545  !
1546  call copy_vals_to_bt_int(hecmesh%n_neighbor_pe, imp_rows_index, imp_cols_index, &
1547  imp_rows_item, map, ndof2, imp_vals_item, bt_int)
1548  if (debug >= 2) write(0,*) ' DEBUG2: copy vals to BT_int done'
1549  !
1550  deallocate(imp_rows_index)
1551  deallocate(imp_cols_index)
1552  deallocate(imp_rows_item)
1553  deallocate(imp_cols_item)
1554  deallocate(imp_vals_item)
1555  deallocate(map)
1556  !
1557  call sort_and_uniq_rows(bt_int)
1558  if (debug >= 2) write(0,*) ' DEBUG2: sort and uniq rows of BT_int done'
1559  end subroutine send_bt_ext_and_recv_bt_int
1560 
1561  subroutine convert_rowid_to_remote_localid(hecMESH, len, exp_rows_item)
1562  implicit none
1563  type (hecmwst_local_mesh), intent(in) :: hecmesh
1564  integer(kind=kint), intent(in) :: len
1565  integer(kind=kint), intent(out) :: exp_rows_item(:,:)
1566  integer(kind=kint) :: i
1567  do i = 1, len
1568  exp_rows_item(1,i) = hecmesh%node_ID(2 * exp_rows_item(1,i) - 1)
1569  enddo
1570  end subroutine convert_rowid_to_remote_localid
1571 
1572  subroutine send_recv_bt_ext_nr_nnz(hecMESH, BT_ext, imp_rows_index, imp_cols_index)
1573  use m_hecmw_comm_f
1574  implicit none
1575  type (hecmwst_local_mesh), intent(in) :: hecmesh
1576  type (hecmwst_local_matrix), intent(in) :: bt_ext(:)
1577  integer(kind=kint), allocatable, intent(out) :: imp_rows_index(:), imp_cols_index(:)
1578  integer(kind=kint) :: nnb, idom, irank, tag, recvbuf(2)
1579  integer(kind=kint), allocatable :: sendbuf(:,:)
1580  integer(kind=kint), allocatable :: requests(:)
1581  integer(kind=kint), allocatable :: statuses(:,:)
1582  nnb = hecmesh%n_neighbor_pe
1583  allocate(imp_rows_index(0:nnb))
1584  allocate(imp_cols_index(0:nnb))
1585  allocate(requests(nnb))
1586  allocate(statuses(hecmw_status_size, nnb))
1587  allocate(sendbuf(2,nnb))
1588  do idom = 1, nnb
1589  irank = hecmesh%neighbor_pe(idom)
1590  ! nr = exp_rows_per_rank(idom)
1591  sendbuf(1,idom) = bt_ext(idom)%nr
1592  sendbuf(2,idom) = bt_ext(idom)%nnz
1593  tag=2001
1594  call hecmw_isend_int(sendbuf(1,idom), 2, irank, tag, hecmesh%MPI_COMM, &
1595  requests(idom))
1596  enddo
1597  imp_rows_index(0) = 0
1598  imp_cols_index(0) = 0
1599  do idom = 1, nnb
1600  irank = hecmesh%neighbor_pe(idom)
1601  tag = 2001
1602  call hecmw_recv_int(recvbuf, 2, irank, tag, &
1603  hecmesh%MPI_COMM, statuses(:,1))
1604  imp_rows_index(idom) = imp_rows_index(idom-1) + recvbuf(1)
1605  imp_cols_index(idom) = imp_cols_index(idom-1) + recvbuf(2)
1606  enddo
1607  call hecmw_waitall(nnb, requests, statuses)
1608  deallocate(requests)
1609  deallocate(statuses)
1610  deallocate(sendbuf)
1611  end subroutine send_recv_bt_ext_nr_nnz
1612 
1613  subroutine send_recv_bt_ext_contents(hecMESH, BT_ext, &
1614  exp_rows_index, exp_cols_index, exp_rows_item, exp_cols_item, &
1615  imp_rows_index, imp_cols_index, &
1616  imp_rows_item, imp_cols_item, imp_vals_item)
1617  use m_hecmw_comm_f
1618  implicit none
1619  type (hecmwST_local_mesh), intent(in) :: hecMESH
1620  type (hecmwST_local_matrix), intent(in) :: BT_ext(:)
1621  integer(kind=kint), allocatable, intent(inout) :: exp_rows_index(:), exp_cols_index(:)
1622  integer(kind=kint), allocatable, intent(inout) :: exp_rows_item(:,:), exp_cols_item(:,:)
1623  integer(kind=kint), allocatable, intent(in) :: imp_rows_index(:), imp_cols_index(:)
1624  integer(kind=kint), allocatable, intent(out) :: imp_rows_item(:,:), imp_cols_item(:,:)
1625  real(kind=kreal), allocatable, intent(out) :: imp_vals_item(:)
1626  integer(kind=kint) :: nnb, ndof2, n_send, idom, irank, tag, nr, nnz
1627  integer(kind=kint), allocatable :: requests(:)
1628  integer(kind=kint), allocatable :: statuses(:,:)
1629  nnb = hecmesh%n_neighbor_pe
1630  if (nnb < 1) return
1631  ndof2 = bt_ext(1)%ndof ** 2
1632  allocate(imp_rows_item(2,imp_rows_index(nnb)))
1633  allocate(imp_cols_item(cncol_item,imp_cols_index(nnb)))
1634  allocate(imp_vals_item(ndof2*imp_cols_index(nnb)))
1635  allocate(requests(3*nnb))
1636  allocate(statuses(hecmw_status_size, 3*nnb))
1637  n_send = 0
1638  do idom = 1, nnb
1639  irank = hecmesh%neighbor_pe(idom)
1640  if (bt_ext(idom)%nr > 0) then
1641  n_send = n_send + 1
1642  tag = 2002
1643  call hecmw_isend_int(exp_rows_item(1,exp_rows_index(idom-1)+1), &
1644  2*bt_ext(idom)%nr, irank, tag, hecmesh%MPI_COMM, &
1645  requests(n_send))
1646  n_send = n_send + 1
1647  tag = 2003
1648  call hecmw_isend_int(exp_cols_item(1,exp_cols_index(idom-1)+1), &
1649  cncol_item*bt_ext(idom)%nnz, irank, tag, hecmesh%MPI_COMM, &
1650  requests(n_send))
1651  n_send = n_send + 1
1652  tag = 2004
1653  call hecmw_isend_r(bt_ext(idom)%A, ndof2*bt_ext(idom)%nnz, irank, &
1654  tag, hecmesh%MPI_COMM, requests(n_send))
1655  endif
1656  enddo
1657  do idom = 1, nnb
1658  irank = hecmesh%neighbor_pe(idom)
1659  nr = imp_rows_index(idom) - imp_rows_index(idom-1)
1660  nnz = imp_cols_index(idom) - imp_cols_index(idom-1)
1661  if (nr > 0) then
1662  tag = 2002
1663  call hecmw_recv_int(imp_rows_item(1,imp_rows_index(idom-1)+1), &
1664  2*nr, irank, tag, hecmesh%MPI_COMM, statuses(:,1))
1665  tag = 2003
1666  call hecmw_recv_int(imp_cols_item(1,imp_cols_index(idom-1)+1), &
1667  cncol_item*nnz, irank, tag, hecmesh%MPI_COMM, statuses(:,1))
1668  tag = 2004
1669  call hecmw_recv_r(imp_vals_item(ndof2*imp_cols_index(idom-1)+1), &
1670  ndof2*nnz, irank, tag, hecmesh%MPI_COMM, statuses(:,1))
1671  endif
1672  enddo
1673  call hecmw_waitall(n_send, requests, statuses)
1674  deallocate(exp_rows_index)
1675  deallocate(exp_rows_item)
1676  deallocate(exp_cols_index)
1677  deallocate(exp_cols_item)
1678  end subroutine send_recv_bt_ext_contents
1679 
1680  subroutine allocate_bt_int(hecMESH, ndof, imp_rows_index, imp_rows_item, BT_int)
1681  implicit none
1682  type (hecmwST_local_mesh), intent(in) :: hecMESH
1683  integer(kind=kint), intent(in) :: ndof
1684  integer(kind=kint), allocatable, intent(in) :: imp_rows_index(:), imp_rows_item(:,:)
1685  type (hecmwST_local_matrix), intent(out) :: BT_int
1686  integer(kind=kint), allocatable :: cnt(:)
1687  integer(kind=kint) :: idom, is, ie, i, irow, ncol, ndof2
1688  ndof2 = ndof*ndof
1689  bt_int%nr = hecmesh%nn_internal
1690  bt_int%nc = hecmesh%n_node
1691  bt_int%nnz = 0
1692  bt_int%ndof = ndof
1693  allocate(cnt(bt_int%nr))
1694  cnt(:) = 0
1695  do idom = 1, hecmesh%n_neighbor_pe
1696  is = imp_rows_index(idom-1)+1
1697  ie = imp_rows_index(idom)
1698  do i = is, ie
1699  irow = imp_rows_item(1,i)
1700  ncol = imp_rows_item(2,i)
1701  if (irow < 1 .or. bt_int%nr < irow) stop 'ERROR: allocate BT_int'
1702  cnt(irow) = cnt(irow) + ncol !!! might include duplicate cols
1703  enddo
1704  enddo
1705  !
1706  allocate(bt_int%index(0:bt_int%nr))
1707  call make_index(bt_int%nr, cnt, bt_int%index)
1708  !
1709  bt_int%nnz = bt_int%index(bt_int%nr)
1710  allocate(bt_int%item(bt_int%nnz))
1711  allocate(bt_int%A(bt_int%nnz * ndof2))
1712  bt_int%A(:) = 0.d0
1713  end subroutine allocate_bt_int
1714 
1715  subroutine copy_mesh(src, dst)
1716  implicit none
1717  type (hecmwST_local_mesh), intent(in) :: src
1718  type (hecmwST_local_mesh), intent(out) :: dst
1719  dst%zero = src%zero
1720  dst%MPI_COMM = src%MPI_COMM
1721  dst%PETOT = src%PETOT
1722  dst%PEsmpTOT = src%PEsmpTOT
1723  dst%my_rank = src%my_rank
1724  dst%n_subdomain = src%n_subdomain
1725  dst%n_node = src%n_node
1726  dst%nn_internal = src%nn_internal
1727  dst%n_dof = src%n_dof
1728  dst%n_neighbor_pe = src%n_neighbor_pe
1729  allocate(dst%neighbor_pe(dst%n_neighbor_pe))
1730  dst%neighbor_pe(:) = src%neighbor_pe(:)
1731  allocate(dst%import_index(0:dst%n_neighbor_pe))
1732  allocate(dst%export_index(0:dst%n_neighbor_pe))
1733  dst%import_index(:)= src%import_index(:)
1734  dst%export_index(:)= src%export_index(:)
1735  allocate(dst%import_item(dst%import_index(dst%n_neighbor_pe)))
1736  dst%import_item(:) = src%import_item(:)
1737  allocate(dst%export_item(dst%export_index(dst%n_neighbor_pe)))
1738  dst%export_item(:) = src%export_item(:)
1739  allocate(dst%node_ID(2*dst%n_node))
1740  dst%node_ID(1:2*dst%n_node) = src%node_ID(1:2*src%n_node)
1741  allocate(dst%global_node_ID(dst%n_node))
1742  dst%global_node_ID(1:dst%n_node) = src%global_node_ID(1:src%n_node)
1743  dst%mpc%n_mpc = 0
1744  dst%node => src%node
1745  end subroutine copy_mesh
1746 
1747  subroutine map_imported_cols(hecMESHnew, ncols, cols, n_add_node, add_nodes, map, i0)
1748  implicit none
1749  type (hecmwST_local_mesh), intent(inout) :: hecMESHnew
1750  integer(kind=kint), intent(in) :: ncols
1751  integer(kind=kint), intent(in) :: cols(cNCOL_ITEM,ncols)
1752  integer(kind=kint), allocatable, intent(out) :: map(:)
1753  integer(kind=kint), intent(out) :: n_add_node
1754  integer(kind=kint), allocatable, intent(out) :: add_nodes(:,:)
1755  integer(kind=kint), intent(out) :: i0
1756  allocate(map(ncols))
1757  !
1758  call map_present_nodes(hecmeshnew, ncols, cols, map, n_add_node)
1759  !
1760  ! add nodes == unmapped nodes
1761  !
1762  call extract_add_nodes(ncols, cols, map, n_add_node, add_nodes)
1763  !
1764  call append_nodes(hecmeshnew, n_add_node, add_nodes, i0)
1765  !
1766  call map_additional_nodes(ncols, cols, n_add_node, add_nodes, i0, map)
1767  end subroutine map_imported_cols
1768 
1769  subroutine map_present_nodes(hecMESH, ncols, cols, map, n_add_node)
1770  implicit none
1771  type (hecmwST_local_mesh), intent(in) :: hecMESH
1772  integer(kind=kint), intent(in) :: ncols
1773  integer(kind=kint), intent(in) :: cols(cNCOL_ITEM,ncols)
1774  integer(kind=kint), intent(out) :: map(ncols)
1775  integer(kind=kint), intent(out) :: n_add_node
1776  integer(kind=kint) :: i, j, lid, rank, llid, n_ext_node, idx
1777  integer(kind=kint), allocatable :: ext_node(:)
1778  type (hecmwST_pair_array) :: parray
1779  !
1780  call hecmw_pair_array_init(parray, hecmesh%n_node - hecmesh%nn_internal)
1781  do i = hecmesh%nn_internal + 1, hecmesh%n_node
1782  call hecmw_pair_array_append(parray, i, hecmesh%node_ID(2*i-1), hecmesh%node_ID(2*i))
1783  enddo
1784  call hecmw_pair_array_sort(parray)
1785  !
1786  n_add_node = 0
1787  n_ext_node = 0
1788  allocate(ext_node(ncols))
1789  !$omp parallel default(none), &
1790  !$omp& private(i,lid,rank,llid,idx,j), &
1791  !$omp& shared(ncols,hecMESH,cols,map,n_ext_node,ext_node,parray), &
1792  !$omp& reduction(+:n_add_node)
1793  !$omp do
1794  do i = 1, ncols
1795  lid = cols(clid,i)
1796  rank = cols(crank,i)
1797  ! check rank
1798  if (rank == hecmesh%my_rank) then ! internal: set mapping
1799  map(i) = lid
1800  else ! external
1801  !$omp atomic capture
1802  n_ext_node = n_ext_node + 1
1803  idx = n_ext_node
1804  !$omp end atomic
1805  ext_node(idx) = i
1806  endif
1807  enddo
1808  !$omp end do
1809  !$omp do
1810  do j = 1, n_ext_node
1811  i = ext_node(j)
1812  lid = cols(clid,i)
1813  rank = cols(crank,i)
1814  ! search node_ID in external nodes
1815  llid = hecmw_pair_array_find_id(parray, lid, rank)
1816  if (llid > 0) then ! found: set mapping
1817  map(i) = llid
1818  else ! not found
1819  map(i) = -1
1820  n_add_node = n_add_node + 1
1821  endif
1822  enddo
1823  !$omp end do
1824  !$omp end parallel
1825  deallocate(ext_node)
1826  !
1827  call hecmw_pair_array_finalize(parray)
1828  end subroutine map_present_nodes
1829 
1830  subroutine extract_add_nodes(ncols, cols, map, n_add_node, add_nodes)
1831  implicit none
1832  integer(kind=kint), intent(in) :: ncols
1833  integer(kind=kint), intent(in) :: cols(cNCOL_ITEM,ncols), map(ncols)
1834  integer(kind=kint), intent(inout) :: n_add_node
1835  integer(kind=kint), allocatable, intent(out) :: add_nodes(:,:)
1836  integer(kind=kint) :: cnt, i
1837  allocate(add_nodes(cncol_item,n_add_node))
1838  cnt = 0
1839  do i = 1, ncols
1840  if (map(i) == -1) then
1841  cnt = cnt + 1
1842  add_nodes(1:cncol_item,cnt) = cols(1:cncol_item,i)
1843  endif
1844  enddo
1845  if (cnt /= n_add_node) stop 'ERROR: extract add_nodes'
1846  call sort_and_uniq_add_nodes(n_add_node, add_nodes)
1847  end subroutine extract_add_nodes
1848 
1849  subroutine sort_and_uniq_add_nodes(n_add_node, add_nodes)
1850  implicit none
1851  integer(kind=kint), intent(inout) :: n_add_node
1852  integer(kind=kint), intent(inout) :: add_nodes(cNCOL_ITEM,n_add_node)
1853  integer(kind=kint) :: ndup
1854  call sort_add_nodes(add_nodes, 1, n_add_node)
1855  call uniq_add_nodes(add_nodes, n_add_node, ndup)
1856  n_add_node = n_add_node - ndup
1857  end subroutine sort_and_uniq_add_nodes
1858 
1859  recursive subroutine sort_add_nodes(add_nodes, id1, id2)
1860  implicit none
1861  integer(kind=kint), intent(inout) :: add_nodes(:,:)
1862  integer(kind=kint), intent(in) :: id1, id2
1863  integer(kind=kint) :: center, left, right
1864  integer(kind=kint) :: pivot(cNCOL_ITEM), tmp(cNCOL_ITEM)
1865  if (id1 >= id2) return
1866  center = (id1 + id2) / 2
1867  pivot(1:cncol_item) = add_nodes(1:cncol_item,center)
1868  left = id1
1869  right = id2
1870  do
1871  do while ((add_nodes(crank,left) < pivot(crank)) .or. &
1872  (add_nodes(crank,left) == pivot(crank) .and. add_nodes(clid,left) < pivot(clid)))
1873  left = left + 1
1874  enddo
1875  do while ((pivot(crank) < add_nodes(crank,right)) .or. &
1876  (pivot(crank) == add_nodes(crank,right) .and. pivot(clid) < add_nodes(clid,right)))
1877  right = right - 1
1878  enddo
1879  if (left >= right) exit
1880  tmp(1:cncol_item) = add_nodes(1:cncol_item,left)
1881  add_nodes(1:cncol_item,left) = add_nodes(1:cncol_item,right)
1882  add_nodes(1:cncol_item,right) = tmp(1:cncol_item)
1883  left = left + 1
1884  right = right - 1
1885  enddo
1886  if (id1 < left-1) call sort_add_nodes(add_nodes, id1, left-1)
1887  if (right+1 < id2) call sort_add_nodes(add_nodes, right+1, id2)
1888  return
1889  end subroutine sort_add_nodes
1890 
1891  subroutine uniq_add_nodes(add_nodes, len, ndup)
1892  implicit none
1893  integer(kind=kint), intent(inout) :: add_nodes(:,:)
1894  integer(kind=kint), intent(in) :: len
1895  integer(kind=kint), intent(out) :: ndup
1896  integer(kind=kint) :: i
1897  ndup = 0
1898  do i = 2,len
1899  if (add_nodes(clid,i) == add_nodes(clid,i-1-ndup) .and. &
1900  add_nodes(crank,i) == add_nodes(crank,i-1-ndup)) then
1901  ndup = ndup + 1
1902  else if (ndup > 0) then
1903  add_nodes(1:cncol_item,i-ndup) = add_nodes(1:cncol_item,i)
1904  endif
1905  enddo
1906  end subroutine uniq_add_nodes
1907 
1908  subroutine search_add_nodes(n_add_node, add_nodes, rank, lid, idx)
1909  implicit none
1910  integer(kind=kint), intent(in) :: n_add_node
1911  integer(kind=kint), intent(in) :: add_nodes(cNCOL_ITEM,n_add_node)
1912  integer(kind=kint), intent(in) :: rank
1913  integer(kind=kint), intent(in) :: lid
1914  integer(kind=kint), intent(out) :: idx
1915  integer(kind=kint) :: left, right, center
1916  left = 1
1917  right = n_add_node
1918  do while (left <= right)
1919  center = (left + right) / 2
1920  if ((rank == add_nodes(crank,center)) .and. (lid == add_nodes(clid,center))) then
1921  idx = center
1922  return
1923  else if ((rank < add_nodes(crank,center)) .or. &
1924  (rank == add_nodes(crank,center) .and. lid < add_nodes(clid,center))) then
1925  right = center - 1
1926  else if ((add_nodes(crank,center) < rank) .or. &
1927  (add_nodes(crank,center) == rank .and. add_nodes(clid,center) < lid)) then
1928  left = center + 1
1929  endif
1930  end do
1931  idx = -1
1932  end subroutine search_add_nodes
1933 
1934  subroutine append_nodes(hecMESHnew, n_add_node, add_nodes, i0)
1935  implicit none
1936  type (hecmwST_local_mesh), intent(inout) :: hecMESHnew
1937  integer(kind=kint), intent(in) :: n_add_node
1938  integer(kind=kint), intent(in) :: add_nodes(:,:)
1939  integer(kind=kint), intent(out) :: i0
1940  integer(kind=kint) :: n_node, i, ii
1941  integer(kind=kint), pointer :: node_ID(:), global_node_ID(:)
1942  i0 = hecmeshnew%n_node
1943  n_node = hecmeshnew%n_node + n_add_node
1944  allocate(node_id(2*n_node))
1945  allocate(global_node_id(n_node))
1946  do i = 1, hecmeshnew%n_node
1947  node_id(2*i-1) = hecmeshnew%node_ID(2*i-1)
1948  node_id(2*i ) = hecmeshnew%node_ID(2*i )
1949  global_node_id(i) = hecmeshnew%global_node_ID(i)
1950  enddo
1951  do i = 1, n_add_node
1952  ii = hecmeshnew%n_node + i
1953  node_id(2*ii-1) = add_nodes(clid,i)
1954  node_id(2*ii ) = add_nodes(crank,i)
1955  if (cncol_item >= 3) then
1956  global_node_id(i) = add_nodes(cgid,i)
1957  else
1958  global_node_id(i) = -1
1959  endif
1960  enddo
1961  deallocate(hecmeshnew%node_ID)
1962  deallocate(hecmeshnew%global_node_ID)
1963  hecmeshnew%n_node = n_node
1964  hecmeshnew%node_ID => node_id
1965  hecmeshnew%global_node_ID => global_node_id
1966  end subroutine append_nodes
1967 
1968  subroutine map_additional_nodes(ncols, cols, n_add_node, add_nodes, i0, map)
1969  implicit none
1970  integer(kind=kint), intent(in) :: ncols
1971  integer(kind=kint), intent(in) :: cols(cNCOL_ITEM,ncols)
1972  integer(kind=kint), intent(in) :: n_add_node
1973  integer(kind=kint), intent(in) :: add_nodes(cNCOL_ITEM,n_add_node)
1974  integer(kind=kint), intent(in) :: i0
1975  integer(kind=kint), intent(inout) :: map(ncols)
1976  integer(kind=kint) :: i, j
1977  do i = 1, ncols
1978  if (map(i) > 0) cycle
1979  call search_add_nodes(n_add_node, add_nodes, cols(crank,i), cols(clid,i), j)
1980  if (j == -1) stop 'ERROR: map_additional_nodes'
1981  map(i) = i0 + j
1982  enddo
1983  end subroutine map_additional_nodes
1984 
1985  subroutine update_comm_table(hecMESHnew, n_add_node, add_nodes, i0)
1986  use m_hecmw_comm_f
1987  implicit none
1988  type (hecmwST_local_mesh), intent(inout) :: hecMESHnew
1989  integer(kind=kint), intent(in) :: n_add_node
1990  integer(kind=kint), allocatable, intent(inout) :: add_nodes(:,:)
1991  integer(kind=kint), intent(in) :: i0
1992  integer(kind=kint), allocatable :: n_add_imp(:), add_imp_index(:)
1993  integer(kind=kint), allocatable :: add_imp_item_remote(:), add_imp_item_local(:)
1994  integer(kind=kint), allocatable :: n_add_exp(:), add_exp_index(:), add_exp_item(:)
1995  integer(kind=kint), allocatable :: n_new_imp(:), n_new_exp(:)
1996  integer(kind=kint) :: npe, nnb, comm, new_nnb
1997  integer(kind=kint), pointer :: nbpe(:), new_nbpe(:)
1998  integer(kind=kint), pointer :: import_index(:), export_index(:), import_item(:), export_item(:)
1999  integer(kind=kint), pointer :: new_import_index(:), new_export_index(:)
2000  integer(kind=kint), pointer :: new_import_item(:), new_export_item(:)
2001  npe = hecmeshnew%PETOT
2002  nnb = hecmeshnew%n_neighbor_pe
2003  comm = hecmeshnew%MPI_COMM
2004  nbpe => hecmeshnew%neighbor_pe
2005  import_index => hecmeshnew%import_index
2006  export_index => hecmeshnew%export_index
2007  import_item => hecmeshnew%import_item
2008  export_item => hecmeshnew%export_item
2009  !
2010  call count_add_imp_per_rank(n_add_node, add_nodes, npe, n_add_imp)
2011  if (debug >= 3) write(0,*) ' DEBUG3: count add_imp per rank done'
2012  !
2013  allocate(add_imp_index(0:npe))
2014  call make_index(npe, n_add_imp, add_imp_index)
2015  if (debug >= 3) write(0,*) ' DEBUG3: make add_imp_index done'
2016  !
2017  call make_add_imp_item(n_add_node, add_nodes, npe, i0, add_imp_index, &
2018  add_imp_item_remote, add_imp_item_local)
2019  if (debug >= 3) write(0,*) ' DEBUG3: make add_imp_item done'
2020  !
2021  deallocate(add_nodes)
2022  !
2023  ! all_to_all n_add_imp -> n_add_exp
2024  !
2025  allocate(n_add_exp(npe))
2026  call hecmw_alltoall_int(n_add_imp, 1, n_add_exp, 1, comm)
2027  if (debug >= 3) write(0,*) ' DEBUG3: alltoall n_add_imp to n_add_exp done'
2028  !
2029  allocate(add_exp_index(0:npe))
2030  call make_index(npe, n_add_exp, add_exp_index)
2031  if (debug >= 3) write(0,*) ' DEBUG3: make add_exp_index done'
2032  !
2033  call send_recv_add_imp_exp_item(npe, add_imp_index, add_imp_item_remote, &
2034  add_exp_index, add_exp_item, comm)
2035  if (debug >= 3) write(0,*) ' DEBUG3: send recv add_imp/exp_item done'
2036  !
2037  ! count new import
2038  !
2039  call count_new_comm_nodes(npe, nnb, nbpe, import_index, n_add_imp, n_new_imp)
2040  if (debug >= 3) write(0,*) ' DEBUG3: count new comm_nodes (import) done'
2041  !
2042  ! count new export
2043  !
2044  call count_new_comm_nodes(npe, nnb, nbpe, export_index, n_add_exp, n_new_exp)
2045  if (debug >= 3) write(0,*) ' DEBUG3: count new comm_nodes (export) done'
2046  !
2047  call update_neighbor_pe(npe, n_new_imp, n_new_exp, new_nnb, new_nbpe)
2048  if (debug >= 3) write(0,*) ' DEBUG3: update neighbor_pe done'
2049  !
2050  ! merge import table: import
2051  !
2052  call merge_comm_table(npe, nnb, nbpe, import_index, import_item, &
2053  new_nnb, new_nbpe, add_imp_index, add_imp_item_local, n_add_imp, n_new_imp, &
2054  new_import_index, new_import_item)
2055  if (debug >= 3) write(0,*) ' DEBUG3: merge comm_table (import) done'
2056  !
2057  deallocate(n_add_imp)
2058  deallocate(add_imp_index)
2059  deallocate(add_imp_item_remote, add_imp_item_local)
2060  deallocate(n_new_imp)
2061  !
2062  ! merge export table: export
2063  !
2064  call merge_comm_table(npe, nnb, nbpe, export_index, export_item, &
2065  new_nnb, new_nbpe, add_exp_index, add_exp_item, n_add_exp, n_new_exp, &
2066  new_export_index, new_export_item)
2067  if (debug >= 3) write(0,*) ' DEBUG3: merge comm_table (export) done'
2068  !
2069  deallocate(n_add_exp)
2070  deallocate(add_exp_index)
2071  deallocate(add_exp_item)
2072  deallocate(n_new_exp)
2073  !
2074  deallocate(nbpe)
2075  deallocate(import_index,import_item)
2076  deallocate(export_index,export_item)
2077  hecmeshnew%n_neighbor_pe = new_nnb
2078  hecmeshnew%neighbor_pe => new_nbpe
2079  hecmeshnew%import_index => new_import_index
2080  hecmeshnew%export_index => new_export_index
2081  hecmeshnew%import_item => new_import_item
2082  hecmeshnew%export_item => new_export_item
2083  end subroutine update_comm_table
2084 
2085  subroutine count_add_imp_per_rank(n_add_node, add_nodes, npe, n_add_imp)
2086  implicit none
2087  integer(kind=kint), intent(in) :: n_add_node
2088  integer(kind=kint), intent(in) :: add_nodes(cNCOL_ITEM,n_add_node)
2089  integer(kind=kint), intent(in) :: npe
2090  integer(kind=kint), allocatable, intent(out) :: n_add_imp(:)
2091  integer(kind=kint) :: i, rank
2092  allocate(n_add_imp(npe))
2093  n_add_imp(:) = 0
2094  do i = 1, n_add_node
2095  rank = add_nodes(crank,i)
2096  n_add_imp(rank+1) = n_add_imp(rank+1) + 1
2097  enddo
2098  end subroutine count_add_imp_per_rank
2099 
2100  subroutine make_add_imp_item(n_add_node, add_nodes, npe, i0, add_imp_index, &
2101  add_imp_item_remote, add_imp_item_local)
2102  implicit none
2103  integer(kind=kint), intent(in) :: n_add_node
2104  integer(kind=kint), intent(in) :: add_nodes(cNCOL_ITEM,n_add_node)
2105  integer(kind=kint), intent(in) :: npe, i0
2106  integer(kind=kint), allocatable, intent(in) :: add_imp_index(:)
2107  integer(kind=kint), allocatable, intent(out) :: add_imp_item_remote(:), add_imp_item_local(:)
2108  integer(kind=kint), allocatable :: cnt(:)
2109  integer(kind=kint) :: i, lid, rank, ipe
2110  allocate(add_imp_item_remote(add_imp_index(npe)))
2111  allocate(add_imp_item_local(add_imp_index(npe)))
2112  allocate(cnt(npe))
2113  cnt(:) = 0
2114  do i = 1, n_add_node
2115  lid = add_nodes(clid,i)
2116  rank = add_nodes(crank,i)
2117  ipe = rank + 1
2118  cnt(ipe) = cnt(ipe) + 1
2119  add_imp_item_remote(add_imp_index(ipe-1) + cnt(ipe)) = lid
2120  add_imp_item_local(add_imp_index(ipe-1) + cnt(ipe)) = i0 + i
2121  enddo
2122  deallocate(cnt)
2123  end subroutine make_add_imp_item
2124 
2125  subroutine send_recv_add_imp_exp_item(npe, add_imp_index, add_imp_item_remote, &
2126  add_exp_index, add_exp_item, mpi_comm)
2127  use m_hecmw_comm_f
2128  implicit none
2129  integer(kind=kint), intent(in) :: npe
2130  integer(kind=kint), allocatable, intent(in) :: add_imp_index(:), add_imp_item_remote(:)
2131  integer(kind=kint), allocatable, intent(in) :: add_exp_index(:)
2132  integer(kind=kint), allocatable, intent(out) :: add_exp_item(:)
2133  integer(kind=kint), intent(in) :: mpi_comm
2134  integer(kind=kint) :: n_send, i, irank, is, ie, len, tag
2135  integer(kind=kint), allocatable :: requests(:)
2136  integer(kind=kint), allocatable :: statuses(:,:)
2137  allocate(add_exp_item(add_exp_index(npe)))
2138  allocate(requests(npe))
2139  allocate(statuses(hecmw_status_size, npe))
2140  n_send = 0
2141  do i = 1, npe
2142  irank = i-1
2143  is = add_imp_index(i-1)+1
2144  ie = add_imp_index(i)
2145  len = ie - is + 1
2146  if (len == 0) cycle
2147  tag = 4001
2148  n_send = n_send + 1
2149  call hecmw_isend_int(add_imp_item_remote(is:ie), len, irank, tag, &
2150  mpi_comm, requests(n_send))
2151  enddo
2152  !
2153  do i = 1, npe
2154  irank = i-1
2155  is = add_exp_index(i-1)+1
2156  ie = add_exp_index(i)
2157  len = ie - is + 1
2158  if (len == 0) cycle
2159  tag = 4001
2160  call hecmw_recv_int(add_exp_item(is:ie), len, irank, tag, &
2161  mpi_comm, statuses(:,1))
2162  enddo
2163  call hecmw_waitall(n_send, requests, statuses)
2164  end subroutine send_recv_add_imp_exp_item
2165 
2166  subroutine count_new_comm_nodes(npe, org_nnb, org_nbpe, org_index, n_add, n_new)
2167  implicit none
2168  integer(kind=kint), intent(in) :: npe, org_nnb
2169  !integer(kind=kint), intent(in) :: org_nbpe(org_nnb), org_index(0:org_nnb), n_add(npe)
2170  integer(kind=kint), pointer, intent(in) :: org_nbpe(:), org_index(:)
2171  integer(kind=kint), intent(in) :: n_add(:)
2172  integer(kind=kint), allocatable, intent(out) :: n_new(:)
2173  integer(kind=kint) :: i, irank, n_org
2174  allocate(n_new(npe))
2175  n_new(:) = n_add(:)
2176  do i = 1, org_nnb
2177  irank = org_nbpe(i)
2178  n_org = org_index(i) - org_index(i-1)
2179  n_new(irank+1) = n_new(irank+1) + n_org
2180  enddo
2181  end subroutine count_new_comm_nodes
2182 
2183  subroutine update_neighbor_pe(npe, n_new_imp, n_new_exp, &
2184  new_nnb, new_nbpe)
2185  implicit none
2186  integer(kind=kint), intent(in) :: npe
2187  integer(kind=kint), intent(in) :: n_new_imp(npe), n_new_exp(npe)
2188  integer(kind=kint), intent(out) :: new_nnb
2189  integer(kind=kint), pointer, intent(out) :: new_nbpe(:)
2190  integer(kind=kint) :: i
2191  new_nnb = 0
2192  do i = 1, npe
2193  if (n_new_imp(i) > 0 .or. n_new_exp(i) > 0) new_nnb = new_nnb+1
2194  enddo
2195  allocate(new_nbpe(new_nnb))
2196  new_nnb = 0
2197  do i = 1, npe
2198  if (n_new_imp(i) > 0 .or. n_new_exp(i) > 0) then
2199  new_nnb = new_nnb+1
2200  new_nbpe(new_nnb) = i-1
2201  endif
2202  enddo
2203  end subroutine update_neighbor_pe
2204 
2205  subroutine merge_comm_table(npe, org_nnb, org_nbpe, org_index, org_item, &
2206  new_nnb, new_nbpe, add_index, add_item, n_add, n_new, new_index, new_item)
2207  implicit none
2208  integer(kind=kint), intent(in) :: npe, org_nnb
2209  !integer(kind=kint), intent(in) :: org_nbpe(org_nnb), org_index(0:org_nnb), org_item(:)
2210  integer(kind=kint), pointer, intent(in) :: org_nbpe(:), org_index(:), org_item(:)
2211  integer(kind=kint), intent(in) :: new_nnb
2212  !integer(kind=kint), intent(in) :: new_nbpe(new_nnb), add_index(0:npe), add_item(:)
2213  integer(kind=kint), pointer, intent(in) :: new_nbpe(:)
2214  integer(kind=kint), allocatable, intent(in) :: add_index(:), add_item(:)
2215  integer(kind=kint), intent(in) :: n_add(npe), n_new(npe)
2216  integer(kind=kint), pointer, intent(out) :: new_index(:), new_item(:)
2217  integer(kind=kint), allocatable :: cnt(:)
2218  integer(kind=kint) :: i, irank, j, jrank, i0, j0, len
2219  ! if (associated(new_index)) deallocate(new_index)
2220  ! if (associated(new_item)) deallocate(new_item)
2221  allocate(new_index(0:new_nnb))
2222  new_index(0) = 0
2223  do i = 1, new_nnb
2224  irank = new_nbpe(i)
2225  new_index(i) = new_index(i-1) + n_new(irank+1)
2226  enddo
2227  allocate(new_item(new_index(new_nnb)))
2228  allocate(cnt(npe))
2229  cnt(:) = 0
2230  j = 1
2231  jrank = new_nbpe(j)
2232  do i = 1, org_nnb
2233  if (org_index(i) - org_index(i-1) == 0) cycle
2234  irank = org_nbpe(i)
2235  do while (jrank < irank)
2236  j = j + 1
2237  if (j > new_nnb) exit
2238  jrank = new_nbpe(j)
2239  enddo
2240  if (jrank /= irank) stop 'ERROR: merging comm table: org into new'
2241  i0 = org_index(i-1)
2242  len = org_index(i) - i0
2243  j0 = new_index(j-1)
2244  new_item(j0+1:j0+len) = org_item(i0+1:i0+len)
2245  cnt(jrank+1) = len
2246  enddo
2247  j = 1
2248  jrank = new_nbpe(j)
2249  do i = 1, npe
2250  if (n_add(i) == 0) cycle
2251  irank = i-1
2252  do while (jrank < irank)
2253  j = j + 1
2254  jrank = new_nbpe(j)
2255  enddo
2256  if (jrank /= irank) stop 'ERROR: merging comm table: add into new'
2257  i0 = add_index(i-1)
2258  len = add_index(i) - i0
2259  j0 = new_index(j-1) + cnt(jrank+1)
2260  new_item(j0+1:j0+len) = add_item(i0+1:i0+len)
2261  cnt(jrank+1) = cnt(jrank+1) + len
2262  if (cnt(jrank+1) /= new_index(j)-new_index(j-1)) stop 'ERROR: merging comm table'
2263  enddo
2264  deallocate(cnt)
2265  end subroutine merge_comm_table
2266 
2267  subroutine copy_vals_to_bt_int(nnb, imp_rows_index, imp_cols_index, &
2268  imp_rows_item, map, ndof2, imp_vals_item, BT_int)
2269  implicit none
2270  integer(kind=kint), intent(in) :: nnb
2271  integer(kind=kint), allocatable, intent(in) :: imp_rows_index(:), imp_cols_index(:)
2272  integer(kind=kint), intent(in) :: imp_rows_item(:,:), map(:)
2273  integer(kind=kint), intent(in) :: ndof2
2274  real(kind=kreal), intent(in) :: imp_vals_item(:)
2275  type (hecmwST_local_matrix), intent(inout) :: BT_int
2276  integer(kind=kint), allocatable :: cnt(:)
2277  integer(kind=kint) :: idom, is, ie, ic0, i, irow, ncol, j0, j
2278  allocate(cnt(bt_int%nr))
2279  cnt(:) = 0
2280  do idom = 1, nnb
2281  is = imp_rows_index(idom-1)+1
2282  ie = imp_rows_index(idom)
2283  ic0 = imp_cols_index(idom-1)
2284  do i = is, ie
2285  irow = imp_rows_item(1,i)
2286  ncol = imp_rows_item(2,i)
2287  if (irow < 1 .or. bt_int%nr < irow) stop 'ERROR: copy vals to BT_int: irow'
2288  j0 = bt_int%index(irow-1) + cnt(irow)
2289  do j = 1, ncol
2290  bt_int%item(j0+j) = map(ic0+j)
2291  bt_int%A(ndof2*(j0+j-1)+1:ndof2*(j0+j)) = imp_vals_item(ndof2*(ic0+j-1)+1:ndof2*(ic0+j))
2292  enddo
2293  cnt(irow) = cnt(irow) + ncol
2294  ic0 = ic0 + ncol
2295  enddo
2296  if (ic0 /= imp_cols_index(idom)) stop 'ERROR: copy vals to BT_int: ic0'
2297  enddo
2298  deallocate(cnt)
2299  end subroutine copy_vals_to_bt_int
2300 
2301  subroutine sort_and_uniq_rows(BTmat)
2302  use hecmw_array_util
2303  implicit none
2304  type (hecmwST_local_matrix), intent(inout) :: BTmat
2305  integer(kind=kint) :: nr, ndof, ndof2
2306  integer(kind=kint) :: irow, is, ie, is_new, ie_new, i, i_new
2307  integer(kind=kint) :: ndup, ndup_tot
2308  integer(kind=kint) :: js, je, js_new, je_new
2309  integer(kind=kint) :: new_nnz
2310  integer(kind=kint), allocatable :: cnt(:)
2311  integer(kind=kint), pointer :: sort_item(:), new_index(:), new_item(:)
2312  real(kind=kreal), pointer :: new_a(:)
2313  logical :: sorted
2314  real(kind=kreal) :: t0, t1
2315  t0 = hecmw_wtime()
2316  nr = btmat%nr
2317  ! check if already sorted
2318  sorted = .true.
2319  outer: do irow = 1, nr
2320  is = btmat%index(irow-1)+1
2321  ie = btmat%index(irow)
2322  do i = is, ie-1
2323  if (btmat%item(i) >= btmat%item(i+1)) then
2324  sorted = .false.
2325  exit outer
2326  endif
2327  enddo
2328  end do outer
2329  t1 = hecmw_wtime()
2330  if (timer >= 4) write(0, '(A,f10.4,L2)') "####### sort_and_uniq_rows (1) : ",t1-t0,sorted
2331  t0 = hecmw_wtime()
2332  if (sorted) return
2333  ! perform sort
2334  ndof = btmat%ndof
2335  ndof2 = ndof*ndof
2336  ! duplicate item array (sort_item)
2337  allocate(sort_item(btmat%nnz))
2338  do i = 1, btmat%nnz
2339  sort_item(i) = btmat%item(i)
2340  enddo
2341  ! sort and uniq item for each row
2342  allocate(cnt(nr))
2343  ndup_tot = 0
2344  !$omp parallel do default(none), &
2345  !$omp& schedule(dynamic,1), &
2346  !$omp& private(irow,is,ie,ndup), &
2347  !$omp& shared(nr,BTmat,sort_item,cnt), &
2348  !$omp& reduction(+:ndup_tot)
2349  do irow = 1, nr
2350  is = btmat%index(irow-1)+1
2351  ie = btmat%index(irow)
2352  call hecmw_qsort_int_array(sort_item, is, ie)
2353  call hecmw_uniq_int_array(sort_item, is, ie, ndup)
2354  cnt(irow) = (ie-is+1) - ndup
2355  ndup_tot = ndup_tot + ndup
2356  enddo
2357  !$omp end parallel do
2358  t1 = hecmw_wtime()
2359  if (timer >= 4) write(0, '(A,f10.4,I5)') "####### sort_and_uniq_rows (2) : ",t1-t0,ndup_tot
2360  t0 = hecmw_wtime()
2361  ! make new index and item array (new_index, new_item)
2362  if (ndup_tot == 0) then
2363  new_index => btmat%index
2364  new_nnz = btmat%nnz
2365  new_item => sort_item
2366  else
2367  allocate(new_index(0:nr))
2368  call make_index(nr, cnt, new_index)
2369  new_nnz = new_index(nr)
2370  allocate(new_item(new_nnz))
2371  do irow = 1, nr
2372  is = btmat%index(irow-1)+1
2373  ie = is+cnt(irow)-1
2374  is_new = new_index(irow-1)+1
2375  ie_new = is_new+cnt(irow)-1
2376  new_item(is_new:ie_new) = sort_item(is:ie)
2377  enddo
2378  deallocate(sort_item)
2379  endif
2380  deallocate(cnt)
2381  t1 = hecmw_wtime()
2382  if (timer >= 4) write(0, '(A,f10.4)') "####### sort_and_uniq_rows (3) : ",t1-t0
2383  t0 = hecmw_wtime()
2384  ! allocate and clear value array (new_A)
2385  allocate(new_a(ndof2*new_nnz))
2386  new_a(:) = 0.d0
2387  ! copy/add value from old A to new A
2388  !$omp parallel do default(none), &
2389  !$omp& schedule(dynamic,1), &
2390  !$omp& private(irow,is,ie,is_new,ie_new,i,i_new,js,je,js_new,je_new), &
2391  !$omp& shared(nr,BTmat,new_index,new_item,ndof2,new_A)
2392  do irow = 1, nr
2393  is = btmat%index(irow-1)+1
2394  ie = btmat%index(irow)
2395  is_new = new_index(irow-1)+1
2396  ie_new = new_index(irow)
2397  ! for each item in row
2398  do i = is, ie
2399  ! find place in new item
2400  call hecmw_bsearch_int_array(new_item, is_new, ie_new, btmat%item(i), i_new)
2401  if (i_new == -1) stop 'ERROR: sort_and_uniq_rows'
2402  js = ndof2*(i-1)+1
2403  je = ndof2*i
2404  js_new = ndof2*(i_new-1)+1
2405  je_new = ndof2*i_new
2406  new_a(js_new:je_new) = new_a(js_new:je_new) + btmat%A(js:je)
2407  enddo
2408  enddo
2409  !$omp end parallel do
2410  t1 = hecmw_wtime()
2411  if (timer >= 4) write(0, '(A,f10.4)') "####### sort_and_uniq_rows (4) : ",t1-t0
2412  t0 = hecmw_wtime()
2413  ! deallocate/update nnz, index, item, A
2414  if (ndup_tot == 0) then
2415  deallocate(btmat%item)
2416  btmat%item => new_item
2417  deallocate(btmat%A)
2418  btmat%A => new_a
2419  else
2420  btmat%nnz = new_nnz
2421  deallocate(btmat%index)
2422  btmat%index => new_index
2423  deallocate(btmat%item)
2424  btmat%item => new_item
2425  deallocate(btmat%A)
2426  btmat%A => new_a
2427  endif
2428  end subroutine sort_and_uniq_rows
2429 
2430  subroutine hecmw_localmat_add(Amat, Bmat, Cmat)
2431  implicit none
2432  type (hecmwst_local_matrix), intent(in) :: amat
2433  type (hecmwst_local_matrix), intent(in) :: bmat
2434  type (hecmwst_local_matrix), intent(out) :: cmat
2435  integer(kind=kint) :: ndof, ndof2, nr, nc, i, icnt, js, je, j, jcol, idx, i0, k
2436  integer(kind=kint), allocatable :: iw(:)
2437  if (amat%ndof /= bmat%ndof) stop 'ERROR: hecmw_localmat_add: non-matching ndof'
2438  ndof = amat%ndof
2439  ndof2 = ndof*ndof
2440  nr = min(amat%nr, bmat%nr)
2441  nc = max(amat%nc, bmat%nc)
2442  cmat%ndof = ndof
2443  cmat%nr = nr
2444  cmat%nc = nc
2445  cmat%nnz = 0
2446  allocate(cmat%index(0:nr))
2447  cmat%index(0) = 0
2448  allocate(iw(nc))
2449  do i = 1, nr
2450  icnt = 0
2451  ! Amat
2452  js = amat%index(i-1)+1
2453  je = amat%index(i)
2454  do j = js, je
2455  jcol = amat%item(j)
2456  icnt = icnt + 1
2457  iw(icnt) = jcol
2458  enddo
2459  ! Bmat
2460  js = bmat%index(i-1)+1
2461  je = bmat%index(i)
2462  lj1: do j = js, je
2463  jcol = bmat%item(j)
2464  do k = 1, icnt
2465  if (iw(k) == jcol) cycle lj1
2466  enddo
2467  icnt = icnt + 1
2468  iw(icnt) = jcol
2469  enddo lj1
2470  cmat%index(i) = cmat%index(i-1) + icnt
2471  enddo
2472  cmat%nnz = cmat%index(nr)
2473  allocate(cmat%item(cmat%nnz))
2474  allocate(cmat%A(ndof2*cmat%nnz))
2475  do i = 1, nr
2476  i0 = cmat%index(i-1)
2477  icnt = 0
2478  ! Amat
2479  js = amat%index(i-1)+1
2480  je = amat%index(i)
2481  do j = js, je
2482  jcol = amat%item(j)
2483  icnt = icnt + 1
2484  idx = i0 + icnt
2485  cmat%item(idx) = jcol
2486  cmat%A(ndof2*(idx-1)+1:ndof2*idx) = amat%A(ndof2*(j-1)+1:ndof2*j)
2487  enddo
2488  ! Bmat
2489  js = bmat%index(i-1)+1
2490  je = bmat%index(i)
2491  lj2: do j = js, je
2492  jcol = bmat%item(j)
2493  do k = 1, icnt
2494  idx = i0 + k
2495  if (cmat%item(idx) == jcol) then
2496  cmat%A(ndof2*(idx-1)+1:ndof2*idx) = &
2497  cmat%A(ndof2*(idx-1)+1:ndof2*idx) + bmat%A(ndof2*(j-1)+1:ndof2*j)
2498  cycle lj2
2499  endif
2500  enddo
2501  icnt = icnt + 1
2502  idx = i0 + icnt
2503  cmat%item(idx) = jcol
2504  cmat%A(ndof2*(idx-1)+1:ndof2*idx) = bmat%A(ndof2*(j-1)+1:ndof2*j)
2505  enddo lj2
2506  if (i0 + icnt /= cmat%index(i)) stop 'ERROR: merge localmat'
2507  enddo
2508  call sort_and_uniq_rows(cmat)
2509  end subroutine hecmw_localmat_add
2510 
2511  ! subroutine hecmw_localmat_add(Amat, Bmat, Cmat)
2512  ! implicit none
2513  ! type (hecmwST_local_matrix), intent(in) :: Amat
2514  ! type (hecmwST_local_matrix), intent(in) :: Bmat
2515  ! type (hecmwST_local_matrix), intent(out) :: Cmat
2516  ! integer(kind=kint) :: ndof, ndof2, nr, nc, i, js, je, j, jcol, nnz_row, idx, ks, ke, k, kcol
2517  ! if (Amat%ndof /= Bmat%ndof) stop 'ERROR: hecmw_localmat_add: non-matching ndof'
2518  ! ndof = Amat%ndof
2519  ! ndof2 = ndof*ndof
2520  ! nr = min(Amat%nr, Bmat%nr)
2521  ! nc = max(Amat%nc, Bmat%nc)
2522  ! Cmat%ndof = ndof
2523  ! Cmat%nr = nr
2524  ! Cmat%nc = nc
2525  ! Cmat%nnz = Amat%index(nr) + Bmat%index(nr)
2526  ! allocate(Cmat%index(0:nr))
2527  ! allocate(Cmat%item(Cmat%nnz))
2528  ! allocate(Cmat%A(ndof2 * Cmat%nnz))
2529  ! Cmat%index(0) = 0
2530  ! idx = 0
2531  ! do i = 1, nr
2532  ! ! Amat
2533  ! js = Amat%index(i-1)+1
2534  ! je = Amat%index(i)
2535  ! do j = js, je
2536  ! idx = idx + 1
2537  ! Cmat%item(idx) = Amat%item(j)
2538  ! Cmat%A(ndof2*(idx-1)+1:ndof2*idx) = Amat%A(ndof2*(j-1)+1:ndof2*j)
2539  ! enddo
2540  ! ! Bmat
2541  ! js = Bmat%index(i-1)+1
2542  ! je = Bmat%index(i)
2543  ! do j = js, je
2544  ! idx = idx + 1
2545  ! Cmat%item(idx) = Bmat%item(j)
2546  ! Cmat%A(ndof2*(idx-1)+1:ndof2*idx) = Bmat%A(ndof2*(j-1)+1:ndof2*j)
2547  ! enddo
2548  ! Cmat%index(i) = idx
2549  ! enddo
2550  ! if (Cmat%index(nr) /= Cmat%nnz) stop 'ERROR: merge localmat'
2551  ! call sort_and_uniq_rows(Cmat)
2552  ! end subroutine hecmw_localmat_add
2553 
2554  subroutine hecmw_localmat_init_with_hecmat(BKmat, hecMAT, num_lagrange)
2555  implicit none
2556  type (hecmwst_local_matrix), intent(inout) :: bkmat
2557  type (hecmwst_matrix), intent(in) :: hecmat
2558  integer(kind=kint), optional, intent(in) :: num_lagrange
2559  integer(kind=kint) :: ndof, ndof2, i, idx, idx2, js, je, j, k
2560  integer(kind=kint), allocatable :: incl_nz(:), cnt(:)
2561  logical :: check_nonzero
2562  !check_nonzero = .false.
2563  check_nonzero = .true. !!! always checking nonzero seems to be faster
2564  !
2565  ndof = hecmat%NDOF
2566  ndof2 = ndof*ndof
2567  ! nr, nc, nnz
2568  bkmat%nr = hecmat%NP
2569  bkmat%nc = hecmat%NP
2570  bkmat%ndof = ndof
2571  !
2572  if (present(num_lagrange)) then !!! TEMPORARY (DUE TO WRONG conMAT WHEN num_lagrange==0) !!!
2573  if (num_lagrange == 0) then
2574  bkmat%nnz = 0
2575  allocate(bkmat%index(0:bkmat%nr))
2576  bkmat%index(:) = 0
2577  bkmat%item => null()
2578  bkmat%A => null()
2579  return
2580  endif
2581  check_nonzero = .true.
2582  endif
2583  !
2584  if (check_nonzero) then
2585  allocate(incl_nz(hecmat%NPL + hecmat%NPU + hecmat%NP))
2586  allocate(cnt(bkmat%nr))
2587  incl_nz(:) = 0
2588  !$omp parallel default(none), &
2589  !$omp& private(i,idx,js,je,j,k), &
2590  !$omp& shared(BKmat,hecMAT,cnt,ndof2,incl_nz)
2591  !$omp do
2592  do i = 1, bkmat%nr
2593  idx = hecmat%indexL(i-1) + (i-1) + hecmat%indexU(i-1)
2594  cnt(i) = 0
2595  ! lower
2596  js = hecmat%indexL(i-1)+1
2597  je = hecmat%indexL(i)
2598  do j = js, je
2599  idx = idx + 1
2600  do k = 1, ndof2
2601  if (hecmat%AL(ndof2*(j-1)+k) /= 0.0d0) then
2602  incl_nz(idx) = 1
2603  cnt(i) = cnt(i) + 1
2604  exit
2605  endif
2606  enddo
2607  enddo
2608  ! diag
2609  idx = idx + 1
2610  do k = 1, ndof2
2611  if (hecmat%D(ndof2*(i-1)+k) /= 0.0d0) then
2612  incl_nz(idx) = 1
2613  cnt(i) = cnt(i) + 1
2614  exit
2615  endif
2616  enddo
2617  ! upper
2618  js = hecmat%indexU(i-1)+1
2619  je = hecmat%indexU(i)
2620  do j = js, je
2621  idx = idx + 1
2622  do k = 1, ndof2
2623  if (hecmat%AU(ndof2*(j-1)+k) /= 0.0d0) then
2624  incl_nz(idx) = 1
2625  cnt(i) = cnt(i) + 1
2626  exit
2627  endif
2628  enddo
2629  enddo
2630  if (idx /= hecmat%indexL(i) + i + hecmat%indexU(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: count'
2631  enddo
2632  !$omp end do
2633  !$omp end parallel
2634  ! index
2635  allocate(bkmat%index(0:bkmat%nr))
2636  call make_index(bkmat%nr, cnt, bkmat%index)
2637  deallocate(cnt)
2638  bkmat%nnz = bkmat%index(bkmat%nr)
2639  ! item, A
2640  allocate(bkmat%item(bkmat%nnz))
2641  allocate(bkmat%A(ndof2 * bkmat%nnz))
2642  !$omp parallel default(none), &
2643  !$omp& private(i,idx,idx2,js,je,j), &
2644  !$omp& shared(BKmat,hecMAT,ndof2,incl_nz)
2645  !$omp do
2646  do i = 1, bkmat%nr
2647  idx = hecmat%indexL(i-1) + (i-1) + hecmat%indexU(i-1)
2648  idx2 = bkmat%index(i-1)
2649  ! lower
2650  js = hecmat%indexL(i-1)+1
2651  je = hecmat%indexL(i)
2652  do j = js, je
2653  idx = idx + 1
2654  if (incl_nz(idx) == 1) then
2655  idx2 = idx2 + 1
2656  bkmat%item(idx2) = hecmat%itemL(j)
2657  bkmat%A(ndof2*(idx2-1)+1:ndof2*idx2) = hecmat%AL(ndof2*(j-1)+1:ndof2*j)
2658  endif
2659  enddo
2660  ! diag
2661  idx = idx + 1
2662  if (incl_nz(idx) == 1) then
2663  idx2 = idx2 + 1
2664  bkmat%item(idx2) = i
2665  bkmat%A(ndof2*(idx2-1)+1:ndof2*idx2) = hecmat%D(ndof2*(i-1)+1:ndof2*i)
2666  endif
2667  ! upper
2668  js = hecmat%indexU(i-1)+1
2669  je = hecmat%indexU(i)
2670  do j = js, je
2671  idx = idx + 1
2672  if (incl_nz(idx) == 1) then
2673  idx2 = idx2 + 1
2674  bkmat%item(idx2) = hecmat%itemU(j)
2675  bkmat%A(ndof2*(idx2-1)+1:ndof2*idx2) = hecmat%AU(ndof2*(j-1)+1:ndof2*j)
2676  endif
2677  enddo
2678  if (idx /= hecmat%indexL(i) + i + hecmat%indexU(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: copy'
2679  if (idx2 /= bkmat%index(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: index'
2680  enddo
2681  !$omp end do
2682  !$omp end parallel
2683  deallocate(incl_nz)
2684  else
2685  bkmat%nnz = hecmat%NPL + hecmat%NP + hecmat%NPU
2686  allocate(bkmat%index(0:bkmat%nr))
2687  allocate(bkmat%item(bkmat%nnz))
2688  allocate(bkmat%A(ndof2 * bkmat%nnz))
2689  bkmat%index(0) = 0
2690  !$omp parallel do default(none), &
2691  !$omp& private(i,idx,js,je,j), &
2692  !$omp& shared(BKmat,hecMAT,ndof2)
2693  do i = 1, bkmat%nr
2694  idx = hecmat%indexL(i-1) + (i-1) + hecmat%indexU(i-1)
2695  ! lower
2696  js = hecmat%indexL(i-1)+1
2697  je = hecmat%indexL(i)
2698  do j = js, je
2699  idx = idx + 1
2700  bkmat%item(idx) = hecmat%itemL(j)
2701  bkmat%A(ndof2*(idx-1)+1:ndof2*idx) = hecmat%AL(ndof2*(j-1)+1:ndof2*j)
2702  enddo
2703  ! diag
2704  idx = idx + 1
2705  bkmat%item(idx) = i
2706  bkmat%A(ndof2*(idx-1)+1:ndof2*idx) = hecmat%D(ndof2*(i-1)+1:ndof2*i)
2707  ! upper
2708  js = hecmat%indexU(i-1)+1
2709  je = hecmat%indexU(i)
2710  do j = js, je
2711  idx = idx + 1
2712  bkmat%item(idx) = hecmat%itemU(j)
2713  bkmat%A(ndof2*(idx-1)+1:ndof2*idx) = hecmat%AU(ndof2*(j-1)+1:ndof2*j)
2714  enddo
2715  bkmat%index(i) = idx
2716  if (idx /= hecmat%indexL(i) + i + hecmat%indexU(i)) stop 'ERROR: hecmw_localmat_init_with_hecmat: copy'
2717  enddo
2718  !$omp end parallel do
2719  endif
2720  end subroutine hecmw_localmat_init_with_hecmat
2721 
2722  subroutine hecmw_localmat_add_hecmat(BKmat, hecMAT)
2723  implicit none
2724  type (hecmwst_local_matrix), intent(inout) :: bkmat
2725  type (hecmwst_matrix), intent(in) :: hecmat
2726  type (hecmwst_local_matrix) :: w1mat, w2mat
2727  !! Should Be Simple If Non-Zero Profile Is Kept !!
2728  call hecmw_localmat_init_with_hecmat(w1mat, hecmat)
2729  if (debug >= 3) then
2730  write(700+hecmw_comm_get_rank(),*) 'BKmat (hecMAT)'
2731  if (debug == 3) then
2732  call hecmw_localmat_write_ij(w1mat, 700+hecmw_comm_get_rank())
2733  else
2734  call hecmw_localmat_write(w1mat, 700+hecmw_comm_get_rank())
2735  endif
2736  endif
2737  call hecmw_localmat_add(bkmat, w1mat, w2mat)
2738  call hecmw_localmat_free(bkmat)
2739  call hecmw_localmat_free(w1mat)
2740  bkmat%nr = w2mat%nr
2741  bkmat%nc = w2mat%nc
2742  bkmat%nnz = w2mat%nnz
2743  bkmat%ndof = w2mat%ndof
2744  bkmat%index => w2mat%index
2745  bkmat%item => w2mat%item
2746  bkmat%A => w2mat%A
2747  end subroutine hecmw_localmat_add_hecmat
2748 
2749  subroutine hecmw_localmat_multmat(BKmat, BTmat, hecMESH, BKTmat)
2750  implicit none
2751  type (hecmwst_local_matrix), intent(inout) :: bkmat
2752  type (hecmwst_local_matrix), intent(inout) :: btmat
2753  type (hecmwst_local_mesh), intent(inout) :: hecmesh
2754  type (hecmwst_local_matrix), intent(out) :: bktmat
2755  type (hecmwst_matrix_comm) :: heccomm
2756  type (hecmwst_local_mesh) :: hecmeshnew
2757  type (hecmwst_local_matrix), allocatable :: bt_exp(:)
2758  type (hecmwst_local_matrix) :: bt_imp, bt_all
2759  integer(kind=kint), allocatable :: exp_cols_index(:)
2760  integer(kind=kint), allocatable :: exp_cols_item(:,:)
2761  real(kind=kreal) :: t0, t1
2762  t0 = hecmw_wtime()
2763  !
2764  call make_comm_table(bkmat, hecmesh, heccomm)
2765  if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: make_comm_table done'
2766  t1 = hecmw_wtime()
2767  if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (1) : ',t1-t0
2768  t0 = hecmw_wtime()
2769  !
2770  if (btmat%nr > hecmesh%nn_internal) then
2771  ! consider only internal part of BTmat
2772  if (debug >= 1) write(0,'(A)') 'DEBUG: hecmw_localmat_multmat: ignore external part of BTmat'
2773  btmat%nr = hecmesh%nn_internal
2774  btmat%nnz = btmat%index(btmat%nr)
2775  endif
2776  !
2777  call extract_bt_exp(btmat, heccomm, bt_exp)
2778  if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: extract_BT_exp done'
2779  t1 = hecmw_wtime()
2780  if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (2) : ',t1-t0
2781  t0 = hecmw_wtime()
2782  !
2783  call prepare_column_info(hecmesh, bt_exp, exp_cols_index, exp_cols_item)
2784  if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: prepare column info done'
2785  t1 = hecmw_wtime()
2786  if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (3) : ',t1-t0
2787  t0 = hecmw_wtime()
2788  !
2789  call send_bt_exp_and_recv_bt_imp(hecmesh, heccomm, bt_exp, exp_cols_index, exp_cols_item, bt_imp, hecmeshnew)
2790  if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: send BT_exp and recv BT_imp done'
2791  t1 = hecmw_wtime()
2792  if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (4) : ',t1-t0
2793  t0 = hecmw_wtime()
2794  call free_comm_table(heccomm)
2795  !
2796  call concat_btmat_and_bt_imp(btmat, bt_imp, bt_all)
2797  if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: concat BTmat and BT_imp into BT_all done'
2798  t1 = hecmw_wtime()
2799  if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (5) : ',t1-t0
2800  t0 = hecmw_wtime()
2801  call hecmw_localmat_free(bt_imp)
2802  !
2803  call multiply_mat_mat(bkmat, bt_all, bktmat)
2804  if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: multiply BKmat and BT_all into BKTmat done'
2805  t1 = hecmw_wtime()
2806  if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (6) : ',t1-t0
2807  t0 = hecmw_wtime()
2808  call hecmw_localmat_free(bt_all)
2809  !
2810  if (hecmesh%n_neighbor_pe > 0) then
2811  hecmesh%n_node = hecmeshnew%n_node
2812  hecmesh%n_neighbor_pe = hecmeshnew%n_neighbor_pe
2813  deallocate(hecmesh%neighbor_pe)
2814  deallocate(hecmesh%import_index)
2815  deallocate(hecmesh%export_index)
2816  deallocate(hecmesh%import_item)
2817  deallocate(hecmesh%export_item)
2818  deallocate(hecmesh%node_ID)
2819  deallocate(hecmesh%global_node_ID)
2820  hecmesh%neighbor_pe => hecmeshnew%neighbor_pe
2821  hecmesh%import_index => hecmeshnew%import_index
2822  hecmesh%export_index => hecmeshnew%export_index
2823  hecmesh%import_item => hecmeshnew%import_item
2824  hecmesh%export_item => hecmeshnew%export_item
2825  hecmesh%node_ID => hecmeshnew%node_ID
2826  hecmesh%global_node_ID => hecmeshnew%global_node_ID
2827  if (debug >= 1) write(0,*) 'DEBUG: hecmw_localmat_multmat: update hecMESH done'
2828  t1 = hecmw_wtime()
2829  if (timer >= 2) write(0,'(A,f10.4)') '##### hecmw_localmat_multmat (7) : ',t1-t0
2830  endif
2831  end subroutine hecmw_localmat_multmat
2832 
2833  subroutine make_comm_table(BKmat, hecMESH, hecCOMM)
2834  use m_hecmw_comm_f
2835  implicit none
2836  type (hecmwst_local_matrix), intent(in) :: bkmat
2837  type (hecmwst_local_mesh), intent(in) :: hecmesh
2838  type (hecmwst_matrix_comm), intent(out) :: heccomm
2839  integer(kind=kint) :: nn_int, nn_ext, nnb, i, icol, irank, idom, idx, n_send, tag, js, je, len
2840  integer(kind=kint), allocatable :: is_nz_col(:), imp_cnt(:), exp_cnt(:), import_item_remote(:)
2841  integer(kind=kint), allocatable :: requests(:), statuses(:,:)
2842  heccomm%zero = hecmesh%zero
2843  heccomm%HECMW_COMM = hecmesh%MPI_COMM
2844  heccomm%PETOT = hecmesh%PETOT
2845  heccomm%PEsmpTOT = hecmesh%PEsmpTOT
2846  heccomm%my_rank = hecmesh%my_rank
2847  heccomm%errnof = hecmesh%errnof
2848  heccomm%n_subdomain = hecmesh%n_subdomain
2849  heccomm%n_neighbor_pe = hecmesh%n_neighbor_pe
2850  allocate(heccomm%neighbor_pe(heccomm%n_neighbor_pe))
2851  heccomm%neighbor_pe(:) = hecmesh%neighbor_pe(:)
2852  !
2853  nn_int = hecmesh%nn_internal
2854  nn_ext = hecmesh%n_node - hecmesh%nn_internal
2855  nnb = heccomm%n_neighbor_pe
2856  !
2857  ! check_external_nz_cols (by profile (not number))
2858  allocate(is_nz_col(nn_ext))
2859  is_nz_col(:) = 0
2860  do i = 1, bkmat%index(nn_int)
2861  icol = bkmat%item(i)
2862  if (icol > nn_int) is_nz_col(icol - nn_int) = 1
2863  enddo
2864  !
2865  ! count_nz_cols_per_rank
2866  allocate(imp_cnt(nnb))
2867  imp_cnt(:) = 0
2868  do i = 1, nn_ext
2869  if (is_nz_col(i) == 1) then
2870  irank = hecmesh%node_ID(2*(nn_int+i))
2871  call rank_to_idom(hecmesh, irank, idom)
2872  imp_cnt(idom) = imp_cnt(idom) + 1
2873  endif
2874  enddo
2875  if (debug >= 3) write(0,*) ' DEBUG3: imp_cnt',imp_cnt(:)
2876  !
2877  ! make_index
2878  allocate(heccomm%import_index(0:nnb))
2879  call make_index(nnb, imp_cnt, heccomm%import_index)
2880  if (debug >= 3) write(0,*) ' DEBUG3: import_index',heccomm%import_index(:)
2881  !
2882  ! fill item
2883  allocate(heccomm%import_item(heccomm%import_index(nnb)))
2884  imp_cnt(:) = 0
2885  do i = 1, nn_ext
2886  if (is_nz_col(i) == 1) then
2887  irank = hecmesh%node_ID(2*(nn_int+i))
2888  call rank_to_idom(hecmesh, irank, idom)
2889  imp_cnt(idom) = imp_cnt(idom) + 1
2890  idx = heccomm%import_index(idom-1)+imp_cnt(idom)
2891  heccomm%import_item(idx) = nn_int+i
2892  endif
2893  enddo
2894  if (debug >= 3) write(0,*) ' DEBUG3: import_item',heccomm%import_item(:)
2895  !
2896  allocate(import_item_remote(heccomm%import_index(nnb)))
2897  do i = 1, heccomm%import_index(nnb)
2898  import_item_remote(i) = hecmesh%node_ID(2*heccomm%import_item(i)-1)
2899  enddo
2900  if (debug >= 3) write(0,*) ' DEBUG3: import_item_remote',import_item_remote(:)
2901  !
2902  allocate(requests(2*nnb))
2903  allocate(statuses(hecmw_status_size, 2*nnb))
2904  !
2905  ! send/recv
2906  n_send = 0
2907  do idom = 1, nnb
2908  irank = heccomm%neighbor_pe(idom)
2909  n_send = n_send + 1
2910  tag = 6001
2911  call hecmw_isend_int(imp_cnt(idom), 1, irank, tag, heccomm%HECMW_COMM, requests(n_send))
2912  if (imp_cnt(idom) > 0) then
2913  js = heccomm%import_index(idom-1)+1
2914  je = heccomm%import_index(idom)
2915  len = je-js+1
2916  n_send = n_send + 1
2917  tag = 6002
2918  call hecmw_isend_int(import_item_remote(js:je), len, irank, tag, &
2919  heccomm%HECMW_COMM, requests(n_send))
2920  endif
2921  enddo
2922  !
2923  ! index
2924  allocate(exp_cnt(nnb))
2925  do idom = 1, nnb
2926  irank = heccomm%neighbor_pe(idom)
2927  tag = 6001
2928  call hecmw_recv_int(exp_cnt(idom), 1, irank, tag, heccomm%HECMW_COMM, statuses(:,1))
2929  enddo
2930  allocate(heccomm%export_index(0:nnb))
2931  call make_index(nnb, exp_cnt, heccomm%export_index)
2932  if (debug >= 3) write(0,*) ' DEBUG3: export_index',heccomm%export_index(:)
2933  !
2934  ! item
2935  allocate(heccomm%export_item(heccomm%export_index(nnb)))
2936  do idom = 1, nnb
2937  if (exp_cnt(idom) <= 0) cycle
2938  irank = heccomm%neighbor_pe(idom)
2939  js = heccomm%export_index(idom-1)+1
2940  je = heccomm%export_index(idom)
2941  len = je-js+1
2942  tag = 6002
2943  call hecmw_recv_int(heccomm%export_item(js:je), len, irank, tag, &
2944  heccomm%HECMW_COMM, statuses(:,1))
2945  enddo
2946  if (debug >= 3) write(0,*) ' DEBUG3: export_item',heccomm%export_item(:)
2947  call hecmw_waitall(n_send, requests, statuses)
2948  !
2949  deallocate(imp_cnt)
2950  deallocate(exp_cnt)
2951  deallocate(import_item_remote)
2952  end subroutine make_comm_table
2953 
2954  subroutine free_comm_table(hecCOMM)
2955  implicit none
2956  type (hecmwST_matrix_comm), intent(inout) :: hecCOMM
2957  deallocate(heccomm%neighbor_pe)
2958  deallocate(heccomm%import_index)
2959  deallocate(heccomm%import_item)
2960  deallocate(heccomm%export_index)
2961  deallocate(heccomm%export_item)
2962  end subroutine free_comm_table
2963 
2964  subroutine extract_bt_exp(BTmat, hecCOMM, BT_exp)
2965  implicit none
2966  type (hecmwST_local_matrix), intent(in) :: BTmat
2967  type (hecmwST_matrix_comm), intent(in) :: hecCOMM
2968  type (hecmwST_local_matrix), allocatable, intent(out) :: BT_exp(:)
2969  integer(kind=kint) :: ndof, ndof2, idom, idx_0, idx_n, j, jrow, nnz_row, idx, ks, ke, k
2970  if (heccomm%n_neighbor_pe == 0) return
2971  allocate(bt_exp(heccomm%n_neighbor_pe))
2972  ndof = btmat%ndof
2973  ndof2 = ndof * ndof
2974  do idom = 1, heccomm%n_neighbor_pe
2975  idx_0 = heccomm%export_index(idom-1)
2976  idx_n = heccomm%export_index(idom)
2977  bt_exp(idom)%nr = idx_n - idx_0
2978  bt_exp(idom)%nc = btmat%nc
2979  bt_exp(idom)%nnz = 0
2980  bt_exp(idom)%ndof = ndof
2981  allocate(bt_exp(idom)%index(0:bt_exp(idom)%nr))
2982  bt_exp(idom)%index(0) = 0
2983  do j = 1, bt_exp(idom)%nr
2984  jrow = heccomm%export_item(idx_0 + j)
2985  nnz_row = btmat%index(jrow) - btmat%index(jrow-1)
2986  bt_exp(idom)%index(j) = bt_exp(idom)%index(j-1) + nnz_row
2987  enddo
2988  bt_exp(idom)%nnz = bt_exp(idom)%index(bt_exp(idom)%nr)
2989  allocate(bt_exp(idom)%item(bt_exp(idom)%nnz))
2990  allocate(bt_exp(idom)%A(ndof2 * bt_exp(idom)%nnz))
2991  idx = 0
2992  do j = 1, bt_exp(idom)%nr
2993  jrow = heccomm%export_item(idx_0 + j)
2994  ks = btmat%index(jrow-1) + 1
2995  ke = btmat%index(jrow)
2996  do k = ks, ke
2997  idx = idx + 1
2998  bt_exp(idom)%item(idx) = btmat%item(k)
2999  bt_exp(idom)%A(ndof2*(idx-1)+1:ndof2*idx) = btmat%A(ndof2*(k-1)+1:ndof2*k)
3000  enddo
3001  if (idx /= bt_exp(idom)%index(j)) stop 'ERROR: extract BT_exp'
3002  enddo
3003  enddo
3004  end subroutine extract_bt_exp
3005 
3006  subroutine send_bt_exp_and_recv_bt_imp(hecMESH, hecCOMM, BT_exp, exp_cols_index, exp_cols_item, BT_imp, hecMESHnew)
3007  use m_hecmw_comm_f
3008  implicit none
3009  type (hecmwST_local_mesh), intent(in) :: hecMESH
3010  type (hecmwST_matrix_comm), intent(in) :: hecCOMM
3011  type (hecmwST_local_matrix), allocatable, intent(inout) :: BT_exp(:)
3012  integer(kind=kint), allocatable, intent(inout) :: exp_cols_index(:)
3013  integer(kind=kint), allocatable, intent(inout) :: exp_cols_item(:,:)
3014  type (hecmwST_local_matrix), intent(out) :: BT_imp
3015  type (hecmwST_local_mesh), intent(inout) :: hecMESHnew
3016  integer(kind=kint), allocatable :: nnz_imp(:), cnt(:), index_imp(:)
3017  integer(kind=kint), allocatable :: imp_cols_index(:)
3018  integer(kind=kint), allocatable :: imp_cols_item(:,:)
3019  real(kind=kreal), allocatable :: imp_vals_item(:)
3020  integer(kind=kint) :: nnb, ndof, ndof2, idom, irank, nr, n_send, tag, idx_0, idx_n, j, jj, nnz
3021  integer(kind=kint), allocatable :: requests(:)
3022  integer(kind=kint), allocatable :: statuses(:,:)
3023  integer(kind=kint), allocatable :: map(:), add_nodes(:,:)
3024  integer(kind=kint) :: n_add_node, i0
3025  nnb = heccomm%n_neighbor_pe
3026  if (nnb == 0) then
3027  bt_imp%nr = 0
3028  bt_imp%nc = 0
3029  bt_imp%nnz = 0
3030  bt_imp%ndof = 0
3031  allocate(bt_imp%index(0:0))
3032  bt_imp%index(0) = 0
3033  return
3034  endif
3035  ndof = bt_exp(1)%ndof
3036  ndof2 = ndof*ndof
3037  allocate(requests(nnb*3))
3038  allocate(statuses(hecmw_status_size, nnb*3))
3039  n_send = 0
3040  do idom = 1, nnb
3041  irank = heccomm%neighbor_pe(idom)
3042  nr = bt_exp(idom)%nr
3043  if (nr == 0) cycle
3044  n_send = n_send + 1
3045  tag = 3001
3046  call hecmw_isend_int(bt_exp(idom)%index(0:bt_exp(idom)%nr), bt_exp(idom)%nr + 1, &
3047  irank, tag, heccomm%HECMW_COMM, requests(n_send))
3048  if (bt_exp(idom)%nnz == 0) cycle
3049  n_send = n_send + 1
3050  tag = 3002
3051  call hecmw_isend_int(exp_cols_item(1,exp_cols_index(idom-1)+1), &
3052  cncol_item * bt_exp(idom)%nnz, irank, tag, heccomm%HECMW_COMM, requests(n_send))
3053  n_send = n_send + 1
3054  tag = 3003
3055  call hecmw_isend_r(bt_exp(idom)%A, ndof2 * bt_exp(idom)%nnz, &
3056  irank, tag, heccomm%HECMW_COMM, requests(n_send))
3057  enddo
3058  !
3059  ! BT_imp%nr = hecCOMM%import_index(nnb)
3060  bt_imp%nr = hecmesh%n_node - hecmesh%nn_internal
3061  bt_imp%nc = 0 !!! TEMPORARY
3062  bt_imp%nnz = 0
3063  bt_imp%ndof = ndof
3064  !
3065  allocate(nnz_imp(nnb))
3066  allocate(cnt(bt_imp%nr))
3067  !
3068  cnt(:) = 0
3069  do idom = 1, nnb
3070  irank = heccomm%neighbor_pe(idom)
3071  idx_0 = heccomm%import_index(idom-1)
3072  idx_n = heccomm%import_index(idom)
3073  nr = idx_n - idx_0
3074  if (nr == 0) then
3075  nnz_imp(idom) = 0
3076  cycle
3077  endif
3078  allocate(index_imp(0:nr))
3079  tag = 3001
3080  call hecmw_recv_int(index_imp(0:nr), nr+1, irank, tag, &
3081  heccomm%HECMW_COMM, statuses(:,1))
3082  nnz_imp(idom) = index_imp(nr)
3083  do j = 1, nr
3084  jj = heccomm%import_item(idx_0 + j) - hecmesh%nn_internal
3085  if (jj < 1 .or. bt_imp%nr < jj) stop 'ERROR: jj out of range'
3086  if (cnt(jj) /= 0) stop import rows?'
3087  cnt(jj) = index_imp(j) - index_imp(j-1)
3088  enddo
3089  deallocate(index_imp)
3090  enddo
3091  !
3092  allocate(imp_cols_index(0:nnb))
3093  call make_index(nnb, nnz_imp, imp_cols_index)
3094  deallocate(nnz_imp)
3095  !
3096  allocate(BT_imp%index(0:BT_imp%nr))
3097  call make_index(BT_imp%nr, cnt, BT_imp%index)
3098  deallocate(cnt)
3099  !
3100  BT_imp%nnz = BT_imp%index(BT_imp%nr)
3101  if (BT_imp%nnz /= imp_cols_index(nnb)) &
3102  stop 'error: total num of nonzero of bt_imp'
3103  !
3104  allocate(imp_cols_item(cNCOL_ITEM, BT_imp%nnz))
3105  allocate(imp_vals_item(ndof2 * BT_imp%nnz))
3106  !
3107  do idom = 1, nnb
3108  irank = hecCOMM%neighbor_pe(idom)
3109  idx_0 = imp_cols_index(idom-1)
3110  idx_n = imp_cols_index(idom)
3111  nnz = idx_n - idx_0
3112  if (nnz == 0) cycle
3113  tag = 3002
3114  call HECMW_RECV_INT(imp_cols_item(1, idx_0 + 1), cNCOL_ITEM * nnz, &
3115  irank, tag, hecCOMM%HECMW_COMM, statuses(:,1))
3116  tag = 3003
3117  call HECMW_RECV_R(imp_vals_item(ndof2*idx_0 + 1), ndof2 * nnz, &
3118  irank, tag, hecCOMM%HECMW_COMM, statuses(:,1))
3119  enddo
3120  call HECMW_Waitall(n_send, requests, statuses)
3121  if (DEBUG >= 2) write(0,*) ' debug2: send bt_imp and recv into temporary data done'
3122  !
3123  deallocate(requests)
3124  deallocate(statuses)
3125  !
3126  do idom = 1, nnb
3127  call hecmw_localmat_free(BT_exp(idom))
3128  enddo
3129  deallocate(BT_exp)
3130  deallocate(exp_cols_index)
3131  deallocate(exp_cols_item)
3132  !
3133  call copy_mesh(hecMESH, hecMESHnew)
3134  !
3135  call map_imported_cols(hecMESHnew, imp_cols_index(nnb), imp_cols_item, n_add_node, add_nodes, map, i0)
3136  if (DEBUG >= 2) write(0,*) ' debug2: map imported cols done'
3137  !
3138  call update_comm_table(hecMESHnew, n_add_node, add_nodes, i0)
3139  if (DEBUG >= 2) write(0,*) ' debug2: update comm_table done'
3140  !
3141  BT_imp%nc = hecMESHnew%n_node
3142  !
3143  allocate(BT_imp%item(BT_imp%nnz))
3144  allocate(BT_imp%A(ndof2 * BT_imp%nnz))
3145  call copy_vals_to_BT_imp(hecCOMM, hecMESH%nn_internal, imp_cols_index, map, imp_vals_item, BT_imp)
3146  if (DEBUG >= 2) write(0,*) ' debug2: copy vals to bt_imp done'
3147  !
3148  deallocate(imp_cols_index)
3149  deallocate(imp_cols_item)
3150  deallocate(imp_vals_item)
3151  deallocate(map)
3152  end subroutine send_BT_exp_and_recv_BT_imp
3153 
3154  subroutine copy_vals_to_BT_imp(hecCOMM, nn_internal, imp_cols_index, map, imp_vals_item, BT_imp)
3155  implicit none
3156  type (hecmwST_matrix_comm), intent(in) :: hecCOMM
3157  integer(kind=kint), intent(in) :: nn_internal
3158  integer(kind=kint), allocatable, intent(in) :: imp_cols_index(:)
3159  integer(kind=kint), intent(in) :: map(:)
3160  real(kind=kreal), intent(in) :: imp_vals_item(:)
3161  type (hecmwST_local_matrix), intent(inout) :: BT_imp
3162  integer(kind=kint) :: nnb, ndof2, idx, idom, idx_0, idx_n, nr, j, jrow, ks, ke, k
3163  nnb = hecCOMM%n_neighbor_pe
3164  ndof2 = BT_imp%ndof ** 2
3165  idx = 0
3166  do idom = 1, nnb
3167  idx_0 = hecCOMM%import_index(idom-1)
3168  idx_n = hecCOMM%import_index(idom)
3169  nr = idx_n - idx_0
3170  if (nr == 0) cycle
3171  do j = 1, nr
3172  jrow = hecCOMM%import_item(idx_0 + j) - nn_internal
3173  ks = BT_imp%index(jrow-1)+1
3174  ke = BT_imp%index(jrow)
3175  do k = ks, ke
3176  idx = idx + 1
3177  BT_imp%item(k) = map(idx)
3178  BT_imp%A(ndof2*(k-1)+1:ndof2*k) = imp_vals_item(ndof2*(idx-1)+1:ndof2*idx)
3179  enddo
3180  enddo
3181  if (idx /= imp_cols_index(idom)) stop 'error: copy vals to bt_imp'
3182  enddo
3183  end subroutine copy_vals_to_BT_imp
3184 
3185  subroutine concat_BTmat_and_BT_imp(BTmat, BT_imp, BT_all)
3186  implicit none
3187  type (hecmwST_local_matrix), intent(in) :: BTmat
3188  type (hecmwST_local_matrix), intent(in) :: BT_imp
3189  type (hecmwST_local_matrix), intent(out) :: BT_all
3190  integer(kind=kint) :: ndof, ndof2, i, ii
3191  ndof = BTmat%ndof
3192 .and. if (BT_imp%nr > 0 BT_imp%ndof /= ndof) stop 'error: concat btmat and bt_imp: ndof'
3193  ndof2 = ndof*ndof
3194  BT_all%nr = BTmat%nr + BT_imp%nr
3195  BT_all%nc = max(BTmat%nc, BT_imp%nc)
3196  BT_all%nnz = BTmat%nnz + BT_imp%nnz
3197  BT_all%ndof = ndof
3198  allocate(BT_all%index(0:BT_all%nr))
3199  allocate(BT_all%item(BT_all%nnz))
3200  allocate(BT_all%A(ndof2 * BT_all%nnz))
3201  BT_all%index(0) = 0
3202  do i = 1, BTmat%nr
3203  BT_all%index(i) = BTmat%index(i)
3204  enddo
3205  do i = 1, BT_imp%nr
3206  BT_all%index(BTmat%nr+i) = BT_all%index(BTmat%nr+i-1) + &
3207  BT_imp%index(i) - BT_imp%index(i-1)
3208  enddo
3209  do i = 1, BTmat%nnz
3210  BT_all%item(i) = BTmat%item(i)
3211  BT_all%A(ndof2*(i-1)+1:ndof2*i) = BTmat%A(ndof2*(i-1)+1:ndof2*i)
3212  enddo
3213  do i = 1, BT_imp%nnz
3214  ii = BTmat%nnz + i
3215  BT_all%item(ii) = BT_imp%item(i)
3216  BT_all%A(ndof2*(ii-1)+1:ndof2*ii) = BT_imp%A(ndof2*(i-1)+1:ndof2*i)
3217  enddo
3218  end subroutine concat_BTmat_and_BT_imp
3219 
3220  subroutine multiply_mat_mat(Amat, Bmat, Cmat)
3221  implicit none
3222  type (hecmwST_local_matrix), intent(in) :: Amat
3223  type (hecmwST_local_matrix), intent(in) :: Bmat
3224  type (hecmwST_local_matrix), intent(out) :: Cmat
3225  integer(kind=kint) :: ndof, ndof2, nr, nc, nnz, i, icnt
3226  integer(kind=kint) :: js, je, j, jj, ks, ke, k, kk, l, ll, l0
3227  integer(kind=kint), allocatable :: iw(:)
3228  real(kind=kreal), pointer :: Ap(:), Bp(:), Cp(:)
3229  real(kind=kreal) :: t0, t1
3230  t0 = hecmw_wtime()
3231  if (Amat%ndof /= Bmat%ndof) stop 'error: multiply_mat_mat: unmatching ndof'
3232  ndof = Amat%ndof
3233  ndof2 = ndof*ndof
3234  nr = Amat%nr
3235  nc = Bmat%nc
3236  if (Amat%nc /= Bmat%nr) then
3237  write(0,*) 'amat: nr, nc = ', Amat%nr, Amat%nc
3238  write(0,*) 'bmat: nr, nc = ', Bmat%nr, Bmat%nc
3239  stop 'error: multiply_mat_mat: unmatching size'
3240  endif
3241  Cmat%ndof = ndof
3242  Cmat%nr = nr
3243  Cmat%nc = nc
3244  allocate(Cmat%index(0:nr))
3245  Cmat%index(0) = 0
3246  !$omp parallel default(none), &
3247  !$omp& private(iw,i,icnt,js,je,j,jj,ks,ke,k,kk,l), &
3248  !$omp& shared(nr,nc,Amat,Bmat,Cmat)
3249  allocate(iw(nc))
3250  !$omp do
3251  do i = 1, nr
3252  icnt = 0
3253  js = Amat%index(i-1)+1
3254  je = Amat%index(i)
3255  do j = js, je
3256  jj = Amat%item(j)
3257  ks = Bmat%index(jj-1)+1
3258  ke = Bmat%index(jj)
3259  kl1: do k = ks, ke
3260  kk = Bmat%item(k)
3261  do l = 1, icnt
3262  if (iw(l) == kk) cycle kl1
3263  enddo
3264  icnt = icnt + 1
3265  iw(icnt) = kk
3266  enddo kl1
3267  enddo
3268  Cmat%index(i) = icnt
3269  enddo
3270  !$omp end do
3271  deallocate(iw)
3272  !$omp end parallel
3273  do i = 1, nr
3274  Cmat%index(i) = Cmat%index(i-1) + Cmat%index(i)
3275  enddo
3276  nnz = Cmat%index(nr)
3277  Cmat%nnz = nnz
3278  !write(0,*) 'nnz',nnz
3279  t1 = hecmw_wtime()
3280  if (TIMER >= 3) write(0, '(a,f10.4)') "###### multiply_mat_mat (1) : ",t1-t0
3281  t0 = hecmw_wtime()
3282  allocate(Cmat%item(nnz))
3283  allocate(Cmat%A(ndof2 * nnz))
3284  Cmat%A(:) = 0.0d0
3285  !$omp parallel default(none), &
3286  !$omp& private(i,icnt,l0,js,je,j,jj,Ap,ks,ke,k,kk,Bp,ll,l,Cp), &
3287  !$omp& shared(nr,Cmat,Amat,Bmat,ndof2,ndof)
3288  !$omp do
3289  do i = 1, nr
3290  icnt = 0
3291  l0 = Cmat%index(i-1)
3292  ! item
3293  js = Amat%index(i-1)+1
3294  je = Amat%index(i)
3295  do j = js, je
3296  jj = Amat%item(j)
3297  Ap => Amat%A(ndof2*(j-1)+1:ndof2*j)
3298  ks = Bmat%index(jj-1)+1
3299  ke = Bmat%index(jj)
3300  do k = ks, ke
3301  kk = Bmat%item(k)
3302  Bp => Bmat%A(ndof2*(k-1)+1:ndof2*k)
3303  ll = -1
3304  do l = 1, icnt
3305  if (Cmat%item(l0+l) == kk) then
3306  ll = l0 + l
3307  exit
3308  endif
3309  enddo
3310  if (ll < 0) then
3311  icnt = icnt + 1
3312  ll = l0 + icnt
3313  Cmat%item(ll) = kk
3314  endif
3315  Cp => Cmat%A(ndof2*(ll-1)+1:ndof2*ll)
3316  call blk_matmul_add(ndof, Ap, Bp, Cp)
3317  enddo
3318  enddo
3319  !write(0,*) 'l0,icnt,index(i)',Cmat%index(i-1),icnt,Cmat%index(i)
3320  if (l0+icnt /= Cmat%index(i)) stop 'error: multiply_mat_mat: unknown error'
3321  enddo
3322  !$omp end do
3323  !$omp end parallel
3324  t1 = hecmw_wtime()
3325  if (TIMER >= 3) write(0, '(a,f10.4)') "###### multiply_mat_mat (2) : ",t1-t0
3326  t0 = hecmw_wtime()
3327  call sort_and_uniq_rows(Cmat)
3328  t1 = hecmw_wtime()
3329  if (TIMER >= 3) write(0, '(a,f10.4)') "###### multiply_mat_mat (3) : ",t1-t0
3330  end subroutine multiply_mat_mat
3331 
3332  subroutine blk_matmul_add(ndof, A, B, AB)
3333  implicit none
3334  integer, intent(in) :: ndof
3335  real(kind=kreal), intent(in) :: A(:), B(:)
3336  real(kind=kreal), intent(inout) :: AB(:)
3337  integer :: ndof2, i, j, k, i0, j0, ij, ik, jk
3338  ndof2=ndof*ndof
3339  do i=1,ndof
3340  i0=(i-1)*ndof
3341  do j=1,ndof
3342  ij=i0+j
3343  j0=(j-1)*ndof
3344  do k=1,ndof
3345  ik=i0+k
3346  jk=j0+k
3347  !$omp atomic
3348  AB(ik)=AB(ik)+A(ij)*B(jk)
3349  enddo
3350  enddo
3351  enddo
3352  end subroutine blk_matmul_add
3353 
3354  subroutine hecmw_localmat_make_hecmat(hecMAT, BTtKTmat, hecTKT)
3355  implicit none
3356  type (hecmwST_matrix), intent(in) :: hecMAT
3357  type (hecmwST_local_matrix), intent(in) :: BTtKTmat
3358  type (hecmwST_matrix), intent(inout) :: hecTKT
3359  call make_new_hecmat(hecMAT, BTtKTmat, hecTKT)
3360  end subroutine hecmw_localmat_make_hecmat
3361 
3362  subroutine hecmw_localmat_shrink_comm_table(BKmat, hecMESH)
3363  implicit none
3364  type (hecmwST_local_matrix), intent(in) :: BKmat
3365  type (hecmwST_local_mesh), intent(inout) :: hecMESH
3366  type (hecmwST_matrix_comm) :: hecCOMM
3367  call make_comm_table(BKmat, hecMESH, hecCOMM)
3368  deallocate(hecMESH%import_index)
3369  deallocate(hecMESH%import_item)
3370  deallocate(hecMESH%export_index)
3371  deallocate(hecMESH%export_item)
3372  hecMESH%import_index => hecCOMM%import_index
3373  hecMESH%import_item => hecCOMM%import_item
3374  hecMESH%export_index => hecCOMM%export_index
3375  hecMESH%export_item => hecCOMM%export_item
3376  deallocate(hecCOMM%neighbor_pe)
3377  end subroutine hecmw_localmat_shrink_comm_table
3378 
3379 end module hecmw_local_matrix
subroutine, public hecmw_bsearch_int_array(array, istart, iend, val, idx)
recursive subroutine, public hecmw_qsort_int_array(array, istart, iend)
subroutine, public hecmw_uniq_int_array(array, istart, iend, ndup)
subroutine, public hecmw_localmat_shrink_comm_table(BKmat, hecMESH)
subroutine, public hecmw_localmat_init_with_hecmat(BKmat, hecMAT, num_lagrange)
subroutine trimatmul_ttkt(BTtmat, hecMAT, BTmat, BTtKT)
subroutine, public hecmw_trimatmul_ttkt(hecMESH, BTtmat, hecMAT, BTmat, iwS, num_lagrange, hecTKT)
subroutine, public hecmw_localmat_free(Tmat)
subroutine allocate_bt_int(hecMESH, ndof, imp_rows_index, imp_rows_item, BT_int)
subroutine, public hecmw_localmat_add(Amat, Bmat, Cmat)
subroutine count_new_comm_nodes(npe, org_nnb, org_nbpe, org_index, n_add, n_new)
subroutine, public hecmw_localmat_transpose(Tmat, Ttmat)
subroutine free_comm_table(hecCOMM)
subroutine, public hecmw_localmat_multmat(BKmat, BTmat, hecMESH, BKTmat)
subroutine send_recv_bt_ext_contents(hecMESH, BT_ext, exp_rows_index, exp_cols_index, exp_rows_item, exp_cols_item, imp_rows_index, imp_cols_index, imp_rows_item, imp_cols_item, imp_vals_item)
subroutine, public hecmw_localmat_mulvec(BTmat, V, TV)
subroutine count_add_imp_per_rank(n_add_node, add_nodes, npe, n_add_imp)
subroutine hecmw_trimatmul_ttkt_parallel(hecMESH, BTtmat, hecMAT, BTmat, iwS, num_lagrange, hecTKT)
subroutine, public hecmw_localmat_write(Tmat, iunit)
subroutine, public hecmw_localmat_blocking(Tmat, ndof, BTmat)
subroutine, public hecmw_localmat_make_hecmat(hecMAT, BTtKTmat, hecTKT)
subroutine, public hecmw_trimatmul_ttkt_mpc(hecMESH, hecMAT, hecTKT)
subroutine hecmw_trimatmul_ttkt_serial(hecMESH, BTtmat, hecMAT, BTmat, iwS, num_lagrange, hecTKT)
subroutine, public hecmw_localmat_assemble(BTmat, hecMESH, hecMESHnew)
subroutine, public hecmw_localmat_add_hecmat(BKmat, hecMAT)
subroutine, public hecmw_pair_array_append(parray, id, i1, i2)
subroutine, public hecmw_pair_array_finalize(parray)
integer(kind=kint) function, public hecmw_pair_array_find_id(parray, i1, i2)
subroutine, public hecmw_pair_array_init(parray, max_num)
subroutine, public hecmw_pair_array_sort(parray)
I/O and Utility.
Definition: hecmw_util_f.F90:7
integer(kind=4), parameter kreal
integer(kind=kint), parameter hecmw_status_size
integer(kind=kint) function hecmw_comm_get_rank()
real(kind=kreal) function hecmw_wtime()
subroutine hecmw_isend_int(sbuf, sc, dest, tag, comm, req)
subroutine hecmw_recv_int(rbuf, rc, source, tag, comm, stat)
subroutine hecmw_isend_r(sbuf, sc, dest, tag, comm, req)
subroutine hecmw_waitall(cnt, reqs, stats)
subroutine hecmw_recv_r(rbuf, rc, source, tag, comm, stat)
subroutine hecmw_alltoall_int(sbuf, sc, rbuf, rc, comm)