PROGRESS  master
prg_subgraphloop_mod.F90
Go to the documentation of this file.
1 
2 !
3 !
4 
6 
7  use prg_graph_mod
8  use prg_sp2_mod
9  use prg_timer_mod
11  use bml
12  use omp_lib
13 
14  implicit none
15 
16  private !Everything is private by default
17 
18  integer, parameter :: dp = kind(1.0d0)
19 
20  public :: prg_subgraphsp2loop
21  public :: prg_balanceparts
22  public :: prg_partordering
26 
27 contains
28 
29  !! Subgraph SP2 loop.
30  !!
31  !! \param h_bml Input Hamiltonian matrix
32  !! \param g_bml Input Graph as matrix
33  !! \param rho_bml Output density matrix
34  !! \param gp Input/Output graph partitioning
35  !! \param threshold threshold for matrix operations
36  subroutine prg_subgraphsp2loop(h_bml, g_bml, rho_bml, gp, threshold)
37 
38  type (bml_matrix_t), intent(in) :: h_bml, g_bml
39  type (bml_matrix_t), intent(inout) :: rho_bml
40  type (graph_partitioning_t), intent(inout) :: gp
41  real(dp), intent(in) :: threshold
42 
43  integer :: i, j, k
44  integer, allocatable :: vsize(:)
45  type (bml_matrix_t) :: x_bml
46  real(dp) :: thresh0
47 
48  allocate(vsize(2))
49 
50  thresh0 = 0.0_dp
51 
53 
54  ! Determine elements for each subgraph
55  !$omp parallel do default(none) &
56  !$omp private(i, vsize) &
57  !$omp shared(gp, h_bml, g_bml)
58  do i = 1, gp%totalParts
59 
60  call bml_matrix2submatrix_index(g_bml, &
61  gp%sgraph(i)%nodeInPart, gp%nnodesInPart(i), &
62  gp%sgraph(i)%core_halo_index, &
63  vsize, .true., h_bml)
64 
65  gp%sgraph(i)%lsize = vsize(1)
66  gp%sgraph(i)%llsize = vsize(2)
67 
68  enddo
69  !$omp end parallel do
70 
71  ! do i = 1, gp%totalParts
72  ! write(*,*)"i = ", i, " core size = ", gp%sgraph(i)%llsize, &
73  ! " full size = ", gp%sgraph(i)%lsize
74  ! write(*,*)"nnodes = ", gp%nnodesInPart(i), &
75  ! (gp%sgraph(i)%nodeInPart(k),k=1,gp%nnodesInPart(i))
76  ! enddo
77 
79 
80  deallocate(vsize)
81 
82  ! Balance parts by size of subgraph
83  if (getnranks() > 1) then
84  call prg_balanceparts(gp)
85  call prg_partordering(gp)
86  endif
87 
88  ! Process each part one at a time
89  !do i = 1, gp%nparts
90  do i = gp%localPartMin(gp%myRank+1), gp%localPartMax(gp%myRank+1)
91 
92  call bml_zero_matrix(bml_matrix_dense, bml_element_real, dp, &
93  gp%sgraph(i)%lsize, gp%sgraph(i)%lsize, x_bml);
94 
95  ! Extract subgraph and prg_normalize
97  call bml_matrix2submatrix(h_bml, x_bml, &
98  gp%sgraph(i)%core_halo_index, gp%sgraph(i)%lsize)
100 
101  ! Run SP2 on subgraph/submatrix
103  !call prg_sp2_submatrix_inplace(x_bml, threshold, gp%pp, &
104  call prg_sp2_submatrix_inplace(x_bml, thresh0, gp%pp, &
105  gp%maxIter, gp%vv, gp%mineval, gp%maxeval, &
106  gp%sgraph(i)%llsize)
108 
109  ! Reassemble into density matrix
111  call bml_submatrix2matrix(x_bml, rho_bml, &
112  gp%sgraph(i)%core_halo_index, gp%sgraph(i)%lsize, &
113  gp%sgraph(i)%llsize, threshold)
115 
116  call bml_deallocate(x_bml)
117 
118  enddo
119 
120  ! Fnorm
121  call prg_fnormgraph(gp)
122 
123  ! Collect density matrix over local and distributed parts
124  call prg_collectmatrixfromparts(gp, rho_bml)
125 
126  end subroutine prg_subgraphsp2loop
127 
132  subroutine prg_collectmatrixfromparts(gp, rho_bml)
133 
134  implicit none
135 
136  type (graph_partitioning_t), intent(inout) :: gp
137  type (bml_matrix_t), intent(inout) :: rho_bml
138 
139  if (getnranks() > 1) then
140 
141  ! Save original domain
142  call bml_save_domain(rho_bml)
143 
144  ! Update density matrix domain based on partitions of orbitals
145  ! for gather
146  call bml_update_domain(rho_bml, gp%localPartMin, gp%localPartMax, &
147  gp%nnodesInPart)
148  call bml_reorder(rho_bml, gp%reorder);
149 
150  ! Exchange/gather density matrix across ranks
151  call bml_allgathervparallel(rho_bml)
152 
153  ! Reorder density matrix to match original
154  call bml_reorder(rho_bml, gp%order)
155 
156  ! Restore to original domain
157  call bml_restore_domain(rho_bml)
158 
159  endif
160 
161  end subroutine prg_collectmatrixfromparts
162 
163  !! Balance subgraphs by size.
164  subroutine prg_balanceparts(gp)
165 
166  type (graph_partitioning_t), intent(inout) :: gp
167 
168  type (subgraph_t), allocatable :: temp_sgraph(:)
169  type (subgraph_t) :: temp
170  integer :: nranks, myrank, nparts, avgparts
171  integer :: i, j, rid, sid, onum, pnode, ip
172 
173  integer :: tempp
174  integer, allocatable :: tsize(:), torder(:)
175 
176  nranks = getnranks()
177  myrank = getmyrank()
178 
179  nparts = gp%totalParts
180  avgparts = nparts / nranks
181 
182  ! Sort parts by core+halo size
183  allocate(temp_sgraph(nparts))
184  allocate(tsize(nparts))
185  allocate(torder(nparts))
186 
187  do i = 1, nparts
188  temp_sgraph(i) = gp%sgraph(i)
189  torder(i) = i
190  tsize(i) = temp_sgraph(i)%llsize
191  enddo
192 
193  !! Sort parts by core+halo size
194  do i = 1, nparts-1
195  do j = i, nparts
196  if (tsize(i) .lt. tsize(j)) then
197  tempp = tsize(i)
198  tsize(i) = tsize(j)
199  tsize(j) = tempp
200 
201  tempp = torder(i)
202  torder(i) = torder(j)
203  torder(j) = tempp
204  endif
205  enddo
206  enddo
207 
208  !! Print ordered subgraph sizes after sorting
209  ! if (printRank() .eq. 1) then
210  ! write(*,*) "after sort:"
211  ! do i = 1, nParts
212  ! write(*,*)"order ", i, " part = ", temp_sgraph(i)%part, " size = ", &
213  ! temp_sgraph(i)%lsize
214  ! enddo
215  ! endif
216 
219  rid = 1
220  do i = 1, nranks
221  sid = sid + 1
222  do j = i, nparts, nranks
223  gp%sgraph(rid) = temp_sgraph(torder(j))
224  !write(*,*) "rank = ", i, " part = "), rid, " from = ", j
225  rid = rid + 1
226  sid = sid + 1
227  enddo
228  if (sid .le. gp%localPartExtent(i)) then
229  gp%sgraph(rid) = temp_sgraph(torder(nranks*avgparts+i))
230  !write(*,*) "rank = ", i, " part = ", rid, " from = ", nranks*avgparts+i
231  rid = rid + 1
232  endif
233  enddo
234 
235  !! Print ordered subgraph sizes after renumbering
236  ! if (printRank() .eq. 1) then
237  ! write(*,*) "after renumber:"
238  ! do i = 1, nParts
239  ! write(*,*)"order ", i, " part = ", gp%sgraph(i)%part, &
240  ! " core = ", gp%sgraph(i)%llsize, &
241  ! " size = ", gp%sgraph(i)%lsize, &
242  ! " min node = ", minval(gp%sgraph(i)%nodeInPart) + 1, &
243  ! " max node = ", maxval(gp%sgraph(i)%nodeInPart) + 1
244  ! enddo
245  ! endif
246 
247  !! Reset number of nodes per part
248  do i = 1, nparts
249  gp%nnodesInPart(i) = gp%sgraph(i)%llsize
250  gp%nnodesInPartAll(i) = gp%sgraph(i)%llsize
251  enddo
252 
253  deallocate(temp_sgraph)
254  deallocate(tsize)
255  deallocate(torder)
256 
257  end subroutine prg_balanceparts
258 
262  subroutine prg_partordering(gp)
263 
264  type (graph_partitioning_t), intent(inout) :: gp
265 
266  integer :: i, j, onum, pnode, nparts
267 
268  nparts = gp%totalParts
269 
270  !! Set ordering and reordering based on partitions
271  onum = 0
272  do i = 1, nparts
273  !!write(*,*) i, " Part ", ip, " nnodes = ", gp%sgraph(i)%llsize
274  do j = 1, gp%sgraph(i)%llsize
275  pnode = gp%sgraph(i)%nodeInPart(j)
276  !!write(*,*) " ", j, " Node ", pnode, " to ", onum
277  gp%reorder(pnode+1) = onum
278  gp%order(onum+1) = pnode
279  onum = onum + 1
280  enddo
281  enddo
282 
283  end subroutine prg_partordering
284 
291  subroutine prg_getgrouppartitionhalosfromgraph(gp, g_bml, hnode, djflag)
292 
293  implicit none
294 
295  type (graph_partitioning_t), intent(inout) :: gp
296  type (bml_matrix_t), intent(in) :: g_bml
297  integer, intent(in) :: hnode(*)
298  logical, intent(in) :: djflag
299 
300  integer :: i, j, inode
301  integer, allocatable :: vsize(:)
302 
303  allocate(vsize(2))
304 
306  !$omp parallel do default(none) &
307  !$omp private(i, j, inode, vsize) &
308  !$omp shared(gp, g_bml, hnode, djflag)
309  do i = 1, gp%totalParts
310 
311  do j = 1, gp%nnodesInPart(i)
312  inode = gp%sgraph(i)%nodeInPart(j)
313  gp%sgraph(i)%nodeInPart(j) = hnode(inode+1) - 1
314  enddo
315 
316  call bml_matrix2submatrix_index(g_bml, &
317  gp%sgraph(i)%nodeInPart, gp%nnodesInPart(i), &
318  gp%sgraph(i)%core_halo_index, &
319  vsize, djflag)
320 
321  gp%sgraph(i)%lsize = vsize(1)
322  gp%sgraph(i)%llsize = vsize(2)
323 
324  enddo
325  !$omp end parallel do
326 
327  deallocate(vsize)
328 
330 
336  subroutine prg_getpartitionhalosfromgraph(gp, g_bml, djflag)
337 
338  implicit none
339 
340  type (graph_partitioning_t), intent(inout) :: gp
341  type (bml_matrix_t), intent(in) :: g_bml
342  logical, intent(in) :: djflag
343 
344  integer :: i
345  integer, allocatable :: vsize(:)
346 
347  allocate(vsize(2))
348 
350  !$omp parallel do default(none) &
351  !$omp private(i, vsize) &
352  !$omp shared(gp, g_bml, djflag)
353  do i = 1, gp%totalParts
354 
355  call bml_matrix2submatrix_index(g_bml, &
356  gp%sgraph(i)%nodeInPart, gp%nnodesInPart(i), &
357  gp%sgraph(i)%core_halo_index, &
358  vsize, djflag)
359 
360  gp%sgraph(i)%lsize = vsize(1)
361  gp%sgraph(i)%llsize = vsize(2)
362 
363  enddo
364  !$omp end parallel do
365 
366  deallocate(vsize)
367 
368  end subroutine prg_getpartitionhalosfromgraph
369 
370 end module prg_subgraphloop_mod
The graph module.
integer, parameter dp
subroutine, public prg_fnormgraph(gp)
Accumulate trace norm across all subgraphs.
The parallel module.
integer function, public getnranks()
integer function, public getmyrank()
The SP2 module.
Definition: prg_sp2_mod.F90:7
subroutine, public prg_sp2_submatrix_inplace(rho_bml, threshold, pp, icount, vv, mineval, maxeval, core_size)
The subgraphloop module.
subroutine, public prg_getpartitionhalosfromgraph(gp, g_bml, djflag)
Get core+halo indeces for all partitions only using the graph.
subroutine, public prg_partordering(gp)
Set row ordering bases on parts.
subroutine, public prg_subgraphsp2loop(h_bml, g_bml, rho_bml, gp, threshold)
subroutine, public prg_getgrouppartitionhalosfromgraph(gp, g_bml, hnode, djflag)
Get core+halo indeces for all partitions only using the graph.
subroutine, public prg_balanceparts(gp)
subroutine, public prg_collectmatrixfromparts(gp, rho_bml)
Collect distributed parts into same matrix.
The timer module.
subroutine, public prg_timer_start(itimer, tag)
Start Timing.
integer, public subind_timer
integer, public subext_timer
subroutine, public prg_timer_stop(itimer, verbose)
Stop timing.
integer, public suball_timer
integer, public subsp2_timer