PROGRESS  master
prg_graphsolver_mod.F90
Go to the documentation of this file.
1 
6 
7  use bml
11  use prg_graph_mod
12  use prg_system_mod
13  use prg_timer_mod
14  use prg_extras_mod
17  use prg_genz_mod
18 
19  implicit none
20 
21  private !Everything is private by default
22 
23  integer, parameter :: dp = kind(1.0d0)
24 
26 
27 contains
28 
39  subroutine prg_build_densitygp_t0(ham_bml, g_bml, rho_bml, threshold, bndfil, &
40  & Ef, nparts, verbose)
41 
42  integer, parameter :: dp = kind(1.0d0)
43  type(bml_matrix_t), intent(in) :: ham_bml, g_bml
44  type(bml_matrix_t), intent(out) :: rho_bml
45  real(dp) :: bndfil, threshold
46  integer :: myrank, norbs, nnodes, tnnz, i, vsize(2), inorbs
47  integer, intent(inout) :: nparts
48  real(dp), intent(inout) :: ef
49  character(20) :: bml_type
50  integer, allocatable :: xadj(:), adjncy(:), vector(:)
51  integer, allocatable :: part(:), core_count(:), halo_count(:,:), ch_count(:)
52  type(graph_partitioning_t) :: gpat
53  real(dp) :: maxch, pnorm=6, smooth_maxch, sumcubes, mlsi, mlsii
54  type(system_type), allocatable :: syprt(:)
55  integer, optional, intent(in) :: verbose
56 
57  real(dp) :: error, dnocc_part, dnocc, nocc
58 
59  ! Initialize progress MPI and get ranks
60  if(getnranks() == 0) call prg_progress_init()
61  !call prg_progress_init()
62  myrank = getmyrank() + 1
63  bml_type = bml_get_type(ham_bml)
64 
65  ! Allocate bml matrices
66  norbs = bml_get_n(ham_bml)
67  if(.not. bml_allocated(rho_bml))then
68  call bml_zero_matrix(bml_type,bml_element_real,dp,norbs,norbs,rho_bml)
69  endif
70 
71  ! Using Metis for graph partitioning
72  allocate(xadj(norbs+1)) ! Adjacency in csr format
73 
74  tnnz = 0
75  do i=1,norbs
76  tnnz = tnnz + bml_get_row_bandwidth(g_bml,i)
77  enddo
78 
79  allocate(adjncy(tnnz+1))
80 
81  call bml_adjacency(g_bml, xadj, adjncy, 1)
82 
83  nnodes = norbs
84 
85  allocate(part(nnodes))
86  allocate(core_count(nparts))
87  allocate(ch_count(nparts))
88  allocate(halo_count(nparts, nnodes))
89 
90  call prg_metispartition(gpat, nnodes, nnodes, xadj, adjncy, nparts, part, core_count,&
91  ch_count, halo_count, sumcubes, maxch, smooth_maxch, pnorm)
92 
93  if(present(verbose))then
94  if(verbose > 1 .and. myrank == 1)then
95  write(*,*)"gpat = ",gpat%pname
96  write(*,*)"totalParts = ",gpat%totalParts
97  write(*,*)"totalNodes = ",gpat%totalNodes
98  write(*,*)"partMin = ",gpat%localPartMin(1)
99  write(*,*)"partMax = ",gpat%localPartMax(1)
100  endif
101  endif
102 
103  allocate(syprt(gpat%TotalParts))
104 
105  call prg_wait
106 
107  if(present(verbose) .and. (verbose > 1 .and. myrank == 1)) mlsi = mls()
108 
109  dnocc = 0.0_dp
110  ! Extract core halo indices from partitions
111  do i=1,gpat%TotalParts
112  call bml_matrix2submatrix_index(g_bml,&
113  gpat%sgraph(i)%nodeInPart,gpat%nnodesInPart(i),&
114  gpat%sgraph(i)%core_halo_index, &
115  vsize,.true.)
116  gpat%sgraph(i)%lsize = vsize(1)
117  gpat%sgraph(i)%llsize = vsize(2)
118  if(present(verbose))then
119  if(verbose > 1 .and. myrank == 1)then
120  write(*,*) 'nodeInpart size', size(gpat%sgraph(i)%nodeInPart)
121  write(*,*) 'nodeInpart(i)', gpat%nnodesInPart(i)
122  write(*,*)"Core and CH sizes:",vsize(2),vsize(1)
123  endif
124  endif
125  enddo
126 
127  ! Construct DM for every part
128  do i = gpat%localPartMin(myrank), gpat%localPartMax(myrank)
129  ! do i=1,gpat%TotalParts ! If no MPI
130  call bml_matrix2submatrix_index(g_bml,&
131  gpat%sgraph(i)%nodeInPart,gpat%nnodesInPart(i),&
132  gpat%sgraph(i)%core_halo_index, &
133  vsize,.true.)
134  gpat%sgraph(i)%lsize = vsize(1)
135  gpat%sgraph(i)%llsize = vsize(2)
136  inorbs = vsize(1)
137  call bml_zero_matrix("dense",bml_element_real,dp,inorbs,inorbs,syprt(i)%estr%ham)
138  if(allocated(vector))deallocate(vector)
139  allocate(vector(gpat%sgraph(i)%lsize))
140  vector(:) = gpat%sgraph(i)%core_halo_index(1:gpat%sgraph(i)%lsize)
141  call bml_matrix2submatrix(ham_bml, syprt(i)%estr%ham, vector, &
142  & inorbs)
143 
144  !Computing the density matrix with diagonalization
145  mlsii = mls()
146  call bml_zero_matrix("dense",bml_element_real,dp,inorbs,inorbs,syprt(i)%estr%rho)
147  call prg_build_density_t_fermi(syprt(i)%estr%ham,syprt(i)%estr%rho,bndfil, 0.1_dp, ef, 0, dnocc_part)
148  dnocc = dnocc + dnocc_part
149 
150  !call bml_print_matrix("rho_part_bml",syprt(i)%estr%rho,0,10,0,10)
151  if(present(verbose))then
152  if(verbose > 1 .and. myrank == 1) write(*,*)"Time for prg_build_density_T0",mls()-mlsii
153  endif
154  call bml_submatrix2matrix(syprt(i)%estr%rho,rho_bml,vector,gpat%sgraph(i)%lsize,gpat%sgraph(i)%llsize,threshold)
155  enddo
156 
157  ! Collect the small DM matrices into its full system corresponding
158  call prg_partordering(gpat)
159  call prg_collectmatrixfromparts(gpat, rho_bml)
160  call prg_wait
161 
162  nocc = bml_trace(rho_bml)
163  error = abs(nocc-2*bndfil*norbs)
164  if (nocc < 2*bndfil*norbs) dnocc = - dnocc
165 
166  if(present(verbose))then
167  if(verbose > 1 .and. myrank == 1) write(6,'(A,10es15.6)') 'Ef, Nocc, delta_nocc, gradient', &
168  ef, nocc, error, dnocc
169  endif
170 
171  if (error > 1.e-6_dp .and. abs(dnocc) > 1.e-6_dp) then
172  if ( error > 1.0_dp ) then
173  ef = ef - nocc/dnocc
174  else
175  ef = ef - error / dnocc
176  endif
177  endif
178 
179  if(present(verbose))then
180  if(verbose > 1 .and. myrank == 1) write(*,*)"Total time for graph solver =",mls()-mlsi
181  endif
182 
183  do i = gpat%localPartMin(myrank), gpat%localPartMax(myrank)
184  call bml_deallocate(syprt(i)%estr%ham)
185  call bml_deallocate(syprt(i)%estr%rho)
186  enddo
187  deallocate(syprt)
188 
189  !call prg_progress_shutdown
190  if(getnranks() == 0) call prg_progress_shutdown()
191  end subroutine prg_build_densitygp_t0
192 
193 
202  subroutine prg_build_zmatgp(over_bml, g_bml, zmat_bml, threshold, &
203  & nparts, verbose)
204 
205  integer, parameter :: dp = kind(1.0d0)
206  type(bml_matrix_t), intent(in) :: over_bml, g_bml
207  type(bml_matrix_t), intent(out) :: zmat_bml
208  real(dp) :: threshold
209  integer :: myrank, norbs, nnodes, tnnz, i, vsize(2), inorbs
210  integer, intent(inout) :: nparts
211  character(20) :: bml_type
212  integer, allocatable :: xadj(:), adjncy(:), vector(:)
213  integer, allocatable :: part(:), core_count(:), halo_count(:,:), ch_count(:)
214  type(graph_partitioning_t) :: gpat
215  real(dp) :: maxch, pnorm=6, smooth_maxch, sumcubes, mlsi, mlsii
216  type(system_type), allocatable :: syprt(:)
217  integer, optional, intent(in) :: verbose
218 
219  ! Initialize progress MPI and get ranks
220  if(getnranks() == 0) call prg_progress_init()
221  !call prg_progress_init()
222  myrank = getmyrank() + 1
223 
224  bml_type = bml_get_type(over_bml)
225 
226  ! Allocate bml matrices
227  norbs = bml_get_n(over_bml)
228  if(.not. bml_allocated(zmat_bml))then
229  call bml_zero_matrix(bml_type,bml_element_real,dp,norbs,norbs,zmat_bml)
230  endif
231 
232  ! Using Metis to do graph partitioning
233  allocate(xadj(norbs+1)) ! Adjacency in csr format
234 
235  tnnz = 0
236  do i=1,norbs
237  tnnz = tnnz + bml_get_row_bandwidth(g_bml,i)
238  enddo
239 
240  allocate(adjncy(tnnz+1))
241 
242  call bml_adjacency(g_bml, xadj, adjncy, 1)
243 
244  nnodes = norbs
245 
246  allocate(part(nnodes))
247  allocate(core_count(nparts))
248  allocate(ch_count(nparts))
249  allocate(halo_count(nparts, nnodes))
250 
251  call prg_metispartition(gpat, nnodes, nnodes, xadj, adjncy, nparts, part, core_count,&
252  ch_count, halo_count, sumcubes, maxch, smooth_maxch, pnorm)
253 
254  if(present(verbose))then
255  if(verbose > 1 .and. myrank == 1)then
256  write(*,*)"gpat = ",gpat%pname
257  write(*,*)"totalParts = ",gpat%totalParts
258  write(*,*)"totalNodes = ",gpat%totalNodes
259  write(*,*)"partMin = ",gpat%localPartMin(1)
260  write(*,*)"partMax = ",gpat%localPartMax(1)
261  endif
262  endif
263  allocate(syprt(gpat%TotalParts))
264 
265  call prg_wait
266 
267  mlsi = mls()
268 
269  ! Extract core halo indices from partitions
270  do i=1,gpat%TotalParts
271  call bml_matrix2submatrix_index(g_bml,&
272  gpat%sgraph(i)%nodeInPart,gpat%nnodesInPart(i),&
273  gpat%sgraph(i)%core_halo_index, &
274  vsize,.true.)
275  gpat%sgraph(i)%lsize = vsize(1)
276  gpat%sgraph(i)%llsize = vsize(2)
277  if(present(verbose))then
278  if(verbose > 1 .and. myrank == 1)then
279  write(*,*) 'nodeInpart size', size(gpat%sgraph(i)%nodeInPart)
280  write(*,*) 'nodeInpart(i)', gpat%nnodesInPart(i)
281  write(*,*)"Core and CH sizes:",vsize(2),vsize(1)
282  endif
283  endif
284  enddo
285 
286  ! Construct Zmat for every part
287  do i = gpat%localPartMin(myrank), gpat%localPartMax(myrank)
288  ! do i=1,gpat%TotalParts ! If no MPI
289  call bml_matrix2submatrix_index(g_bml,&
290  gpat%sgraph(i)%nodeInPart,gpat%nnodesInPart(i),&
291  gpat%sgraph(i)%core_halo_index, &
292  vsize,.true.)
293  gpat%sgraph(i)%lsize = vsize(1)
294  gpat%sgraph(i)%llsize = vsize(2)
295  inorbs = vsize(1)
296  call bml_zero_matrix("dense",bml_element_real,dp,inorbs,inorbs,syprt(i)%estr%over)
297  if(allocated(vector))deallocate(vector)
298  allocate(vector(gpat%sgraph(i)%lsize))
299  vector(:) = gpat%sgraph(i)%core_halo_index(1:gpat%sgraph(i)%lsize)
300  call bml_matrix2submatrix(over_bml, syprt(i)%estr%over, vector, &
301  & inorbs)
302 
303  !Computing the zmatrix with diagonalization
304  mlsii = mls()
305  call bml_zero_matrix("dense",bml_element_real,dp,inorbs,inorbs,syprt(i)%estr%zmat)
306  call prg_buildzdiag(syprt(i)%estr%over,syprt(i)%estr%zmat,threshold,inorbs,"dense")
307 
308  if(present(verbose))then
309  if(verbose > 1 .and. myrank == 1) write(*,*)"Time for prg_build_density_T0",mls()-mlsii
310  endif
311  call bml_submatrix2matrix(syprt(i)%estr%zmat,zmat_bml,vector,gpat%sgraph(i)%lsize,gpat%sgraph(i)%llsize,threshold)
312  enddo
313 
314  ! Collect the small DM matrices into its full system corresponding
315  call prg_partordering(gpat)
316  call prg_collectmatrixfromparts(gpat, zmat_bml)
317  call prg_wait
318 
319  if(present(verbose))then
320  if(verbose > 1 .and. myrank == 1)write(*,*)"Total time for graph solver =",mls()-mlsi
321  endif
322 
323  do i = gpat%localPartMin(myrank), gpat%localPartMax(myrank)
324  call bml_deallocate(syprt(i)%estr%over)
325  call bml_deallocate(syprt(i)%estr%zmat)
326  enddo
327  deallocate(syprt)
328 
329  !call prg_progress_shutdown
330  if(getnranks() == 0) call prg_progress_shutdown()
331  end subroutine prg_build_zmatgp
332 
333 end module prg_graphsolver_mod
Module to obtain the density matrix by diagonalizing an orthogonalized Hamiltonian.
subroutine, public prg_build_density_t_fermi(ham_bml, rho_bml, threshold, kbt, ef, verbose, drho)
Builds the density matrix from for electronic temperature T. Where, is the matrix eigenvector and ...
Extra routines.
real(dp) function, public mls()
To get the actual time in milliseconds.
To produce a matrix which is needed to orthogonalize .
Definition: prg_genz_mod.F90:6
subroutine, public prg_buildzdiag(smat_bml, zmat_bml, threshold, mdimin, bml_type, verbose, err_out)
Usual subroutine involving diagonalization. , where = eigenvectors and = eigenvalues....
The graph module.
Module for graph-based solvers.
subroutine, public prg_build_densitygp_t0(ham_bml, g_bml, rho_bml, threshold, bndfil, Ef, nparts, verbose)
Builds the density matrix from using a graph-based approach.
subroutine, public prg_build_zmatgp(over_bml, g_bml, zmat_bml, threshold, nparts, verbose)
Builds the inverse overlap factor matrix from using a graph-based approach.
The parallel module.
integer function, public getnranks()
integer function, public getmyrank()
subroutine, public prg_wait()
The partition module.
subroutine, public prg_metispartition(gp, ngroups, nnodes, xadj, adjncy, nparts, part, core_count, CH_count, Halo_count, sumCubes, maxCH, smooth_maxCH, pnorm)
Create graph partitions minizing number of cut edges.
The progress module.
subroutine, public prg_progress_init()
Initialize progress.
subroutine, public prg_progress_shutdown()
Shutdown progress.
The subgraphloop module.
subroutine, public prg_partordering(gp)
Set row ordering bases on parts.
subroutine, public prg_collectmatrixfromparts(gp, rho_bml)
Collect distributed parts into same matrix.
A module to read and handle chemical systems.
The timer module.