PROGRESS  master
prg_sp2_mod.F90
Go to the documentation of this file.
1 
7 module prg_sp2_mod
8 
9  use bml
11  use prg_timer_mod
13 
14  implicit none
15 
16  private !Everything is private by default
17 
18  integer, parameter :: dp = kind(1.0d0)
19  integer, parameter :: dp1 = kind(1.0)
20 
21  public :: prg_sp2_basic
22  public :: prg_sp2_alg1
23  public :: prg_sp2_alg1_genseq
24  public :: prg_sp2_alg1_seq
26  public :: prg_sp2_alg2
27  public :: prg_sp2_alg2_genseq
28  public :: prg_sp2_alg2_seq
30  public :: prg_sp2_submatrix
32  public :: prg_sp2_basic_tcore
33 
34 contains
35 
49  subroutine prg_sp2_basic(h_bml,rho_bml,threshold,bndfil,minsp2iter,maxsp2iter &
50  ,sp2conv,idemtol,verbose)
51 
52  implicit none
53  integer :: hdim,iter,minsp2iter,maxsp2iter
54  integer :: verbose
55  real(dp) :: trx, occ, trx2
56  real(dp) :: bndfil
57  real(dp) :: threshold
58  real(dp) :: idemtol
59  real(dp), allocatable :: trace(:)
60  character(len=*), intent(in) :: sp2conv
61  type(bml_matrix_t), intent(inout) :: rho_bml
62  type(bml_matrix_t), intent(in) :: h_bml
63  type(bml_matrix_t) :: x2_bml
64 
65 
66  hdim = bml_get_n(h_bml) !to get it from the bml matrix
67 
68  ! we're also using niklasson's scheme to determine convergence
69  occ = bndfil*real(hdim,dp)
70 
71  ! Normalize
72  call bml_copy(h_bml, rho_bml)
73  call prg_normalize(rho_bml)
74 
75  allocate(trace(2))
76 
77  ! X2 <- X
78  call bml_copy_new(rho_bml, x2_bml)
79 
80  do iter = 0, maxsp2iter
81 
82  trx = bml_trace(rho_bml)
83 
84  ! X2 <- X * X
85  call bml_multiply_x2(rho_bml, x2_bml, threshold, trace)
86 
87  trx2 = trace(2)
88 
89 #ifdef DO_MPI_BLOCK
90 
91  if (getnranks() > 1) then
92  call prg_sumrealreduce2(trx, trx2)
93  endif
94 #endif
95 
96  if(verbose.gt.1 .and. printrank() .eq. 1) write(*,*)"sp2iter", iter, occ, trx, abs(occ-trx)
97 
98  if(trx - occ .le. 0.0_dp) then
99 
100  ! X <- 2 * X - X2
101  call bml_add_deprecated(2.00_dp, rho_bml, -1.00_dp, x2_bml, threshold)
102 
103  trx = 2.0_dp * trx - trx2
104 
105  else
106 
107  ! X <- X2
108  call bml_copy(x2_bml,rho_bml) !x2 = d
109 
110  trx = trx2
111 
112  end if
113 
114  if(abs(occ-trx) .lt. idemtol .and. iter .gt. minsp2iter) exit
115 
116  if(iter .eq. maxsp2iter) then
117  write(*,*) "sp2 purification is not converging: stop!"
118  error stop
119  end if
120 
121 #ifdef DO_MPI_BLOCK
122 
123  if (getnranks() > 1) then
124  !call prg_allGatherParallel(rho_bml)
125  endif
126 #endif
127 
128  end do
129 
130  call bml_scale(2.0d0, rho_bml) !d = 2*d
131 
132  call bml_deallocate(x2_bml)
133  deallocate(trace)
134 
135  end subroutine prg_sp2_basic
136 
137 
138  subroutine prg_sp2_basic_tcore(h_bml,rho_bml,rhofull_bml,threshold,bndfil,minsp2iter,maxsp2iter &
139  ,sp2conv,idemtol,verbose)
140 
141  implicit none
142  integer :: hdim,iter,minsp2iter,maxsp2iter
143  integer :: verbose
144  real(dp) :: trx, occ, trx2
145  real(dp) :: bndfil
146  real(dp) :: eband, ebandx
147  real(dp) :: threshold
148  real(dp) :: idemtol, fnorm
149  real(dp1) :: epsilon
150  real(dp), allocatable :: trace(:)
151  character(len=*), intent(in) :: sp2conv
152  type(bml_matrix_t), intent(inout) :: rho_bml,rhofull_bml
153  type(bml_matrix_t), intent(in) :: h_bml
154  type(bml_matrix_t) :: x2_bml, aux_bml
155 
156  hdim = bml_get_n(h_bml) !to get it from the bml matrix
157 
158  ! we're also using niklasson's scheme to determine convergence
159  occ = bndfil*real(hdim,dp)
160 
161  ! Normalize
162  call bml_copy(h_bml, rho_bml)
163  call prg_normalize(rho_bml)
164 
165  allocate(trace(2))
166 
167  ! X2 <- X
168  call bml_copy_new(rho_bml, x2_bml)
169 
170  call bml_copy_new(rhofull_bml,aux_bml)
171  call bml_multiply(h_bml, rhofull_bml, aux_bml,0.5_dp,0.0_dp,0.0d0)
172  eband = bml_trace(aux_bml)
173 
174  if(verbose.gt.1 .and. printrank() .eq. 1) write(*,*)"sp2iter", "iter", "occ", "trx", "|occ-trx|","fnorm", "(ebandx - eband)/eband"
175  do iter = 0, maxsp2iter
176 
177  trx = bml_trace(rho_bml)
178 
179  ! X2 <- X * X
180  call bml_multiply_x2(rho_bml, x2_bml, threshold, trace)
181 
182  call bml_copy_new(rhofull_bml,aux_bml)
183  call bml_add_deprecated(0.50_dp, aux_bml, -1.00_dp, rho_bml, threshold)
184  fnorm = bml_fnorm(aux_bml)/bml_fnorm(rhofull_bml)
185  call bml_multiply(h_bml, rho_bml, aux_bml,1.0_dp,0.0_dp,threshold)
186  ebandx = bml_trace(aux_bml)
187 
188  trx2 = trace(2)
189 
190 #ifdef DO_MPI_BLOCK
191 
192  if (getnranks() > 1) then
193  call prg_sumrealreduce2(trx, trx2)
194  endif
195 #endif
196 
197  if(verbose.gt.1 .and. printrank() .eq. 1) write(*,*)"sp2iter", iter, occ, trx, abs(occ-trx),fnorm, (ebandx - eband)/eband
198 
199  if(trx - occ .le. 0.0_dp) then
200 
201  ! X <- 2 * X - X2
202  call bml_add_deprecated(2.00_dp, rho_bml, -1.00_dp, x2_bml, threshold)
203 
204  trx = 2.0_dp * trx - trx2
205 
206  else
207 
208  ! X <- X2
209  call bml_copy(x2_bml,rho_bml) !x2 = d
210 
211  trx = trx2
212 
213  end if
214 
215  ! if(abs(occ-trx) .lt. idemtol .and. iter .gt. minsp2iter) exit
216 
217 
218  if(iter .eq. maxsp2iter) then
219  write(*,*) "sp2 purification is not converging: stop!"
220  ! error stop
221  end if
222 
223 #ifdef DO_MPI_BLOCK
224 
225  if (getnranks() > 1) then
226  !call prg_allGatherParallel(rho_bml)
227  endif
228 #endif
229 
230  end do
231 
232  epsilon = 1.0
233  do while (1.0 + 0.5 * epsilon .NE. 1.0)
234  epsilon = 0.5 * epsilon
235  enddo
236  !machine prec = 2.2204460492503131E-016 (dp)
237  !machine prec = 1.19209290E-07
238 
239  write(*,*)"epsilon",epsilon
240 
241  call bml_scale(2.0d0, rho_bml) !d = 2*d
242 
243  call bml_deallocate(x2_bml)
244  deallocate(trace)
245 
246  end subroutine prg_sp2_basic_tcore
247 
248 
249 
250  !! Calculate the density matrix from a Hamiltonian matrix by
251  !! purification. The method implemented here is the SP2 method
252  !! [A. Niklasson, Phys. Rev. B, 66, 155115 (2002)].
253  !!
254  !! \param h_bml Input Hamiltonian matrix
255  !! \param rho_bml Output density matrix
256  !! \param threshold Threshold for sparse matrix algebra
257  !! \param bndfil Bond
258  !! \param minsp2iter Minimum sp2 iterations
259  !! \param maxsp2iter Maximum SP2 iterations
260  !! \param sp2conv Convergence type
261  !! \param idemtol Idempotency tolerance
262  !! \param verbose Verbosity level
263  subroutine prg_sp2_alg2(h_bml, rho_bml, threshold, bndfil, &
264  minsp2iter, maxsp2iter, sp2conv, idemtol,verbose)
265 
266  integer, intent(in) :: minsp2iter, maxsp2iter
267  real(dp), intent(in) :: threshold, bndfil, idemtol
268  character(len=*), intent(in) :: sp2conv
269  type(bml_matrix_t),intent(in) :: h_bml
270  type(bml_matrix_t),intent(inout) :: rho_bml
271 
272  integer, optional, intent(in) :: verbose
273  integer :: hdim, iter, breakloop
274  real(dp) :: trx, trx2, trxold, tr2xx2
275  real(dp) :: occ, limdiff
276  real(dp) :: idemperr, idemperr1, idemperr2
277  real(dp), allocatable :: trace(:)
278 
279  type(bml_matrix_t) :: x2_bml
280 
281  idemperr = 0.0_dp
282  idemperr1 = 0.0_dp
283  idemperr2 = 0.0_dp
284 
285  hdim = bml_get_n(h_bml)
286  occ = bndfil*float(hdim)
287  if (printrank() .eq. 1) write(*,*) 'OCC = ', occ
288 
289  !! Normalize
290  call bml_copy(h_bml, rho_bml)
291  call prg_normalize(rho_bml)
292 
293  allocate(trace(2))
294 
295  trx = bml_trace(rho_bml)
296 
297  iter = 0
298  breakloop = 0
299 
300  ! X2 <- X
301  call bml_copy_new(rho_bml, x2_bml)
302 
303  do while (breakloop .eq. 0 .and. iter .lt. maxsp2iter)
304 
305  iter = iter + 1
306 
307  ! X2 <- X * X
308  call bml_multiply_x2(rho_bml, x2_bml, threshold, trace)
309 
310  trx2 = trace(2)
311 
312 #ifdef DO_MPI_BLOCK
313 
314  if (getnranks() > 1) then
315  call prg_sumrealreduce2(trx, trx2)
316  endif
317 #endif
318 
319  if(present(verbose))then
320  if(verbose.ge.1) then
321  if (printrank() .eq. 1) write(*,*) 'iter = ', iter, 'trx = ', trx, ' trx2 = ', trx2
322  endif
323  endif
324 
325  tr2xx2 = 2.0_dp*trx - trx2
326  trxold = trx
327  limdiff = abs(trx2 - occ) - abs(tr2xx2 - occ)
328 
329  if (limdiff .ge. idemtol) then
330 
331  ! X <- 2 * X - X2
332  call bml_add_deprecated(2.0_dp, rho_bml, -1.0_dp, x2_bml, threshold)
333 
334  trx = 2.0_dp * trx - trx2
335 
336  elseif(limdiff .lt. -idemtol) then
337 
338  ! X <- X2
339  call bml_copy(x2_bml, rho_bml)
340 
341  trx = trx2
342 
343  else
344 
345  iter = iter - 1
346  trx = trxold
347  breakloop = 1
348 
349  end if
350 
351  idemperr2 = idemperr1
352  idemperr1 = idemperr
353  idemperr = abs(trx - trxold)
354 
355  if (sp2conv .eq. "Rel" .and. iter .ge. minsp2iter .and. &
356  (idemperr .ge. idemperr2 .or. idemperr .lt. idemtol)) then
357  breakloop = 1
358  end if
359 
360  if (iter .eq. maxsp2iter) then
361  write(*,*) "SP2 purification is not converging: STOP!"
362  stop
363  end if
364 
365 #ifdef DO_MPI_BLOCK
366 
367  if (getnranks() > 1) then
368  call prg_allgatherparallel(rho_bml)
369  endif
370 #endif
371 
372  end do
373 
374  ! X <- 2 * X
375  call bml_scale(2.0_dp, rho_bml) !D = 2.0_dp*D
376 
377  call bml_deallocate(x2_bml)
378  deallocate(trace)
379 
380  end subroutine prg_sp2_alg2
381 
382  !! Perform SP2 algorithm, generate sequence, and calculate norm.
383  !!
384  !! \param h_bml Input Hamiltonian matrix
385  !! \param rho_bml Output density matrix
386  !! \param threshold Threshold for sparse matrix algebra
387  !! \param bndfil Bond
388  !! \param minsp2iter Minimum sp2 iterations
389  !! \param maxsp2iter Maximum SP2 iterations
390  !! \param sp2conv Convergence type
391  !! \param idemtol Idempotency tolerance
392  !! \param pp Vector containing sequence of 0s and 1s
393  !! \param icount Sequence count
394  !! \param vv Vector for sum of squares per iteration
395  subroutine prg_sp2_alg2_genseq(h_bml, rho_bml, threshold, bndfil, minsp2iter, &
396  maxsp2iter, sp2conv, idemtol, pp, icount, vv, verbose)
397  implicit none
398  integer, intent(in) :: minsp2iter, maxsp2iter
399  integer, intent(inout) :: icount
400  integer, intent(inout) :: pp(:)
401  real(dp), intent(in) :: threshold, bndfil, idemtol
402  real(dp), intent(inout) :: vv(:)
403  character(len=*), intent(in) :: sp2conv
404  type(bml_matrix_t),intent(in) :: h_bml
405  type(bml_matrix_t),intent(inout) :: rho_bml
406 
407  integer, optional, intent(in) :: verbose
408  integer :: hdim, iter, breakloop, myrank
409  real(dp) :: trx, trx2, trxold, tr2xx2
410  real(dp) :: occ, limdiff, ssum
411  real(dp) :: idemperr, idemperr1, idemperr2
412  real(dp), allocatable :: trace(:)
413 
414  type(bml_matrix_t) :: x2_bml
415 
416  idemperr = 0.0_dp
417  idemperr1 = 0.0_dp
418  idemperr2 = 0.0_dp
419 
420  myrank = getmyrank()
421 
422  hdim = bml_get_n(h_bml)
423  occ = bndfil*float(hdim)
424  if (printrank() .eq. 1) write(*,*) 'OCC = ', occ
425 
426  !! Normalize
427  call bml_copy(h_bml, rho_bml)
428  call prg_normalize(rho_bml)
429 
430 #ifdef DO_MPI
431 
432  if (getnranks() .gt. 1 .and. &
433  bml_get_distribution_mode(rho_bml) .eq. bml_dmode_distributed) then
434  call prg_allgatherparallel(rho_bml)
435  endif
436 #endif
437 
438  allocate(trace(2))
439 
440  iter = 0
441  breakloop = 0
442 
443  call bml_copy_new(rho_bml, x2_bml)
444 
445  do while (breakloop .eq. 0 .and. iter .lt. maxsp2iter)
446 
447  iter = iter + 1
448 
449  ! X2 <- X * X
450  call bml_multiply_x2(rho_bml, x2_bml, threshold, trace)
451  ssum = bml_sum_squares2(rho_bml, x2_bml, 1.0_dp, -1.0_dp, threshold)
452  trx = trace(1)
453  trx2 = trace(2)
454 
455 #ifdef DO_MPI
456 
457  if (getnranks() .gt. 1 .and. &
458  bml_get_distribution_mode(rho_bml) .eq. bml_dmode_distributed) then
459  call prg_sumrealreduce3(trx, trx2, ssum)
460  endif
461 #endif
462 
463  vv(iter) = sqrt(ssum)
464 
465  if(present(verbose))then
466  if(verbose.ge.1) then
467  if (printrank() .eq. 1) write(*,*) 'iter = ', iter, 'trx = ', trx, ' trx2 = ', trx2
468  endif
469  endif
470 
471  tr2xx2 = 2.0_dp * trx - trx2
472  trxold = trx
473  limdiff = abs(trx2 - occ) - abs(tr2xx2 - occ)
474 
475  if (limdiff .ge. idemtol) then
476 
477  ! X <- 2 * X - X2
478  call bml_add_deprecated(2.0_dp, rho_bml, -1.0_dp, x2_bml, threshold)
479 
480  trx = 2.0_dp * trx - trx2
481 
482  pp(iter) = 0
483 
484  elseif(limdiff .lt. -idemtol) then
485 
486  ! X <- X2
487  call bml_copy(x2_bml, rho_bml)
488 
489  trx = trx2
490 
491  pp(iter) = 1
492 
493  else
494 
495  iter = iter - 1
496  trx = trxold
497  breakloop = 1
498 
499  end if
500 
501  idemperr2 = idemperr1
502  idemperr1 = idemperr
503  idemperr = abs(trx - trxold)
504 
505  if (sp2conv .eq. "Rel" .and. iter .ge. minsp2iter .and. &
506  (idemperr .ge. idemperr2 .or. idemperr .lt. idemtol)) then
507  breakloop = 1
508  end if
509 
510  if (iter .eq. maxsp2iter) then
511  write(*,*) "SP2 purification is not converging: STOP!"
512  stop
513  end if
514 
515 #ifdef DO_MPI
516 
517  if (getnranks() .gt. 1 .and. &
518  bml_get_distribution_mode(rho_bml) .eq. bml_dmode_distributed) then
519  call prg_allgatherparallel(rho_bml)
520  endif
521 #endif
522 
523  end do
524 
525  icount = iter
526 
527  ! X = 2 * X
528  call bml_scale(2.0_dp, rho_bml) !D = 2.0_dp*D
529 
530  call bml_deallocate(x2_bml)
531  deallocate(trace)
532 
533  end subroutine prg_sp2_alg2_genseq
534 
535  !! Perform SP2 algorithm given a sequence and calculate norm.
536  !!
537  !! \param h_bml Input Hamiltonian matrix
538  !! \param rho_bml Output density matrix
539  !! \param threshold Threshold for sparse matrix algebra
540  !! \param pp Vector containing sequence of 0s and 1s
541  !! \param icount Sequence count
542  !! \param vv Vector of sum of squares per iteration
543  subroutine prg_sp2_alg2_seq(h_bml, rho_bml, threshold, pp, icount, vv, verbose)
544 
545  integer, intent(inout) :: icount
546  integer, intent(inout) :: pp(:)
547  real(dp), intent(in) :: threshold
548  real(dp), intent(inout) :: vv(:)
549  type(bml_matrix_t),intent(in) :: h_bml
550  type(bml_matrix_t),intent(inout) :: rho_bml
551 
552  integer, optional, intent(in) :: verbose
553  integer :: iter
554  real(dp) :: trx, trx2, ssum
555  real(dp), allocatable :: trace(:)
556 
557  type(bml_matrix_t) :: x2_bml
558 
559  !! Normalize
560  call bml_copy(h_bml, rho_bml)
561  call prg_normalize(rho_bml)
562 
563  allocate(trace(2))
564 
565  trx = bml_trace(rho_bml)
566 
567  iter = 0
568 
569  ! X2 <- X
570  call bml_copy_new(rho_bml, x2_bml)
571 
572  do while (iter .lt. icount)
573 
574  iter = iter + 1
575 
576  ! X2 <- X * X
577  call bml_multiply_x2(rho_bml, x2_bml, threshold, trace)
578  ssum = bml_sum_squares2(rho_bml, x2_bml, 1.0_dp, -1.0_dp, threshold)
579  trx2 = trace(2)
580 
581 #ifdef DO_MPI_BLOCK
582 
583  if (getnranks() > 1) then
584  call prg_sumrealreduce3(trx, trx2, ssum)
585  endif
586 #endif
587 
588  vv(iter) = sqrt(ssum)
589 
590  if(present(verbose))then
591  if(verbose.ge.1) then
592  if (printrank() .eq. 1) write(*,*) 'iter = ', iter, 'trx = ', trx, ' trx2 = ', trx2
593  endif
594  endif
595 
596  if (pp(iter) .eq. 0) then
597 
598  ! X <- 2 * X - X2
599  call bml_add_deprecated(2.0_dp, rho_bml, -1.0_dp, x2_bml, threshold)
600 
601  trx = 2.0_dp * trx - trx2
602 
603  else
604 
605  ! X <- X2
606  call bml_copy(x2_bml, rho_bml)
607 
608  trx = trx2
609 
610  end if
611 
612 #ifdef DO_MPI_BLOCK
613 
614  if (getnranks() > 1) then
615  call prg_allgatherparallel(rho_bml)
616  endif
617 #endif
618 
619  end do
620 
621  ! X <- 2 * X
622  call bml_scale(2.0_dp, rho_bml) !D = 2.0_dp*D
623 
624  call bml_deallocate(x2_bml)
625  deallocate(trace)
626 
627  end subroutine prg_sp2_alg2_seq
628 
629  !! Perform SP2 algorithm given a sequence and calculate norm.
630  !!
631  !! \param rho_bml Input Hamiltonian/Output density matrix
632  !! \param threshold Threshold for sparse matrix algebra
633  !! \param pp Vector containing sequence of 0s and 1s
634  !! \param icount Sequence count
635  !! \param vv Vector of sum of squares per iteration
636  !! \param mineval Min value used for normalization (optional)
637  !! \param maxeval Max value used for normalization (optional)
638  subroutine prg_prg_sp2_alg2_seq_inplace(rho_bml, threshold, pp, icount, vv, &
639  mineval, maxeval, verbose)
640 
641  integer, intent(inout) :: icount
642  integer, intent(inout) :: pp(:)
643  real(dp), intent(in) :: threshold
644  real(dp), intent(inout) :: vv(:)
645  real(dp), optional, intent(in) :: mineval, maxeval
646  type(bml_matrix_t),intent(inout) :: rho_bml
647 
648  integer, optional, intent(in) :: verbose
649  integer :: iter
650  real(dp) :: trx, trx2, ssum
651  real(dp), allocatable :: trace(:)
652 
653  type(bml_matrix_t) :: x2_bml
654 
655  !! Normalize
656  call bml_normalize(rho_bml, mineval, maxeval)
657 
658  allocate(trace(2))
659 
660  trx = bml_trace(rho_bml)
661 
662  iter = 0
663 
664  ! X2 <- X
665  call bml_copy_new(rho_bml, x2_bml)
666 
667  do while (iter .lt. icount)
668 
669  iter = iter + 1
670 
671  ! X2 <- X * X
672  call bml_multiply_x2(rho_bml, x2_bml, threshold, trace)
673  ssum = bml_sum_squares2(rho_bml, x2_bml, 1.0_dp, -1.0_dp, threshold)
674  trx2 = trace(2)
675 
676 #ifdef DO_MPI_BLOCK
677 
678  if (getnranks() > 1) then
679  call prg_sumrealreduce3(trx, trx2, ssum)
680  endif
681 #endif
682 
683  vv(iter) = sqrt(ssum)
684 
685  if(present(verbose))then
686  if(verbose.ge.1) then
687  if (printrank() .eq. 1) write(*,*) 'iter = ', iter, 'trx = ', trx, ' trx2 = ', trx2
688  endif
689  endif
690 
691  if (pp(iter) .eq. 0) then
692 
693  ! X <- 2 * X - X2
694  call bml_add_deprecated(2.0_dp, rho_bml, -1.0_dp, x2_bml, threshold)
695 
696  trx = 2.0_dp * trx - trx2
697 
698  else
699 
700  ! X <- X2
701  call bml_copy(x2_bml, rho_bml)
702 
703  trx = trx2
704 
705  end if
706 
707 #ifdef DO_MPI_BLOCK
708 
709  if (getnranks() > 1) then
710  call prg_allgatherparallel(rho_bml)
711  endif
712 #endif
713 
714  end do
715 
716  ! X <- 2 * X
717  call bml_scale(2.0d0, rho_bml) !D = 2.0_dp*D
718 
719  call bml_deallocate(x2_bml)
720  deallocate(trace)
721 
722  end subroutine prg_prg_sp2_alg2_seq_inplace
723 
724  !! Perform SP2 algorithm.
725  !!
726  !! \param h_bml Input Hamiltonian matrix
727  !! \param rho_bml Output density matrix
728  !! \param threshold Threshold for sparse matrix algebra
729  !! \param bndfil Bond
730  !! \param minsp2iter Minimum sp2 iterations
731  !! \param maxsp2iter Maximum SP2 iterations
732  !! \param sp2conv Convergence type
733  !! \param idemtol Idempotency tolerance
734  subroutine prg_sp2_alg1(h_bml, rho_bml, threshold, bndfil, minsp2iter, maxsp2iter, &
735  sp2conv, idemtol, verbose)
736 
737  integer, intent(in) :: minsp2iter, maxsp2iter
738  integer, optional, intent(in) :: verbose
739  real(dp), intent(in) :: threshold, bndfil, idemtol
740  character(len=*), intent(in) :: sp2conv
741  type(bml_matrix_t),intent(in) :: h_bml
742  type(bml_matrix_t),intent(inout) :: rho_bml
743 
744  integer :: hdim, iter, breakloop
745  real(dp) :: trx, trx2
746  real(dp) :: occ, limdiff
747  real(dp) :: idemperr, idemperr1, idemperr2
748 
749  type(bml_matrix_t) :: x2_bml
750 
751  idemperr = 0.0_dp
752  idemperr1 = 0.0_dp
753  idemperr2 = 0.0_dp
754 
755  ! We're also using Niklasson's scheme to determine convergence
756 
757  hdim = bml_get_n(h_bml)
758  occ = bndfil*float(hdim)
759  if (printrank() .eq. 1)then
760  write(*,*) 'OCC = ',occ
761  write(*,*) 'Convergence type = ',sp2conv
762  write(*,*) 'Min./Max. number of iterations = ',minsp2iter,maxsp2iter
763  write(*,*) 'Tolerance = ',idemtol
764  endif
765 
766  !! Normalize
767  call bml_copy(h_bml, rho_bml)
768  call prg_normalize(rho_bml)
769 
770  trx = bml_trace(rho_bml)
771 
772  iter = 0
773  breakloop = 0
774 
775  call bml_copy_new(rho_bml, x2_bml)
776 
777  do while (breakloop .eq. 0 .and. iter .lt. maxsp2iter)
778 
779  iter = iter + 1
780 
781  ! X2 <- X
782  ! X2 <- X - X * X
783  call bml_copy(rho_bml, x2_bml)
784  call bml_multiply(rho_bml, rho_bml, x2_bml, -1.0_dp, 1.0_dp, threshold)
785 
786  trx2 = bml_trace(x2_bml)
787 
788 #ifdef DO_MPI_BLOCK
789 
790  if (getnranks() > 1) then
791  call prg_sumrealreduce2(trx, trx2)
792  endif
793 #endif
794 
795  if(present(verbose))then
796  if(verbose.ge.10)then
797  if (printrank() .eq. 1) write(*,*) 'iter = ', iter, 'trx = ', trx, ' trx2 = ', trx2
798  endif
799  endif
800 
801  limdiff = abs(trx - trx2 - occ) - abs(trx + trx2 - occ)
802 
803  if (limdiff .ge. idemtol) then
804  if (printrank() .eq. 1) write(*,*)"H1"
805  ! X <- X + (X - X * X) <- 2 * X - X * X
806  call bml_add_deprecated(1.0_dp, rho_bml, 1.0_dp, x2_bml, threshold)
807 
808  trx = trx + trx2
809 
810  elseif(limdiff .lt. -idemtol) then
811  if (printrank() .eq. 1) write(*,*)"H2"
812 
813  ! X <- X - (X - X * X) <- X * X
814  call bml_add_deprecated(1.0_dp, rho_bml, -1.0_dp, x2_bml, threshold)
815 
816  trx = trx - trx2
817 
818  elseif((limdiff .eq. 0.0) .and. (iter .eq. 1)) then
819  if (printrank() .eq. 1) write(*,*)"H3"
820 
821  ! X <- X - (X - X * X) <- X * X
822  call bml_add_deprecated(1.0_dp, rho_bml, -1.0_dp, x2_bml, threshold)
823 
824  trx = trx - trx2
825 
826  else
827  if (printrank() .eq. 1) write(*,*)"H4"
828  if (printrank() .eq. 1) write(*,*)"limdiff: ",limdiff
829 
830  iter = iter - 1
831  breakloop = 1
832 
833  end if
834 
835  idemperr2 = idemperr1
836  idemperr1 = idemperr
837  idemperr = abs(trx2)
838  if (iter .ge. minsp2iter) then
839  if (sp2conv .eq. "Rel" .and. &
840  (idemperr2 .le. idemperr .or. idemperr .lt. idemtol)) then
841  breakloop = 1
842  end if
843  end if
844 
845  if (iter .eq. maxsp2iter) then
846  write(*,*) "SP2 purification is not converging: STOP!"
847  stop
848  end if
849 
850 #ifdef DO_MPI_BLOCK
851 
852  if (getnranks() > 1) then
853  call prg_allgatherparallel(rho_bml)
854  endif
855 #endif
856 
857  end do
858 
859  ! X <- 2 * X
860  call bml_scale(2.0_dp, rho_bml) !D = 2.0_dp*D
861 
862  call bml_deallocate(x2_bml)
863 
864  end subroutine prg_sp2_alg1
865 
866  !! Perform SP2 algorithm, generate sequence, and calculate norm.
867  !!
868  !! \param h_bml Input Hamiltonian matrix
869  !! \param rho_bml Output density matrix
870  !! \param threshold Threshold for sparse matrix algebra
871  !! \param bndfil Bond
872  !! \param minsp2iter Minimum sp2 iterations
873  !! \param maxsp2iter Maximum SP2 iterations
874  !! \param sp2conv Convergence type
875  !! \param idemtol Idempotency tolerance
876  !! \param pp Vector containing sequence of 0s and 1s
877  !! \param icount Sequence count
878  !! \param vv Vector of sqrt of TrNorm per iteration
879  subroutine prg_sp2_alg1_genseq(h_bml, rho_bml, threshold, bndfil, &
880  minsp2iter, maxsp2iter, sp2conv, idemtol, &
881  pp, icount, vv)
882 
883  integer, intent(in) :: minsp2iter, maxsp2iter
884  integer, intent(inout) :: icount
885  integer, intent(inout) :: pp(:)
886  real(dp), intent(in) :: threshold, bndfil, idemtol
887  real(dp), intent(inout) :: vv(:)
888  character(len=*), intent(in) :: sp2conv
889  type(bml_matrix_t),intent(in) :: h_bml
890  type(bml_matrix_t),intent(inout) :: rho_bml
891 
892  integer :: hdim, iter, breakloop
893  real(dp) :: trx, trx2
894  real(dp) :: occ, limdiff, ssum
895  real(dp) :: idemperr, idemperr1, idemperr2
896  type(bml_matrix_t) :: x2_bml
897 
898  idemperr = 0.0_dp
899  idemperr1 = 0.0_dp
900  idemperr2 = 0.0_dp
901 
902  ! We're also using Niklasson's scheme to determine convergence
903 
904  hdim = bml_get_n(h_bml)
905  occ = bndfil*float(hdim)
906  if (printrank() .eq. 1) write(*,*) 'OCC = ', occ
907 
908  !! Normalize
909  call bml_copy(h_bml, rho_bml)
910  call prg_normalize(rho_bml)
911 
912  trx = bml_trace(rho_bml)
913 
914  iter = 0
915  breakloop = 0
916 
917  call bml_copy_new(rho_bml, x2_bml)
918 
919  do while (breakloop .eq. 0 .and. iter .lt. maxsp2iter)
920 
921  iter = iter + 1
922 
923  ! X2 <- X
924  ! X2 <- X - X * X
925  call bml_copy(rho_bml, x2_bml)
926  call bml_multiply(rho_bml, rho_bml, x2_bml, -1.0_dp, 1.0_dp, threshold)
927  ssum = bml_sum_squares(x2_bml)
928  trx2 = bml_trace(x2_bml)
929 
930 #ifdef DO_MPI_BLOCK
931 
932  if (getnranks() > 1) then
933  call prg_sumrealreduce3(trx, trx2, ssum)
934  endif
935 #endif
936 
937  vv(iter) = sqrt(ssum)
938 
939  if (printrank() .eq. 1) write(*,*) 'iter = ', iter, 'trx = ', trx, ' trx2 = ', trx2
940 
941  limdiff = abs(trx - trx2 - occ) - abs(trx + trx2 - occ)
942 
943  if (limdiff .ge. idemtol) then
944 
945  ! X <- X + (X - X * X) <- 2 * X - X * X
946  call bml_add_deprecated(1.0_dp, rho_bml, 1.0_dp, x2_bml, threshold)
947 
948  trx = trx + trx2
949 
950  pp(iter) = 0
951 
952  elseif(limdiff .lt. -idemtol) then
953 
954  ! X <- X - (X - X * X) <- X * X
955  call bml_add_deprecated(1.0_dp, rho_bml, -1.0_dp, x2_bml, threshold)
956 
957  trx = trx - trx2
958 
959  pp(iter) = 1
960 
961  else
962 
963  iter = iter - 1
964  breakloop = 1
965 
966  end if
967 
968  idemperr2 = idemperr1
969  idemperr1 = idemperr
970  idemperr = abs(trx2)
971 
972  if (sp2conv .eq. "Rel" .and. iter .ge. minsp2iter .and. &
973  (idemperr2 .le. idemperr .or. idemperr .lt. idemtol)) then
974  breakloop = 1
975  end if
976 
977  if (sp2conv .eq. "Abs" .and. abs(limdiff) .lt. idemtol) exit
978 
979  if (iter .eq. maxsp2iter) then
980  write(*,*) "SP2 purification is not converging: STOP!"
981  stop
982  end if
983 
984 #ifdef DO_MPI_BLOCK
985 
986  if (getnranks() > 1) then
987  call prg_allgatherparallel(rho_bml)
988  endif
989 #endif
990 
991  end do
992 
993  icount = iter
994 
995  ! X <- 2 * X
996  call bml_scale(2.0_dp, rho_bml) !D = 2.0_dp*D
997 
998  call bml_deallocate(x2_bml)
999 
1000  end subroutine prg_sp2_alg1_genseq
1001 
1002  !! Perform SP2 algorithm using sequence and calculate norm.
1003  !!
1004  !! \param h_bml Input Hamiltonian matrix
1005  !! \param rho_bml Output density matrix
1006  !! \param threshold Threshold for sparse matrix algebra
1007  !! \param pp Vector containing sequence of 0s and 1s
1008  !! \param icount Sequence count
1009  !! \param Vector of sum of squares per iteration
1010  subroutine prg_sp2_alg1_seq(h_bml, rho_bml, threshold, pp, icount, vv)
1011 
1012  integer, intent(inout) :: icount
1013  integer, intent(inout) :: pp(:)
1014  real(dp), intent(in) :: threshold
1015  real(dp), intent(inout) :: vv(:)
1016  type(bml_matrix_t),intent(in) :: h_bml
1017  type(bml_matrix_t),intent(inout) :: rho_bml
1018 
1019  integer :: iter, breakloop
1020  real(dp) :: trx, trx2, ssum
1021  type(bml_matrix_t) :: x2_bml
1022 
1023  ! We're also using Niklasson's scheme to determine convergence
1024 
1025  !! Normalize
1026  call bml_copy(h_bml, rho_bml)
1027  call prg_normalize(rho_bml)
1028 
1029  trx = bml_trace(rho_bml)
1030 
1031  iter = 0
1032  breakloop = 0
1033 
1034  call bml_copy_new(rho_bml, x2_bml)
1035 
1036  do while (iter .lt. icount)
1037 
1038  iter = iter + 1
1039 
1040  ! X2 <- X
1041  ! X2 <- X - X * X
1042  call bml_copy(rho_bml, x2_bml)
1043  call bml_multiply(rho_bml, rho_bml, x2_bml, -1.0_dp, 1.0_dp, threshold)
1044  ssum = bml_sum_squares(x2_bml)
1045 
1046  trx2 = bml_trace(x2_bml)
1047 
1048 #ifdef DO_MPI_BLOCK
1049 
1050  if (getnranks() > 1) then
1051  call prg_sumrealreduce3(trx, trx2, ssum)
1052  endif
1053 #endif
1054 
1055  vv(iter) = sqrt(ssum)
1056 
1057  if (printrank() .eq. 1) write(*,*) 'iter = ', iter, 'trx = ', trx, ' trx2 = ', trx2
1058 
1059  if (pp(iter) .eq. 0) then
1060 
1061  ! X <- X + (X - X * X) <- 2 * X - X * X
1062  call bml_add_deprecated(1.0_dp, rho_bml, 1.0_dp, x2_bml, threshold)
1063 
1064  trx = trx + trx2
1065 
1066  else
1067 
1068  ! X <- X - (X - X * X) <- X * X
1069  call bml_add_deprecated(1.0_dp, rho_bml, -1.0_dp, x2_bml, threshold)
1070 
1071  trx = trx - trx2
1072 
1073  end if
1074 
1075 #ifdef DO_MPI_BLOCK
1076 
1077  if (getnranks() > 1) then
1078  call prg_allgatherparallel(rho_bml)
1079  endif
1080 #endif
1081 
1082  end do
1083 
1084  ! X <- 2 * X
1085  call bml_scale(2.0_dp, rho_bml) !D = 2.0_dp*D
1086 
1087  call bml_deallocate(x2_bml)
1088 
1089  end subroutine prg_sp2_alg1_seq
1090 
1091  !! Perform SP2 algorithm using sequence and calculate norm.
1092  !!
1093  !! \param rho_bml Input Hamiltonian/Output density matrix
1094  !! \param threshold Threshold for sparse matrix algebra
1095  !! \param pp Vector containing sequence of 0s and 1s
1096  !! \param icount Sequence count
1097  !! \param Vector of sum of squares per iteration
1098  !! \param mineval Min value used for normalization (optional)
1099  !! \param maxeval Max value used for normalization (optional)
1100  subroutine prg_prg_sp2_alg1_seq_inplace(rho_bml, threshold, pp, icount, vv, &
1101  mineval, maxeval)
1102 
1103  integer, intent(inout) :: icount
1104  integer, intent(inout) :: pp(:)
1105  real(dp), intent(in) :: threshold
1106  real(dp), intent(inout) :: vv(:)
1107  real(dp), intent(in) :: mineval, maxeval
1108  type(bml_matrix_t),intent(inout) :: rho_bml
1109 
1110  integer :: iter, breakloop
1111  real(dp) :: trx, trx2, ssum
1112  type(bml_matrix_t) :: x2_bml
1113 
1114  ! We're also using Niklasson's scheme to determine convergence
1115 
1116  !! Normalize
1117  call bml_normalize(rho_bml, mineval, maxeval)
1118 
1119  trx = bml_trace(rho_bml)
1120 
1121  iter = 0
1122  breakloop = 0
1123 
1124  call bml_copy_new(rho_bml, x2_bml)
1125 
1126  do while (iter .lt. icount)
1127 
1128  iter = iter + 1
1129 
1130  ! X2 <- X
1131  ! X2 <- X - X * X
1132  call bml_copy(rho_bml, x2_bml)
1133  call bml_multiply(rho_bml, rho_bml, x2_bml, -1.0_dp, 1.0_dp, threshold)
1134  ssum = bml_sum_squares(x2_bml)
1135 
1136  trx2 = bml_trace(x2_bml)
1137 
1138 #ifdef DO_MPI_BLOCK
1139 
1140  if (getnranks() > 1) then
1141  call prg_sumrealreduce3(trx, trx2, ssum)
1142  endif
1143 #endif
1144 
1145  vv(iter) = sqrt(ssum)
1146 
1147  if (pp(iter) .eq. 0) then
1148 
1149  ! X <- X + (X - X * X) <- 2 * X - X * X
1150  call bml_add_deprecated(1.0_dp, rho_bml, 1.0_dp, x2_bml, threshold)
1151 
1152  trx = trx + trx2
1153 
1154  else
1155 
1156  ! X <- X - (X - X * X) <- X * X
1157  call bml_add_deprecated(1.0_dp, rho_bml, -1.0_dp, x2_bml, threshold)
1158 
1159  trx = trx - trx2
1160 
1161  end if
1162 
1163 #ifdef DO_MPI_BLOCK
1164 
1165  if (getnranks() > 1) then
1166  call prg_allgatherparallel(rho_bml)
1167  endif
1168 #endif
1169 
1170  end do
1171 
1172  ! X <- 2 * X
1173  call bml_scale(2.0_dp, rho_bml) !D = 2.0_dp*D
1174 
1175  call bml_deallocate(x2_bml)
1176 
1177  end subroutine prg_prg_sp2_alg1_seq_inplace
1178 
1190  subroutine prg_sp2_submatrix(ham_bml, rho_bml, threshold, pp, icount, vv, &
1191  mineval, maxeval, core_size)
1192 
1193  integer, intent(in) :: icount
1194  integer, intent(in) :: pp(:)
1195  integer, intent(in) :: core_size
1196  real(dp), intent(in) :: threshold
1197  real(dp), intent(inout) :: vv(:)
1198  real(dp), intent(in) :: mineval, maxeval
1199  type(bml_matrix_t),intent(in) :: ham_bml
1200  type(bml_matrix_t),intent(inout) :: rho_bml
1201 
1202  integer :: iter
1203  real(dp) :: trx, trx2, factor
1204  type(bml_matrix_t) :: x2_bml
1205 
1206  ! We're also using Niklasson's scheme to determine convergence
1207 
1208  !! Normalize
1209  call bml_copy(ham_bml, rho_bml)
1210  call bml_normalize(rho_bml, mineval, maxeval)
1211 
1212  trx = bml_trace(rho_bml)
1213 
1214  call bml_copy_new(rho_bml, x2_bml)
1215 
1216  do iter = 1, icount
1217 
1218  ! X2 <- X
1219  ! X2 <- X - X * X
1220  call bml_copy(rho_bml, x2_bml)
1221 
1222  call bml_multiply(rho_bml, rho_bml, x2_bml, -1.0_dp, 1.0_dp, threshold)
1223 
1224  vv(iter) = vv(iter) + bml_sum_squares_submatrix(x2_bml, core_size)
1225 
1226  trx2 = bml_trace(x2_bml)
1227 
1228  ! if (printRank() .eq. 1) write(*,*) 'iter = ', iter, &
1229  ! 'trx = ', trx, ' trx2 = ', trx2, ' vv = ', vv(iter)
1230 
1231  factor = 1.0_dp - 2.0_dp * pp(iter)
1232 
1233  ! X <- X + (X - X * X) <- 2 * X - X * X when pp(iter) == 0
1234  ! X <- X - (X - X * X) <- X * X when pp(iter) == 1
1235  call bml_add_deprecated(1.0_dp, rho_bml, factor, x2_bml, threshold)
1236 
1237  trx = trx + factor * trx2
1238 
1239  end do
1240 
1241  ! X <- 2 * X
1242  call bml_scale(2.0_dp, rho_bml) !D = 2.0_dp*D
1243 
1244  call bml_deallocate(x2_bml)
1245 
1246  end subroutine prg_sp2_submatrix
1247 
1248  !! Perform SP2 algorithm using sequence and calculate norm
1249  !! for a submatrix.
1250  !!
1251  !! \param rho_bml Input Hamiltonian/Output density matrix
1252  !! \param threshold Threshold for sparse matrix algebra
1253  !! \param pp Vector containing sequence of 0s and 1s
1254  !! \param icount Sequence count
1255  !! \param vv Vector of sum of squares per iteration
1256  !! \param mineval Min value used for normalization (optional)
1257  !! \param maxeval Max value used for normalization (optional)
1258  !! \param core_size Number of core rows
1259  subroutine prg_sp2_submatrix_inplace(rho_bml, threshold, pp, icount, vv, &
1260  mineval, maxeval, core_size)
1261 
1262  integer, intent(inout) :: icount
1263  integer, intent(inout) :: pp(:)
1264  integer, intent(in) :: core_size
1265  real(dp), intent(in) :: threshold
1266  real(dp), intent(inout) :: vv(:)
1267  real(dp), intent(in) :: mineval, maxeval
1268  type(bml_matrix_t),intent(inout) :: rho_bml
1269 
1270  integer :: iter
1271  real(dp) :: trx, trx2, factor
1272  type(bml_matrix_t) :: x2_bml
1273 
1274  ! We're also using Niklasson's scheme to determine convergence
1275 
1276  !! Normalize
1277  call bml_normalize(rho_bml, mineval, maxeval)
1278 
1279  trx = bml_trace(rho_bml)
1280 
1281  call bml_copy_new(rho_bml, x2_bml)
1282 
1283  do iter = 1, icount
1284 
1285  ! X2 <- X
1286  ! X2 <- X - X * X
1287  call bml_copy(rho_bml, x2_bml)
1288 
1289  call bml_multiply(rho_bml, rho_bml, x2_bml, -1.0_dp, 1.0_dp, threshold)
1290 
1291  vv(iter) = vv(iter) + bml_sum_squares_submatrix(x2_bml, core_size)
1292 
1293  trx2 = bml_trace(x2_bml)
1294 
1295  factor = 1.0_dp - 2.0_dp * pp(iter)
1296 
1297  ! X <- X + (X - X * X) <- 2 * X - X * X when pp(iter) == 0
1298  ! X <- X - (X - X * X) <- X * X when pp(iter) == 1
1299  call bml_add_deprecated(1.0_dp, rho_bml, factor, x2_bml, threshold)
1300 
1301  trx = trx + factor * trx2
1302 
1303  end do
1304 
1305  ! X <- 2 * X
1306  call bml_scale(2.0_dp, rho_bml) !D = 2.0_dp*D
1307 
1308  call bml_deallocate(x2_bml)
1309 
1310  end subroutine prg_sp2_submatrix_inplace
1311 
1312 end module prg_sp2_mod
The prg_normalize module.
integer, parameter dp
subroutine, public prg_normalize(h_bml)
Normalize a Hamiltonian matrix prior to running the SP2 algorithm.
The parallel module.
subroutine, public prg_sumrealreduce3(value1, value2, value3)
subroutine, public prg_allgatherparallel(a)
integer function, public printrank()
integer function, public getnranks()
integer function, public getmyrank()
subroutine, public prg_sumrealreduce2(value1, value2)
The SP2 module.
Definition: prg_sp2_mod.F90:7
subroutine, public prg_sp2_alg2(h_bml, rho_bml, threshold, bndfil, minsp2iter, maxsp2iter, sp2conv, idemtol, verbose)
subroutine, public prg_sp2_alg2_genseq(h_bml, rho_bml, threshold, bndfil, minsp2iter, maxsp2iter, sp2conv, idemtol, pp, icount, vv, verbose)
subroutine, public prg_sp2_alg1_seq(h_bml, rho_bml, threshold, pp, icount, vv)
subroutine, public prg_sp2_basic_tcore(h_bml, rho_bml, rhofull_bml, threshold, bndfil, minsp2iter, maxsp2iter, sp2conv, idemtol, verbose)
subroutine, public prg_prg_sp2_alg2_seq_inplace(rho_bml, threshold, pp, icount, vv, mineval, maxeval, verbose)
subroutine, public prg_sp2_submatrix(ham_bml, rho_bml, threshold, pp, icount, vv, mineval, maxeval, core_size)
Perform SP2 algorithm using sequence and calculate norm for a submatrix.
subroutine, public prg_sp2_alg2_seq(h_bml, rho_bml, threshold, pp, icount, vv, verbose)
subroutine, public prg_sp2_submatrix_inplace(rho_bml, threshold, pp, icount, vv, mineval, maxeval, core_size)
integer, parameter dp1
Definition: prg_sp2_mod.F90:19
subroutine, public prg_sp2_basic(h_bml, rho_bml, threshold, bndfil, minsp2iter, maxsp2iter, sp2conv, idemtol, verbose)
Calculates the density matrix from a Hamiltonian matrix by purification. The method implemented here ...
Definition: prg_sp2_mod.F90:51
subroutine, public prg_sp2_alg1_genseq(h_bml, rho_bml, threshold, bndfil, minsp2iter, maxsp2iter, sp2conv, idemtol, pp, icount, vv)
subroutine, public prg_prg_sp2_alg1_seq_inplace(rho_bml, threshold, pp, icount, vv, mineval, maxeval)
subroutine, public prg_sp2_alg1(h_bml, rho_bml, threshold, bndfil, minsp2iter, maxsp2iter, sp2conv, idemtol, verbose)
The timer module.