PROGRESS  master
prg_genz_mod.F90
Go to the documentation of this file.
1 
7 
9  use bml
10  use iso_c_binding
13  use prg_extras_mod
14 
15 #ifdef USE_NVTX
16  use prg_nvtx_mod
17 #endif
18 
19  implicit none
20 
21  private !Everything is private by default
22 
23  integer, parameter :: dp = kind(1.0d0)
24 
27 
31  type, public :: genzspinp
32 
34  integer :: verbose
35 
37  integer :: nfirst
38 
40  integer :: nrefi
41 
43  integer :: nreff
44 
46  real(dp) :: numthresi
47 
49  real(dp) :: numthresf
50 
52  logical :: integration
53 
55  integer :: igenz
56 
58  logical :: zsp
59 
61  integer :: mdim
62 
64  character(20) :: bml_type
65 
66  end type genzspinp
67 
68 contains
69 
72  subroutine prg_parse_zsp(input,filename)
73  type(genzspinp), intent(inout) :: input
74  integer, parameter :: nkey_char = 1, nkey_int = 5, nkey_re = 2, nkey_log = 2
75  character(len=*) :: filename
76 
77  !Library of keywords with the respective defaults.
78  character(len=50), parameter :: keyvector_char(nkey_char) = [character(len=100) :: &
79  'BMLType=']
80  character(len=100) :: valvector_char(nkey_char) = [character(len=100) :: &
81  'Dense']
82 
83  character(len=50), parameter :: keyvector_int(nkey_int) = [character(len=50) :: &
84  'Verbose=','NFirst=','NRefI=','NRefF=','MDim=']
85  integer :: valvector_int(nkey_int) = (/ &
86  0,10,3,1,-1 /)
87 
88  character(len=50), parameter :: keyvector_re(nkey_re) = [character(len=50) :: &
89  'NumthreshI=','NumthreshF=' ]
90  real(dp) :: valvector_re(nkey_re) = (/&
91  1.0e-8, 1.0e-5 /)
92 
93  character(len=50), parameter :: keyvector_log(nkey_log) = [character(len=100) :: &
94  'ZSP=','Int=']
95  logical :: valvector_log(nkey_log) = (/&
96  .false., .true. /)
97 
98  !Start and stop characters
99  character(len=50), parameter :: startstop(2) = [character(len=50) :: &
100  'ZSP{', '}']
101 
102  call prg_parsing_kernel(keyvector_char,valvector_char&
103  ,keyvector_int,valvector_int,keyvector_re,valvector_re,&
104  keyvector_log,valvector_log,trim(filename),startstop)
105 
106  !Characters
107  if(valvector_char(1) == "Dense")then
108  input%bml_type = bml_matrix_dense
109  elseif(valvector_char(1) == "Ellpack")then
110  input%bml_type = bml_matrix_ellpack
111  elseif(valvector_char(1) == "Ellblock")then
112  input%bml_type = bml_matrix_ellblock
113  endif
114 
115  !Integers
116  input%verbose = valvector_int(1)
117  input%nfirst = valvector_int(2)
118  input%nrefi = valvector_int(3)
119  input%nreff = valvector_int(4)
120  input%mdim = valvector_int(5)
121 
122  !Logicals
123  input%zsp = valvector_log(1)
124  input%integration = valvector_log(2)
125 
126  !Reals
127  input%numthresi = valvector_re(1)
128  input%numthresf = valvector_re(2)
129 
130  end subroutine prg_parse_zsp
131 
138  subroutine prg_init_zspmat(igenz,zk1_bml,zk2_bml,zk3_bml&
139  ,zk4_bml,zk5_bml,zk6_bml,norb,bml_type,bml_element_type)
140  integer :: norb, igenz
141  character(20) :: bml_type, my_bml_element_type
142  character(20), optional :: bml_element_type
143  type(bml_matrix_t) :: zk1_bml
144  type(bml_matrix_t) :: zk2_bml
145  type(bml_matrix_t) :: zk3_bml
146  type(bml_matrix_t) :: zk4_bml
147  type(bml_matrix_t) :: zk5_bml
148  type(bml_matrix_t) :: zk6_bml
149 
150  if(present(bml_element_type))then
151  my_bml_element_type = bml_element_type
152  else
153  my_bml_element_type = bml_element_real
154  endif
155 
156  igenz = 0
157 
158  if(bml_get_n(zk1_bml).le.0)then
159  call bml_zero_matrix(bml_type,my_bml_element_type,dp,norb,norb,zk1_bml)
160  call bml_zero_matrix(bml_type,my_bml_element_type,dp,norb,norb,zk2_bml)
161  call bml_zero_matrix(bml_type,my_bml_element_type,dp,norb,norb,zk3_bml)
162  call bml_zero_matrix(bml_type,my_bml_element_type,dp,norb,norb,zk4_bml)
163  call bml_zero_matrix(bml_type,my_bml_element_type,dp,norb,norb,zk5_bml)
164  call bml_zero_matrix(bml_type,my_bml_element_type,dp,norb,norb,zk6_bml)
165  endif
166 
167  end subroutine prg_init_zspmat
168 
181  subroutine prg_buildzdiag(smat_bml,zmat_bml,threshold,mdimin,bml_type,verbose,err_out)
182  ! use extras
183  real(dp) :: err_check
184  character(len=*), intent(in) :: bml_type
185  character(20) :: bml_element_type
186  integer :: i, j, mdim, norb
187  integer, intent(in) :: mdimin
188  integer, optional, intent(in) :: verbose
189  logical, optional, intent(inout) :: err_out
190  logical :: zeroeval
191  real(dp) :: mls_i
192  real(dp) :: invsqrt, threshold
193  real(dp), allocatable :: nono_evals(:), nonotmp(:,:), smat(:,:), umat(:,:)
194  real(dp), allocatable :: zmat(:,:)
195  type(bml_matrix_t) :: nonotmp_bml, saux_bml, umat_bml, umat_t_bml
196  type(bml_matrix_t) :: zmat_bml
197  type(bml_matrix_t), intent(inout) :: smat_bml
198 #ifdef USE_OFFLOAD
199  type(c_ptr) :: nonotmp_bml_c_ptr, umat_bml_c_ptr
200  integer :: ld
201  real(c_double), pointer :: nonotmp_bml_ptr(:,:), umat_bml_ptr(:,:)
202 #endif
203 
204 
205  if(present(verbose))then
206  if(verbose >= 1)then
207  write(*,*)""; write(*,*)"In buildzdiag ..."
208  endif
209  endif
210 
211  if(bml_get_precision(smat_bml) == 1 .or.&
212  &bml_get_precision(smat_bml) == 2)then
213  bml_element_type = "real"
214  elseif(bml_get_precision(smat_bml) == 3 .or.&
215  &bml_get_precision(smat_bml) == 4)then
216  bml_element_type = "complex"
217  endif
218 
219  norb = bml_get_n(smat_bml)
220  mdim = bml_get_m(smat_bml)
221 
222  !Allocate temporary matrices.
223  allocate(nono_evals(norb))
224 
225  if(.not. bml_allocated(zmat_bml))then
226  call bml_zero_matrix(bml_matrix_dense,bml_element_type,dp,norb,norb,zmat_bml)
227  endif
228 
229  !Auxiliary matrices.
230  call bml_zero_matrix(bml_type,bml_element_type,dp,norb,norb,umat_bml)
231  call bml_zero_matrix(bml_type,bml_element_type,dp,norb,norb,nonotmp_bml)
232 
233  !Eigenvectors and eigenalues of the overlap s.
234  mls_i = mls()
235  call bml_diagonalize(smat_bml,nono_evals,umat_bml)
236 
237  if(present(verbose))then
238  if(verbose >= 1)then
239  write(*,*)"Time for S diag = "//to_string(mls() - mls_i)//" ms"
240  endif
241  endif
242  if(any(nono_evals.le.0.0_dp))then
243  if(present(verbose))then
244  if(present(err_out))then
245  err_out = .true.
246  else
247  stop "ERROR at prg_buildZdiag: Overlap matrix is not possitive definite..."
248  endif
249  else
250  stop "ERROR at prg_buildZdiag: Overlap matrix is not possitive definite..."
251  endif
252  endif
253 #ifdef USE_OFFLOAD
254  umat_bml_c_ptr = bml_get_data_ptr_dense(umat_bml)
255  nonotmp_bml_c_ptr = bml_get_data_ptr_dense(nonotmp_bml)
256  ld = bml_get_ld_dense(umat_bml)
257  call c_f_pointer(umat_bml_c_ptr,umat_bml_ptr,shape=[ld,norb])
258  call c_f_pointer(nonotmp_bml_c_ptr,nonotmp_bml_ptr,shape=[ld,norb])
259  !$acc enter data copyin(nono_evals(1:norb))
260  !$acc parallel loop collapse(2) deviceptr(umat_bml_ptr,nonotmp_bml_ptr) &
261  !$acc present(nono_evals)
262  do i = 1, norb
263  do j = 1, norb
264  nonotmp_bml_ptr(j,i) = umat_bml_ptr(i,j) / sqrt(nono_evals(i))
265  end do
266  end do
267  !$acc end loop
268  !$acc exit data &
269  !$acc delete(nono_evals(1:norb))
270 #else
271  allocate(umat(norb,norb))
272  allocate(nonotmp(norb,norb))
273 
274  call bml_export_to_dense(umat_bml, umat)
275 
276  nonotmp = 0.0_dp
277  zeroeval = .false.
278 
279  !Doing s^-1/2 u^t
280 
281  !$omp parallel do default(none) &
282  !$omp shared(norb,nono_evals,umat,nonotmp,zeroEval) &
283  !$omp private(i,j,invsqrt)
284  do i = 1, norb
285  invsqrt = 1.0_dp/sqrt(nono_evals(i))
286  do j = 1, norb
287  nonotmp(i,j) = umat(j,i) * invsqrt
288  end do
289  end do
290  !$omp end parallel do
291  call bml_import_from_dense(bml_type, nonotmp, nonotmp_bml, threshold, mdim)
292  deallocate(nonotmp)
293  deallocate(umat)
294 #endif
295 
296  !Doing u s^-1/2 u^t
297  call bml_multiply(umat_bml,nonotmp_bml,zmat_bml,1.0_dp,0.0_dp,threshold)
298 
299  deallocate(nono_evals)
300  call bml_deallocate(umat_bml)
301  call bml_deallocate(nonotmp_bml)
302 
303 
304  end subroutine prg_buildzdiag
305 
318  subroutine prg_buildzsparse(smat_bml,zmat_bml,igenz,mdim,&
319  bml_type,zk1_bml,zk2_bml,zk3_bml&
320  ,zk4_bml,zk5_bml,zk6_bml,nfirst,nrefi,nreff&
321  ,thresholdi,thresholdf,integration,verbose)
322  !use extras
323  !real(dp) :: err_check
324  !real(dp), allocatable :: smat(:,:), zmat(:,:)
325  character(20) :: bml_type
326  integer :: kk, i, igenz, mdim
327  integer :: nfirst, norb, nref, nreff
328  integer :: nrefi, verbose
329  logical :: integration
330  real(dp) :: sec_i, sec_ii
331  real(dp) :: threshold, thresholdf, thresholdi
332  type(bml_matrix_t) :: smat_bml, zk1_bml, zk2_bml, zk3_bml
333  type(bml_matrix_t) :: zk4_bml, zk5_bml, zk6_bml, zmat_bml
334 
335  norb = bml_get_n(smat_bml)
336  if(mdim<0) mdim = norb
337 
338  kk=6 !Total number of stored z matrices for the xl integration scheme.
339 
340  if(verbose.eq.1)sec_ii=mls() !Gets the actual time in ms.
341 
342  ! Number of refinements and threshold value for the first "nfirst" iterations.
343  if(igenz.lt.nfirst)then
344  nref=nrefi; threshold = thresholdi !For the firsts iterations.
345  else
346  nref=nreff; threshold = thresholdf !For the following iterations.
347  end if
348 
349  if(verbose.eq.1)sec_i=mls() !Firs calculation of z using the graph approach.
350  if(igenz.eq.1) call prg_genz_sp_initialz0(smat_bml,zmat_bml,norb,mdim,bml_type,threshold)
351  if(verbose.eq.1)write(*,*)"Time for prg_initial estimate "//to_string(mls()-sec_i)//" ms"
352 
353  if(verbose.eq.1)sec_i=mls()! integration scheme.
354  if(integration)then
355  call prg_genz_sp_int(zmat_bml,zk1_bml,zk2_bml,zk3_bml&
356  &,zk4_bml,zk5_bml,zk6_bml,igenz,norb,bml_type,threshold)
357  end if
358  if(verbose.eq.1)write(*,*)"Time for xl scheme "//to_string(mls()-sec_i)//" ms"
359 
360  if(verbose.eq.1)sec_i=mls()! Refinement.
361  call prg_genz_sp_ref(smat_bml,zmat_bml,nref,mdim,bml_type,threshold)
362  if(verbose.eq.1)write(*,*)"Time for prg_genz_sp_ref "//to_string(mls()-sec_i)//" ms"
363 
364  ! call bml_export_to_dense()
365  ! call prg_delta(xmat,smat,norb,err_check) !to check for the accuracy of the approximation (prg_delta)
366  ! write(*,*)"err", err_check, norb
367  ! stop
368 
369  if(verbose.eq.1)write(*,*)"Time for prg_buildZsparse "//to_string(igenz)//" "//to_string(mls()-sec_ii)//" ms"
370 
371  end subroutine prg_buildzsparse
372 
384  subroutine prg_genz_sp_initialz0(smat_bml,zmat_bml,norb,mdim,bml_type_f,threshold)
385  character(20) :: bml_type, bml_type_f, bml_element_type
386  integer :: i, ii, j, jj
387  integer :: k, l, mdim, norb
388  integer, allocatable :: kk(:)
389  real(dp) :: err_check, invsqrt, threshold, thresholdz0
390  real(dp), allocatable :: sitmp(:,:), smat(:,:), sthres(:,:), stmp(:,:)
391  real(dp), allocatable :: stmp_evals(:), utmp(:,:), zmat(:,:), ztmp(:,:)
392  type(bml_matrix_t) :: sitmp_bml, sthres_bml, stmp_bml, utmp_bml
393  type(bml_matrix_t) :: xtmp_bml, zmat_bml
394  type(bml_matrix_t), intent(in) :: smat_bml
395 
396  !When this subroutine is parallelized, the following threshold can be set to larger (coarser) values.
397  !The reason for this is to reduce the memory per thread.
398  thresholdz0= 1.0d-10 !This could be passed as an input.
399 
400  bml_type= bml_matrix_dense !All the operations are performed in bml_dense.
401  ! bml_element_type = bml_get_element_type(smat_bml)
402  if(bml_get_precision(smat_bml) == 1 .or.&
403  &bml_get_precision(smat_bml) == 2)then
404  bml_element_type = "real"
405  elseif(bml_get_precision(smat_bml) == 3 .or.&
406  &bml_get_precision(smat_bml) == 4)then
407  bml_element_type = "complex"
408  endif
409 
410  !Part of the operations are still done in pure dense format.
411  allocate(zmat(norb,norb))
412  zmat = 0.0_dp
413 
414  allocate(sthres(norb,norb))
415 
416  allocate(smat(norb,norb))
417  call bml_export_to_dense(smat_bml,smat)
418 
419  call bml_zero_matrix(bml_type,bml_element_type,dp,norb,norb,sthres_bml) !a thresholded version of s
420 
421  call bml_import_from_dense(bml_type,smat,sthres_bml,thresholdz0,mdim)
422 
423  call bml_export_to_dense(sthres_bml,sthres) !We will use the dense to extract the small blocks.
424 
425  allocate(kk(norb)) !The nonzero positions in the row
426 
427  !NOTE: The following loop must be OMP parallelized as parallelization is trivial, however, as BML
428  !operations are taking all the threads this is not possible. BML needs to recognize automatically
429  !when "threads" are requested by the hosting code.
430  do i = 1, norb !Z0 is the prg_initial guess for the iterative refinement.
431 
432  jj=0
433  kk=0
434  do j=1,norb
435  if(abs(sthres(i,j)).gt.thresholdz0)then
436  jj=jj+1
437  kk(jj)=j
438  endif
439  enddo
440  k=jj
441 
442  !The followings will be dense matrices since they are the small blocks.
443  call bml_zero_matrix(bml_type,bml_element_type,dp, k,k,stmp_bml)
444  call bml_zero_matrix(bml_type,bml_element_type,dp, k,k,sitmp_bml)
445  call bml_zero_matrix(bml_type,bml_element_type,dp, k,k,utmp_bml)
446  call bml_zero_matrix(bml_type,bml_element_type,dp, k,k,xtmp_bml)
447 
448  allocate(stmp(k,k),sitmp(k,k))
449  allocate(utmp(k,k),stmp_evals(k),ztmp(k,k))
450 
451  stmp = 0.0_dp !Small dense block to be extracted from s.
452  stmp_evals = 0.0_dp !eigenvalues for the small dense s matrices.
453  sitmp = 0.0_dp
454  utmp = 0.0_dp !Matrix containing the eigenvectors of small s
455  ztmp = 0.0_dp
456 
457  do j = 1, k !Extracting the small s matrices
458  do l = 1, k
459  stmp(l,j) = smat(kk(l),kk(j))
460  end do
461  end do
462 
463  call bml_import_from_dense(bml_type, stmp, stmp_bml)
464  call bml_diagonalize(stmp_bml,stmp_evals,utmp_bml)
465  call bml_export_to_dense(utmp_bml,utmp)
466 
467  do j = 1, k !Applying the inverse sqrt function to the eigenvalues.
468  invsqrt = 1.0_dp/sqrt(stmp_evals(j))
469  do l = 1, k
470  sitmp(l,j) = utmp(l,j) * invsqrt
471  end do
472  end do
473 
474  utmp=transpose(utmp)
475 
476  call bml_import_from_dense(bml_type , utmp , utmp_bml)
477  call bml_import_from_dense(bml_type, sitmp, sitmp_bml)
478  call bml_multiply(sitmp_bml,utmp_bml,xtmp_bml,1.0_dp,0.0_dp)
479  call bml_export_to_dense(xtmp_bml,ztmp)
480 
481  do l = 1, k !Reconstructing the large Z0 based in the small Z.
482  do j = 1, k
483  if(kk(j).eq.i)then !For the row corresponding to the extracted dense block.
484  zmat(i,kk(l)) = ztmp(j,l)
485  end if
486  end do
487  end do
488 
489  deallocate(stmp,sitmp) !Deallocate temporary matrices
490  deallocate(utmp)
491  deallocate(stmp_evals)
492  deallocate(ztmp)
493  call bml_deallocate(sitmp_bml)
494  call bml_deallocate(stmp_bml)
495  call bml_deallocate(utmp_bml)
496 
497  end do
498 
499  call bml_zero_matrix(bml_type_f,bml_element_type,dp,norb,norb,zmat_bml)
500  call bml_import_from_dense(bml_type_f,zmat, zmat_bml,threshold,mdim) !Converting to bml format
501 
502  ! call prg_delta(zmat,smat,norb,err_check) !to check for the accuracy of the approximation (prg_delta)
503  ! call sparsity(smat,norb,spa)
504  ! write(*,*)"err", err_check, norb
505  ! stop
506 
507  deallocate(sthres)
508  deallocate(kk)
509  call bml_deallocate(sthres_bml)
510 
511  end subroutine prg_genz_sp_initialz0
512 
524  subroutine prg_genz_sp_initial_zmat(smat_bml,zmat_bml,norb,mdim,bml_type_f,threshold)
525  ! use extras
526  implicit none
527  character(20) :: bml_type, bml_type_f, bml_element_type
528  integer :: i, ii, j, jj
529  integer :: k, l, mdim, norb
530  integer, allocatable :: kk(:)
531  real(dp), intent(in) :: threshold
532  real(dp) :: err_check, invsqrt
533  real(dp) :: thresholdz0
534  real(dp), allocatable :: sitmp(:,:), smat(:,:), sthres(:,:)
535  real(dp), allocatable :: stmp(:,:)
536  real(dp), allocatable :: stmp_evals(:), utmp(:,:)
537  real(dp), allocatable :: zmat(:,:), ztmp(:,:)
538  type(bml_matrix_t) :: sitmp_bml, sthres_bml, stmp_bml
539  type(bml_matrix_t) :: xtmp_bml, zmat_bml, utmp_bml
540  type(bml_matrix_t), intent(in) :: smat_bml
541 
542  !When this subroutine is parallelized, the following threshold can be set to
543  !larger (coarser) values.
544  !The reason for this is to reduce the memory per thread.
545  !thresholdz0= 1.0d-10 !This could be passed as an input.
546 
547  bml_type= bml_matrix_dense !All the operations are performed in bml_dense.
548  ! bml_element_type = bml_get_element_type(smat_bml)
549  if(bml_get_precision(smat_bml) == 1 .or.&
550  &bml_get_precision(smat_bml) == 2)then
551  bml_element_type = "real"
552  elseif(bml_get_precision(smat_bml) == 3 .or.&
553  &bml_get_precision(smat_bml) == 4)then
554  bml_element_type = "complex"
555  endif
556 
557  !Part of the operations are still done in pure dense format.
558  allocate(zmat(norb,norb))
559  allocate(sthres(norb,norb))
560 
561  allocate(smat(norb,norb))
562  call bml_export_to_dense(smat_bml,smat)
563 
564  ! a thresholded version of s
565  call bml_zero_matrix(bml_type,bml_element_type,dp,norb,norb,sthres_bml)
566 
567  !call bml_import_from_dense(bml_type,smat,sthres_bml,thresholdZ0,mdim)
568  call bml_import_from_dense(bml_type,smat,sthres_bml,threshold,mdim)
569 
570  ! We will use the dense to extract the small blocks.
571  call bml_export_to_dense(sthres_bml,sthres)
572 
573  allocate(kk(norb)) !The nonzero positions in the row
574 
575  !NOTE: The following loop must be OMP parallelized as parallelization is
576  !trivial, however, as BML
577  !operations are taking all the threads this is not possible. BML needs to
578  !recognize automatically
579  !when "threads" are requested by the hosting code.
580  do i = 1, norb !Z0 is the prg_initial guess for the iterative refinement.
581  !do i = bml_getLocalRowMin(smat_bml, getMyRank()), &
582  ! bml_getLocalRowMax(smat_bml, getMyRank())
583 
584  jj=0
585  kk=0
586  do j=1,norb
587  !if(abs(sthres(i,j)).gt.thresholdZ0)then
588  if(abs(sthres(i,j)).gt.threshold)then
589  jj=jj+1
590  kk(jj)=j
591  endif
592  enddo
593  k=jj
594 
595  !The followings will be dense matrices since they are the small blocks.
596  call bml_zero_matrix(bml_type,bml_element_type,dp, k,k,stmp_bml)
597  call bml_zero_matrix(bml_type,bml_element_type,dp, k,k,sitmp_bml)
598  call bml_zero_matrix(bml_type,bml_element_type,dp, k,k,utmp_bml)
599  call bml_zero_matrix(bml_type,bml_element_type,dp, k,k,xtmp_bml)
600 
601  allocate(stmp(k,k),sitmp(k,k))
602  allocate(utmp(k,k),stmp_evals(k),ztmp(k,k))
603 
604  stmp = 0.0_dp !Small dense block to be extracted from s.
605  stmp_evals = 0.0_dp !eigenvalues for the small dense s matrices.
606  sitmp = 0.0_dp
607  utmp = 0.0_dp !Matrix containing the eigenvectors of small s
608  ztmp = 0.0_dp
609 
610  do j = 1, k !Extracting the small s matrices
611  do l = 1, k
612  stmp(l,j) = smat(kk(l),kk(j))
613  end do
614  end do
615 
616  call bml_import_from_dense(bml_matrix_dense, stmp, stmp_bml)
617  call bml_diagonalize(stmp_bml,stmp_evals,utmp_bml)
618  call bml_export_to_dense(utmp_bml,utmp)
619 
620  do j = 1, k !Applying the inverse sqrt function to the eigenvalues.
621  invsqrt = 1.0_dp/sqrt(stmp_evals(j))
622  do l = 1, k
623  sitmp(l,j) = utmp(l,j) * invsqrt
624  end do
625  end do
626 
627  utmp=transpose(utmp)
628 
629  call bml_import_from_dense(bml_type, utmp, utmp_bml)
630  call bml_import_from_dense(bml_type, sitmp, sitmp_bml)
631  call bml_multiply(sitmp_bml,utmp_bml,xtmp_bml,1.0_dp,0.0_dp)
632  call bml_export_to_dense(xtmp_bml,ztmp)
633 
634  do l = 1, k !Reconstructing the large Z0 based in the small Z.
635  do j = 1, k
636  if(kk(j).eq.i)then !For the row corresponding to the extracted dense
637  !block.
638  zmat(i,kk(l)) = ztmp(j,l)
639  end if
640  end do
641  end do
642 
643  deallocate(stmp,sitmp) !Deallocate temporary matrices
644  deallocate(utmp)
645  deallocate(stmp_evals)
646  deallocate(ztmp)
647  call bml_deallocate(sitmp_bml)
648  call bml_deallocate(stmp_bml)
649  call bml_deallocate(utmp_bml)
650 
651  end do
652 
653  !call bml_zero_matrix(bml_type_f,bml_element_type,bml_element_precision,norb,norb,zmat_bml)
654  call bml_import_from_dense(bml_type_f,zmat,zmat_bml,threshold,mdim, &
655  bml_get_distribution_mode(smat_bml))
656  !Converting to bml format
657 
658  ! call prg_delta(zmat,smat,norb,err_check) !to check for the accuracy of
659  ! the approximation (prg_delta)
660  ! call sparsity(smat,norb,spa)
661  ! write(*,*)"err", err_check, norb
662  ! stop
663  deallocate(sthres)
664  deallocate(kk)
665  call bml_deallocate(sthres_bml)
666 
667  end subroutine prg_genz_sp_initial_zmat
668 
679  subroutine prg_genz_sp_int(zmat_bml,zk1_bml,zk2_bml,zk3_bml&
680  &,zk4_bml,zk5_bml,zk6_bml,igenz,norb,bml_type&
681  &,threshold)
682  integer :: igenz,norb,KK
683  character(20) :: bml_element_type
684  real(dp) :: alpha, kappa, c0, c1, c2, c3, c4, c5
685  real(dp) :: threshold
686  type(bml_matrix_t) :: zmat_bml
687  type(bml_matrix_t) :: zk1_bml,zk2_bml,zk3_bml,zk4_bml,zk5_bml,zk6_bml
688  character(20) :: bml_type
689 
690  kk=6
691  alpha=0.0180_dp
692  kappa=1.82_dp
693 
694  ! bml_element_type = bml_get_element_type(zmat_bml)
695  if(bml_get_precision(zmat_bml) == 1 .or.&
696  &bml_get_precision(zmat_bml) == 2)then
697  bml_element_type = "real"
698  elseif(bml_get_precision(zmat_bml) == 3 .or.&
699  &bml_get_precision(zmat_bml) == 4)then
700  bml_element_type = "complex"
701  endif
702 
703  !The following constants are the original constants premultiplied by alpha.
704 
705  c0=-0.1080_dp
706  c1=0.2520_dp
707  c2=-0.1440_dp
708  c3=-0.0540_dp
709  c4=0.0720_dp
710  c5=-0.0180_dp
711 
712 
713  if(igenz.eq.kk)then
714 
715  call bml_zero_matrix(bml_type,bml_element_type,dp,norb ,norb,zk1_bml)
716  call bml_zero_matrix(bml_type,bml_element_type,dp,norb ,norb,zk2_bml)
717  call bml_zero_matrix(bml_type,bml_element_type,dp,norb ,norb,zk3_bml)
718  call bml_zero_matrix(bml_type,bml_element_type,dp,norb ,norb,zk4_bml)
719  call bml_zero_matrix(bml_type,bml_element_type,dp,norb ,norb,zk5_bml)
720  call bml_zero_matrix(bml_type,bml_element_type,dp,norb ,norb,zk6_bml)
721 
722  call bml_copy(zmat_bml,zk1_bml);call bml_copy(zmat_bml,zk2_bml);call bml_copy(zmat_bml,zk3_bml)
723  call bml_copy(zmat_bml,zk4_bml);call bml_copy(zmat_bml,zk5_bml);call bml_copy(zmat_bml,zk6_bml)
724 
725  end if
726 
727  if(igenz.ge.kk)then !Here we change z by applying the t-r integration scheme
728 
729  call bml_add_deprecated(1.0_dp,zmat_bml,-1.0_dp,zk6_bml,threshold) !Z(t)-Z~(t)
730  call bml_scale(kappa,zmat_bml)
731  call bml_add_deprecated(1.0_dp,zmat_bml,2.0_dp,zk6_bml,threshold) !Z(t)+2Z~(t)
732  call bml_add_deprecated(1.0_dp,zmat_bml,-1.0_dp,zk5_bml,threshold) !Z(t)-Z~(t-dt)
733 
734  !Dissipation force term:
735  call bml_add_deprecated(1.0_dp,zmat_bml,c0,zk6_bml,threshold) !Z(t)+c0*Z~(t)
736  call bml_add_deprecated(1.0_dp,zmat_bml,c1,zk5_bml,threshold)
737  call bml_add_deprecated(1.0_dp,zmat_bml,c2,zk4_bml,threshold)
738  call bml_add_deprecated(1.0_dp,zmat_bml,c3,zk3_bml,threshold)
739  call bml_add_deprecated(1.0_dp,zmat_bml,c4,zk2_bml,threshold)
740  call bml_add_deprecated(1.0_dp,zmat_bml,c5,zk1_bml,threshold) !Z(t)+c0*Z~(t-5*dt)
741 
742  end if
743 
744  if(igenz.ge.kk)then !Here we are shifting the z matrices.
745 
746  call bml_copy(zk2_bml,zk1_bml) !Z~(t-5*dt)=Z~(t-4*dt)
747  call bml_copy(zk3_bml,zk2_bml) !Z~(t-4*dt)=Z~(t-3*dt)
748  call bml_copy(zk4_bml,zk3_bml)
749  call bml_copy(zk5_bml,zk4_bml)
750  call bml_copy(zk6_bml,zk5_bml) !Z~(t-dt)=Z~(t)
751  call bml_copy(zmat_bml,zk6_bml) !Z~(t)=Z~(t+dt)
752 
753  end if
754 
755  end subroutine prg_genz_sp_int
756 
765  subroutine prg_genz_sp_ref(smat_bml,zmat_bml,nref,norb,bml_type,threshold)
766  integer :: k
767  integer, intent(inout) :: norb
768  integer, intent(in) :: nref
769  real(dp) :: err_check, sec_i
770  real(dp), allocatable :: smat(:,:), zmat(:,:)
771  real(dp),intent(in) :: threshold
772  type(bml_matrix_t) :: temp_bml
773  type(bml_matrix_t) :: temp1_bml
774  type(bml_matrix_t) :: temp2_bml
775  type(bml_matrix_t) :: idscaled_bml
776  type(bml_matrix_t) :: xmat_t_bml
777  type(bml_matrix_t),intent(inout) :: zmat_bml
778  type(bml_matrix_t) :: aux_bml
779  type(bml_matrix_t), intent(in) :: smat_bml
780  character(20),intent(in) :: bml_type
781  character(20) :: bml_element_type
782 
783  norb = bml_get_n(smat_bml)
784 
785  ! bml_element_type = bml_get_element_type(smat_bml)
786  if(bml_get_precision(smat_bml) == 1 .or.&
787  &bml_get_precision(smat_bml) == 2)then
788  bml_element_type = "real"
789  elseif(bml_get_precision(smat_bml) == 3 .or.&
790  &bml_get_precision(smat_bml) == 4)then
791  bml_element_type = "complex"
792  endif
793 
794  call bml_zero_matrix(bml_type,bml_element_type,dp,norb,norb,idscaled_bml)
795 
796  call bml_add_identity(idscaled_bml, 1.0_dp, threshold) !1.0 [0] + 1.0 I
797  call bml_scale(1.8750_dp,idscaled_bml) ! 1.875*I
798 
799  call bml_noinit_matrix(bml_type,bml_element_type,dp,norb ,norb,temp_bml)
800  call bml_noinit_matrix(bml_type,bml_element_type,dp,norb ,norb,temp2_bml)
801 
802  sec_i=mls() !Firs calculation of z using the graph approach.
803  do k = 1, nref !Iterative refinement
804 
805  !Enforcing symmetry (in bml).
806  call bml_transpose_new(zmat_bml, xmat_t_bml) !Z^t
807  call bml_add_deprecated(0.50_dp,zmat_bml, 0.50_dp, xmat_t_bml,threshold) !(Z^t+Z)/2
808 
809  call bml_multiply(smat_bml,zmat_bml,temp_bml, 1.0_dp, 0.0_dp,threshold) !S*Z
810 
811  call bml_multiply(zmat_bml,temp_bml, temp2_bml, 1.0_dp, 0.0_dp,threshold) !X = Z^t*S*Z
812 
813  call bml_multiply(temp2_bml, temp2_bml, temp_bml, 1.0_dp, 0.0_dp,threshold) !X*X
814 
815  call bml_scale(0.3750_dp, temp_bml)
816  call bml_scale(-1.250_dp, temp2_bml)
817 
818  !Temp = 1.875*I - 1.25*X + 0.375*X^2
819  call bml_add_deprecated(1.0_dp,temp2_bml,1.0_dp, idscaled_bml,threshold)
820  call bml_add_deprecated(1.0_dp,temp_bml,1.0_dp, temp2_bml,threshold)
821 
822  call bml_multiply(zmat_bml,temp_bml,temp2_bml, 1.0_dp, 0.0_dp,threshold) !Z*Temp
823 
824  call bml_copy(temp2_bml,zmat_bml)
825 
826  end do
827  call bml_deallocate(temp_bml)
828  call bml_deallocate(temp1_bml)
829  call bml_deallocate(temp2_bml)
830 
831  write(*,*)"Time for ref loop "//to_string(mls()-sec_i)//" ms"
832 
833 
834  call bml_deallocate(idscaled_bml)
835  call bml_deallocate(xmat_t_bml)
836 
837  end subroutine prg_genz_sp_ref
838 
839 end module prg_genz_mod
Extra routines.
real(dp) function, public mls()
To get the actual time in milliseconds.
integer, parameter dp
To produce a matrix which is needed to orthogonalize .
Definition: prg_genz_mod.F90:6
subroutine, public prg_buildzsparse(smat_bml, zmat_bml, igenz, mdim, bml_type, zk1_bml, zk2_bml, zk3_bml, zk4_bml, zk5_bml, zk6_bml, nfirst, nrefi, nreff, thresholdi, thresholdf, integration, verbose)
Inverse factorization using Niklasson's algorithm.
subroutine, public prg_genz_sp_initial_zmat(smat_bml, zmat_bml, norb, mdim, bml_type_f, threshold)
Initial estimation of Z.
subroutine, public prg_genz_sp_ref(smat_bml, zmat_bml, nref, norb, bml_type, threshold)
Iterative refinement.
subroutine, public prg_parse_zsp(input, filename)
The parser for genz solver.
subroutine, public prg_buildzdiag(smat_bml, zmat_bml, threshold, mdimin, bml_type, verbose, err_out)
Usual subroutine involving diagonalization. , where = eigenvectors and = eigenvalues....
subroutine prg_genz_sp_int(zmat_bml, zk1_bml, zk2_bml, zk3_bml, zk4_bml, zk5_bml, zk6_bml, igenz, norb, bml_type, threshold)
Inverse factorization using Niklasson's algorithm.
subroutine, public prg_genz_sp_initialz0(smat_bml, zmat_bml, norb, mdim, bml_type_f, threshold)
Initial estimation of Z.
subroutine, public prg_init_zspmat(igenz, zk1_bml, zk2_bml, zk3_bml, zk4_bml, zk5_bml, zk6_bml, norb, bml_type, bml_element_type)
Initiates the matrices for the Xl integration of Z.
Some general parsing functions.
subroutine, public prg_parsing_kernel(keyvector_char, valvector_char, keyvector_int, valvector_int, keyvector_re, valvector_re, keyvector_log, valvector_log, filename, startstop)
The general parsing function. It is used to vectorize a set of "keywords" "value" pairs as included i...
Module to handle input output files for the PROGRESS lib.
The parallel module.
Input for the genz driver. This type controlls all the variables that are needed by genz.