 ! Listing extracted from: _static/doxy/prg__implicit__fermi__mod_8F90_source.html

 !> The Implicit Recursive Fermi O(N) module.

 !! \ingroup PROGRESS

 !! \brief Subroutines implementing Niklasson's implicit recursive Fermi-Dirac exact

 !! density matrix purification algorithm.

 !!

 module prg_implicit_fermi_mod


   use omp_lib

   use bml

   use prg_normalize_mod

   use prg_densitymatrix_mod

   use prg_timer_mod

   use prg_parallel_mod

   use prg_ewald_mod


   implicit none


   private  !Everything is private by default


   integer, parameter :: dp = kind(1.0d0)


   public :: prg_implicit_fermi

   public :: prg_implicit_fermi_save_inverse

   public :: prg_implicit_fermi_zero

   public :: prg_test_density_matrix

   public :: prg_implicit_fermi_response

   public :: prg_implicit_fermi_first_order_response

   public :: prg_finite_diff


 contains


   !> Finite-temperature density matrix from the implicit recursive Fermi-operator
   !! expansion, storing the inverse produced at every recursion step.
   !!
   !! Each chemical-potential iteration starts from the matrix built by
   !! prg_normalize_implicit_fermi (P0 = 0.5*I - cnst*(H - mu*I) according to the
   !! in-code notes) and applies nsteps recursion steps
   !!   Y = 2*(P^2 - P) + I,   Inv(i) ~ Y^-1,   P <- Inv(i)*P^2.
   !! Inv(i) is refined by prg_newtonschulz starting from the value already held in
   !! Inv_bml(i); on the first mu iteration of a call it is first improved with a
   !! loose conjugate-gradient solve. mu is then corrected until |tr(P) - nocc| is
   !! at most occErrLimit or 30 iterations were done: a step search with halving
   !! step length while the error exceeds 1, a Newton step otherwise.
   !! On exit P is scaled by 2.
   !!
   !! \param Inv_bml Inverses of the nsteps recursion steps (input guess, output result).
   !! \param h_bml Input Hamiltonian matrix.
   !! \param p_bml Output density matrix (multiplied by 2 before returning).
   !! \param nsteps Number of recursion steps.
   !! \param nocc Target occupation, compared against the trace of P.
   !! \param mu Chemical potential; updated in place.
   !! \param beta Input inverse temperature.
   !! \param occErrLimit Occupation error below which mu is accepted.
   !! \param threshold Threshold for matrix algebra.
   !! \param tol Convergence tolerance handed to prg_newtonschulz.
   !! \param SCF_IT SCF iteration counter; the value 1 triggers (re)initialization of Inv_bml.
   !! \param occiter Running count of mu iterations (incremented by this call).
   !! \param totns Running count of Newton-Schulz iterations (incremented by this call).
   subroutine prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, &
        mu, beta, occErrLimit, threshold, tol,SCF_IT, occiter, totns)

     implicit none

     ! nsteps is declared first because it dimensions inv_bml below.
     integer, intent(in) :: nsteps, scf_it
     type(bml_matrix_t), intent(in) :: h_bml
     type(bml_matrix_t), intent(inout) :: p_bml, inv_bml(nsteps)
     real(dp), intent(in) :: nocc, threshold
     real(dp), intent(in) :: tol
     real(dp), intent(in) :: occerrlimit, beta
     real(dp), intent(inout) :: mu
     integer, intent(inout) :: occiter, totns

     integer, parameter :: maxiter = 30

     type(bml_matrix_t) :: w_bml, y_bml, d_bml, aux_bml, p2_bml, i_bml, ai_bml
     real(dp) :: trdpdmu, trp0, occerr, alpha
     real(dp) :: cnst, mustep
     real(dp), allocatable :: trace(:)
     character(20) :: bml_type
     integer :: n, m, i, iter, muadj, prev, nsiter, nsdm

     bml_type = bml_get_type(h_bml)
     n = bml_get_n(h_bml)
     m = bml_get_m(h_bml)

     allocate(trace(2))
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, p2_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, d_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, w_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, aux_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, y_bml)
     call bml_identity_matrix(bml_type, bml_element_real, dp, n, m, i_bml)
     call bml_identity_matrix(bml_type, bml_element_real, dp, n, m, ai_bml)

     occerr = 10.0_dp
     alpha = 1.0_dp
     prev = 1
     iter = 0
     ! The loop condition below reads muadj before the loop body assigns it,
     ! so it must have a defined value here.
     muadj = 0
     ! Real-valued power: an integer 2**(nsteps+2) overflows for large nsteps.
     cnst = beta/(2.0_dp**(nsteps+2))

     if (scf_it .eq. 1) then
       alpha = 4.0_dp
       ! Normalization
       ! P0 = 0.5*I - cnst*(H0-mu0*I)
       call bml_copy(h_bml, p_bml)
       call prg_normalize_implicit_fermi(p_bml, cnst, mu)
       ! Generate good starting guess for (2*(P2-P)+1)^-1 using conjugate gradient
       call bml_multiply_x2(p_bml, p2_bml, threshold, trace)
       ! Y = 2*(P2-P) + II
       call bml_copy(p2_bml, y_bml)
       call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold)
       call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold)
       call prg_conjgrad(y_bml, ai_bml, i_bml, aux_bml, d_bml, w_bml, 0.0001_dp, threshold)
       ! NOTE(review): the CG result ai_bml is not used after this point; the
       ! stored inverses are seeded from i_bml instead - confirm this is intended.
       do i = 1, nsteps
         call bml_copy(i_bml, inv_bml(i))
       enddo
     end if

     do while ((occerr .gt. occerrlimit .or. muadj .eq. 1) .and. iter < maxiter)
       iter = iter + 1
       muadj = 0
       write(*,*) 'mu =', mu
       ! Normalization
       ! P0 = 0.5*I - cnst*(H0-mu0*I)
       call bml_copy(h_bml, p_bml)
       call prg_normalize_implicit_fermi(p_bml, cnst, mu)

       nsdm = 0
       do i = 1, nsteps
         call bml_multiply_x2(p_bml, p2_bml, threshold, trace)
         ! Y = 2*(P2-P) + I
         call bml_copy(p2_bml, y_bml)
         call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold)
         call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold)
         ! Find inverse Inv(i) = (2*(P2-P)+I)^-1: loose CG on the first mu
         ! iteration, then Newton-Schulz refinement of the stored inverse.
         if (iter .eq. 1) then
           call prg_conjgrad(y_bml, inv_bml(i), i_bml, aux_bml, d_bml, w_bml, 0.001_dp, threshold)
         endif
         call prg_newtonschulz(y_bml, inv_bml(i), d_bml, w_bml, aux_bml, i_bml, tol, threshold, nsiter)
         nsdm = nsdm + nsiter
         ! Next P = Inv(i)*P^2; Inv_bml(i) keeps the inverse for later response calculations.
         call bml_multiply(inv_bml(i), p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold)
       enddo

       write(*,*) 'Number of Newton-Schulz iterations:',nsdm
       totns = totns + nsdm

       trp0 = bml_trace(p_bml)
       trdpdmu = beta*(trp0 - bml_sum_squares(p_bml)) ! sum p(i,j)**2
       occerr = abs(trp0 - nocc)
       write(*,*) 'occerr =', occerr

       ! If occupation error is too large, do bisection method:
       ! keep the step length while the sign of the error is unchanged,
       ! halve it whenever the sign flips.
       if (occerr > 1.0_dp) then
         if (nocc-trp0 < 0.0_dp .and. prev .eq. -1) then
           prev = -1
         else if (nocc-trp0 > 0.0_dp .and. prev .eq. 1) then
           prev = 1
         else if (nocc-trp0 > 0.0_dp .and. prev .eq. -1) then
           prev = 1
           alpha = alpha/2
         else
           prev = -1
           alpha = alpha/2
         endif
         mu = mu + prev*alpha
         muadj = 1

         ! Otherwise do Newton
       else if (occerr .gt. occerrlimit) then
         mustep = (nocc -trp0)/trdpdmu
         mu = mu + mustep
         muadj = 1
       end if
     enddo

     if (iter .ge. maxiter) then
       write(*,*) 'Could not converge chemical potential in prg_implicit_fermi_save_inverse'
     end if
     ! Adjusting the occupation sometimes causes the perturbation calculation to not converge.
     ! For now we recompute the DM one extra time if mu was adjusted.
     !if (muadj .eq. 1) then
     ! Adjust occupation
     ! call bml_copy(p_bml, d_bml)
     ! call bml_scale_add_identity(d_bml, -1.0_dp, 1.0_dp, threshold)
     ! call bml_multiply(p_bml, d_bml, w_bml, 1.0_dp, 0.0_dp, threshold)
     ! ofactor = ((nocc - trP0)/trdPdmu) * beta
     ! call bml_add(p_bml, w_bml, 1.0_dp, ofactor, threshold)
     !end if
     occiter = occiter + iter
     call bml_scale(2.0_dp,p_bml)
     deallocate(trace)

     call bml_deallocate(p2_bml)
     call bml_deallocate(w_bml)
     call bml_deallocate(d_bml)
     call bml_deallocate(y_bml)
     call bml_deallocate(aux_bml)
     call bml_deallocate(ai_bml)
     call bml_deallocate(i_bml)

   end subroutine prg_implicit_fermi_save_inverse


   !> Finite-temperature density matrix from the implicit recursive Fermi-operator
   !! expansion, with Newton correction of the chemical potential.
   !!
   !! Every outer iteration rebuilds the starting matrix with
   !! prg_normalize_implicit_fermi (P0 = 0.5*II - cnst*(H0-mu0*II) per the in-code
   !! note, cnst = beta/(4*k**nsteps)) and applies nsteps recursion steps. For
   !! k == 2 a step uses Y = 2*(P^2 - P) + II; for other k the matrices come from
   !! prg_setup_linsys. After the recursion mu is updated by
   !! mu <- mu + (nocc - tr(P)) / (beta*(tr(P) - sum_ij P(i,j)**2))
   !! whenever the occupation error exceeds occErrLimit.
   !!
   !! \param h_bml Input Hamiltonian matrix.
   !! \param p_bml Output density matrix.
   !! \param nsteps Number of recursion steps.
   !! \param k Order of the recursion step (k == 2 is handled inline).
   !! \param nocc Target occupation, compared against the trace of P.
   !! \param mu Chemical potential; updated in place.
   !! \param beta Input inverse temperature.
   !! \param method 0: solve each step with prg_conjgrad ("Doing CG"); any other
   !!        value: apply an approximate inverse obtained by one loose
   !!        conjugate-gradient solve at the first step ("Doing NS"; the
   !!        Newton-Schulz refinement itself is currently commented out).
   !! \param osteps 0: iterate until the occupation error is at most occErrLimit
   !!        (there is no iteration cap in that mode); > 0: do exactly osteps iterations.
   !! \param occErrLimit Occupation error tolerance.
   !! \param threshold Threshold for matrix algebra.
   !! \param tol Tolerance handed to prg_conjgrad when method == 0.
   subroutine prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, &
        mu, beta, method, osteps, occErrLimit, threshold, tol)

     implicit none

     type(bml_matrix_t), intent(in) :: h_bml
     type(bml_matrix_t), intent(inout) :: p_bml
     integer, intent(in) :: osteps, nsteps, method, k
     real(dp), intent(in) :: nocc, threshold
     real(dp), intent(in) :: tol
     real(dp), intent(in) :: occerrlimit, beta
     real(dp), intent(inout) :: mu

     type(bml_matrix_t) :: w_bml, y_bml, d_bml, p2_bml, aux1_bml, aux2_bml, i_bml, ai_bml
     real(dp) :: trdpdmu, trp0, occerr, cnst
     real(dp), allocatable :: trace(:)
     character(20) :: bml_type
     integer :: n, m, i, iter
     logical :: use_inverse

     bml_type = bml_get_type(h_bml)
     n = bml_get_n(h_bml)
     m = bml_get_m(h_bml)

     ! The explicit-inverse branch below is taken for every method other than 0,
     ! so its work matrices must exist for all of those values, not only for 1.
     use_inverse = (method .ne. 0)

     allocate(trace(2))
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, p2_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, d_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, w_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, y_bml)
     ! aux1_bml is handed to prg_conjgrad for every k, so allocate unconditionally.
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, aux1_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, aux2_bml)
     if (use_inverse) then
       call bml_identity_matrix(bml_type, bml_element_real, dp, n, m, i_bml)
       call bml_identity_matrix(bml_type, bml_element_real, dp, n, m, ai_bml)
     endif

     occerr = 10000.0_dp
     iter = 0
     ! Evaluated in real arithmetic: the integer 4*k**nsteps overflows for large nsteps.
     cnst = beta/(4.0_dp*real(k, dp)**nsteps)

     do while ((osteps .eq. 0 .and. occerr .gt. occerrlimit) .or. &
          (osteps .gt. 0 .and. iter .lt. osteps))
       iter = iter + 1

       ! Normalization
       ! P0 = 0.5*II - cnst*(H0-mu0*II)
       call bml_copy(h_bml, p_bml)
       call prg_normalize_implicit_fermi(p_bml, cnst, mu)

       if (method .eq. 0) then
         write(*,*) "Doing CG"
       else
         write(*,*) "Doing NS"
       end if

       do i = 1, nsteps

         ! Set up the system matrix y_bml and right-hand side p2_bml of this step.
         if (k .eq. 2) then
           call bml_multiply_x2(p_bml, p2_bml, threshold, trace)

           ! Y = 2*(P2-P) + II
           call bml_copy(p2_bml, y_bml)
           call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold)
           call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold)
         else
           call prg_setup_linsys(p_bml, y_bml, p2_bml, d_bml, w_bml, aux1_bml, aux2_bml, k, threshold)
         end if

         if (method .eq. 0) then
           call prg_conjgrad(y_bml, p_bml, p2_bml, d_bml, aux1_bml, w_bml, tol, threshold)
         else
           ! Only the first step computes an (approximate) inverse; later steps reuse it.
           if (i .eq. 1) then
             call prg_conjgrad(y_bml, ai_bml, i_bml, aux1_bml, d_bml, w_bml, 0.9_dp, threshold)
           end if
           !call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, aux1_bml, I_bml, tol, threshold)
           call bml_multiply(ai_bml, p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold)
         end if

       enddo

       ! Newton step for mu: d tr(P)/d mu = beta*(tr(P) - sum p(i,j)**2)
       trp0 = bml_trace(p_bml)
       trdpdmu = beta * (trp0 - bml_sum_squares(p_bml))
       occerr = abs(trp0 - nocc)
       if (occerr .gt. occerrlimit) then
         mu = mu + (nocc - trp0)/trdpdmu
       end if
       write(*,*) "mu =", mu
     enddo

     ! The final first-order occupation correction
     !   P <- P + beta*((nocc - tr P)/trdPdmu) * P*(II - P)
     ! is disabled; its operands are therefore no longer computed here.
     !call bml_add(p_bml, w_bml, 1.0_dp, ofactor, threshold)
     !call bml_print_matrix("P adjusted occupation",p_bml,0,10,0,10)

     deallocate(trace)

     call bml_deallocate(p2_bml)
     call bml_deallocate(w_bml)
     call bml_deallocate(d_bml)
     call bml_deallocate(y_bml)
     call bml_deallocate(aux1_bml)
     call bml_deallocate(aux2_bml)
     if (use_inverse) then
       call bml_deallocate(ai_bml)
       call bml_deallocate(i_bml)
     endif

   end subroutine prg_implicit_fermi


   !> Recursive implicit expansion with the scaling constant taken from the
   !! Gershgorin bounds of the Hamiltonian instead of an inverse temperature.
   !!
   !! The starting matrix is built by prg_normalize_implicit_fermi with
   !! cnst = 0.5*min(1/(mu - gbnd(1)), 1/(gbnd(2) - mu)), where gbnd is filled by
   !! bml_gershgorin. It is then passed through nsteps recursion steps with
   !! Y = 2*(P^2 - P) + II. mu is not adjusted.
   !! NOTE(review): cnst is only meaningful if mu lies strictly between the two
   !! bounds; no check is made here - confirm callers guarantee this.
   !!
   !! \param h_bml Input Hamiltonian matrix.
   !! \param p_bml Output density matrix.
   !! \param nsteps Number of recursion steps.
   !! \param mu Chemical potential.
   !! \param method 0: solve each step with prg_conjgrad ("Doing CG"); any other
   !!        value: apply an approximate inverse obtained by one loose
   !!        conjugate-gradient solve at the first step ("Doing NS").
   !! \param threshold Threshold for matrix algebra.
   !! \param tol Optional tolerance for prg_conjgrad when method == 0; when absent
   !!        a fallback of 1.0e-5 is used.
   subroutine prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, tol)

     implicit none

     type(bml_matrix_t), intent(in) :: h_bml
     type(bml_matrix_t), intent(inout) :: p_bml
     integer, intent(in) :: nsteps, method
     real(dp), intent(in) :: mu, threshold
     real(dp), intent(inout), optional :: tol

     ! Fallback used when the optional tol is not supplied.
     real(dp), parameter :: default_cg_tol = 1.0e-5_dp

     type(bml_matrix_t) :: w_bml, y_bml, c_bml, d_bml, p2_bml, i_bml, ai_bml
     real(dp) :: cnst, cg_tol
     real(dp), allocatable :: trace(:), gbnd(:)
     character(20) :: bml_type
     integer :: n, m, i
     logical :: use_inverse

     ! An absent optional argument must not be forwarded to a mandatory dummy.
     if (present(tol)) then
       cg_tol = tol
     else
       cg_tol = default_cg_tol
     end if

     ! The explicit-inverse branch is taken for every method other than 0.
     use_inverse = (method .ne. 0)

     bml_type = bml_get_type(h_bml)
     n = bml_get_n(h_bml)
     m = bml_get_m(h_bml)

     allocate(trace(2))
     allocate(gbnd(2))
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, p2_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, d_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, w_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, y_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, c_bml)
     if (use_inverse) then
       call bml_identity_matrix(bml_type, bml_element_real, dp, n, m, i_bml)
       call bml_identity_matrix(bml_type, bml_element_real, dp, n, m, ai_bml)
     endif

     call bml_copy(h_bml, p_bml)
     call bml_gershgorin(p_bml, gbnd)
     cnst = 0.5_dp*min(1.0_dp/(mu-gbnd(1)), 1.0_dp/(gbnd(2)-mu))
     call prg_normalize_implicit_fermi(p_bml, cnst, mu)

     if (method .eq. 0) then
       write(*,*) "Doing CG"
     else
       write(*,*) "Doing NS"
     end if

     do i = 1, nsteps

       ! P2 = P^2 is needed by both branches. It was previously missing from the
       ! explicit-inverse branch, which therefore worked on an all-zero P2.
       call bml_multiply_x2(p_bml, p2_bml, threshold, trace)

       ! Y = 2*(P2-P) + II
       call bml_copy(p2_bml, y_bml)
       call bml_add(y_bml, p_bml, 1.0_dp, -1.0_dp, threshold)
       call bml_scale_add_identity(y_bml, 2.0_dp, 1.0_dp, threshold)

       if (method .eq. 0) then
         call prg_conjgrad(y_bml, p_bml, p2_bml, d_bml, w_bml, c_bml, cg_tol, threshold)
       else
         ! Only the first step computes an (approximate) inverse; later steps reuse it.
         if (i .eq. 1) then
           call prg_conjgrad(y_bml, ai_bml, i_bml, c_bml, d_bml, w_bml, 0.9_dp, threshold)
         end if
         !call prg_newtonschulz(y_bml, ai_bml, d_bml, w_bml, c_bml, I_bml, tol, threshold)
         call bml_multiply(ai_bml, p2_bml, p_bml, 1.0_dp, 0.0_dp, threshold)
       end if

     enddo

     deallocate(gbnd)
     deallocate(trace)

     call bml_deallocate(p2_bml)
     call bml_deallocate(w_bml)
     call bml_deallocate(d_bml)
     call bml_deallocate(y_bml)
     call bml_deallocate(c_bml)
     if (use_inverse) then
       call bml_deallocate(ai_bml)
       call bml_deallocate(i_bml)
     endif

   end subroutine prg_implicit_fermi_zero


   !> First-order density matrix response, reusing the inverses saved for the
   !! unperturbed recursion.
   !!
   !! Starting from P0 built by prg_normalize_implicit_fermi and P1 = -cnst*H1
   !! with cnst = beta/2**(2+nsteps), each of the nsteps recursion steps forms
   !!   C0 = P0^2,  C = P0*P1 + P1*P0,  B = 2*(P1 - C),
   !!   P0 <- Inv(i)*C0,  P1 <- Inv(i)*(C + B*P0).
   !! Finally the response is shifted by mu1*beta*P0*(I-P0) with
   !! mu1 = -tr(P1)/tr(beta*P0*(I-P0)), which removes the trace of P1.
   !!
   !! \param H0_bml Input Hamiltonian matrix.
   !! \param H1_bml Input first-order Hamiltonian perturbation.
   !! \param P0_bml Output unperturbed matrix produced by the recursion.
   !! \param P1_bml Output first-order response.
   !! \param Inv_bml Inverses of the nsteps recursion steps (read only).
   !! \param nsteps Number of recursion steps.
   !! \param mu0 Chemical potential.
   !! \param beta Input inverse temperature.
   !! \param nocc Occupation (not used by this routine).
   !! \param threshold Threshold for matrix algebra.
   subroutine prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bml, &
        Inv_bml, nsteps, mu0, beta, nocc, threshold)

     implicit none

     ! nsteps is declared first because it dimensions inv_bml below.
     integer, intent(in) :: nsteps
     type(bml_matrix_t), intent(in) :: h0_bml, h1_bml, inv_bml(nsteps)
     type(bml_matrix_t), intent(inout) :: p0_bml,p1_bml
     real(dp), intent(in) :: mu0, threshold
     real(dp), intent(in) :: beta, nocc

     ! Below this magnitude tr(dP/dmu) is treated as zero and no shift is applied.
     real(dp), parameter :: min_dpdmu_trace = 1.0e-8_dp

     type(bml_matrix_t) :: b_bml, c_bml, c0_bml
     character(20) :: bml_type
     real(dp) :: p1_trace, dpdmu_trace, mu1, cnst
     integer :: n, m, i

     bml_type = bml_get_type(h0_bml)
     n = bml_get_n(h0_bml)
     m = bml_get_m(h0_bml)

     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, b_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, c_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, c0_bml)

     ! Real-valued power: an integer 2**(2+nsteps) overflows for large nsteps.
     cnst = beta/(2.0_dp**(2+nsteps))

     ! P0 = 0.5*II - cnst*(H0-mu0*II)
     call bml_copy(h0_bml, p0_bml)
     call prg_normalize_implicit_fermi(p0_bml, cnst, mu0)

     ! P1 =  - cnst*H1
     call bml_copy(h1_bml, p1_bml)
     call bml_scale(-1.0_dp*cnst, p1_bml)

     do i = 1, nsteps

       ! Calculate coefficient matrices
       ! C0 = P0^2
       call bml_multiply(p0_bml, p0_bml, c0_bml, 1.0_dp, 0.0_dp, threshold)
       ! C = P0*P1+P1*P0, B = 2(P1 - C)
       call bml_multiply(p0_bml, p1_bml, c_bml, 1.0_dp, 0.0_dp, threshold)
       call bml_multiply(p1_bml, p0_bml, c_bml, 1.0_dp, 1.0_dp, threshold)
       call bml_copy(p1_bml, b_bml)
       call bml_add(b_bml, c_bml, 2.0_dp, -2.0_dp, threshold)
       ! Get next P0
       call bml_multiply(inv_bml(i), c0_bml, p0_bml, 1.0_dp, 0.0_dp, threshold)
       ! Get next P1
       ! C = P0*P1+P1*P0 + 2(P1 -P0*P1-P1*P0)*P0(i+1)
       call bml_multiply(b_bml, p0_bml, c_bml, 1.0_dp, 1.0_dp, threshold)
       call bml_multiply(inv_bml(i), c_bml, p1_bml, 1.0_dp, 0.0_dp, threshold)

     enddo

     ! dPdmu = beta*P0(I-P0)
     call bml_copy(p0_bml, b_bml)
     call bml_scale_add_identity(b_bml, -1.0_dp, 1.0_dp, threshold)
     call bml_multiply(p0_bml, b_bml, c_bml, beta, 0.0_dp, threshold)
     dpdmu_trace = bml_trace(c_bml)
     p1_trace = bml_trace(p1_bml)

     ! Shift of the chemical potential that cancels tr(P1). The division is done
     ! only after the denominator has been checked.
     if (abs(dpdmu_trace) > min_dpdmu_trace) then
       mu1 = - p1_trace/dpdmu_trace
       call bml_add(p1_bml,c_bml,1.0_dp,mu1,threshold)
     endif

     call bml_deallocate(b_bml)
     call bml_deallocate(c_bml)
     call bml_deallocate(c0_bml)

   end subroutine prg_implicit_fermi_first_order_response


   !> Density matrix response up to third order from the implicit recursive
   !! expansion, with the chemical potential and its perturbations iterated
   !! until the occupation conditions are met.
   !!
   !! Every outer iteration rebuilds P0 and P(1..order) from the Hamiltonians and
   !! the current mu0/mu(:), runs nsteps recursion steps, then applies Newton-type
   !! updates using Pmu = beta*P0*(I-P0):
   !!   mu0   <- mu0 + (nocc - tr P0)/tr Pmu,
   !!   mu(i) <- mu(i) - tr P(i)/tr Pmu.
   !! The loop stops when |tr P0 - nocc| + sum_i |tr P(i)| is at most occ_tol, or
   !! after more than 50 iterations (a message is printed in that case).
   !! The inverse of T = 2*P0^2 - 2*P0 + I is estimated by a loose
   !! conjugate-gradient solve at the first recursion step only; the
   !! Newton-Schulz refinement is currently commented out.
   !!
   !! \param H0_bml Input Hamiltonian matrix.
   !! \param H1_bml, H2_bml, H3_bml Input first- to third-order Hamiltonian perturbations.
   !! \param P0_bml Output unperturbed matrix produced by the recursion.
   !! \param P1_bml, P2_bml, P3_bml Output first- to third-order responses.
   !! \param nsteps Number of recursion steps.
   !! \param mu0 Chemical potential; updated in place.
   !! \param mu Perturbations of the chemical potential; entries 1..order are
   !!        reset to zero on entry and then updated.
   !! \param beta Input inverse temperature.
   !! \param nocc Target occupation.
   !! \param occ_tol Tolerance on the summed occupation error.
   !! \param lin_tol Tolerance for the (currently disabled) Newton-Schulz refinement; unused.
   !! \param order Calculate response up to this order; only orders 1 to 3 are wired up.
   !! \param threshold Threshold for matrix algebra.
   subroutine prg_implicit_fermi_response(H0_bml, H1_bml, H2_bml, H3_bml, P0_bml, P1_bml, P2_bml, P3_bml, &
        nsteps, mu0, mu, beta, nocc, occ_tol, lin_tol, order, threshold)

     implicit none

     type(bml_matrix_t), intent(in) :: h0_bml, h1_bml, h2_bml, h3_bml
     type(bml_matrix_t), intent(inout) :: p0_bml, p1_bml, p2_bml, p3_bml
     real(dp), intent(inout) :: mu0
     real(dp), allocatable, intent(inout) :: mu(:)
     real(dp), intent(in) :: beta, occ_tol, lin_tol, nocc
     real(dp), intent(in) :: threshold
     integer, intent(in) :: nsteps, order

     type(bml_matrix_t) :: i_bml, tmp1_bml, tmp2_bml, tmp3_bml, c0_bml, t_bml, ti_bml
     type(bml_matrix_t), allocatable :: b_bml(:), p_bml(:), c_bml(:), h_bml(:)
     real(dp), allocatable :: p_trace(:)
     character(20) :: bml_type
     real(dp) :: occ_err, p0_trace, pmu_trace, cnst
     integer :: n, m, i, j, k

     k = 0
     occ_err = 10000.0_dp
     allocate(p_trace(order))
     allocate(b_bml(order))
     allocate(c_bml(order))
     allocate(p_bml(order))
     allocate(h_bml(order))

     bml_type = bml_get_type(h0_bml)
     n = bml_get_n(h0_bml)
     m = bml_get_m(h0_bml)

     do i = 1, order
       mu(i) = 0.0_dp
       call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, b_bml(i))
       call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, c_bml(i))
     end do

     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, tmp1_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, tmp3_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, tmp2_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, c0_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, t_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, ti_bml)
     call bml_identity_matrix(bml_type, bml_element_real, dp, n, m, i_bml)

     ! Gather the per-order arguments into arrays so they can be indexed.
     ! NOTE(review): these are plain derived-type assignments, presumably copying
     ! only the matrix handle so that p_bml(j) refers to the caller's Pj matrix -
     ! confirm against the bml_matrix_t definition. They are never bml_deallocate'd here.
     h_bml(1) = h1_bml
     p_bml(1) = p1_bml
     if (order .gt. 1) then
       h_bml(2) = h2_bml
       p_bml(2) = p2_bml
     end if
     if (order .gt. 2) then
       h_bml(3) = h3_bml
       p_bml(3) = p3_bml
     end if

     ! Real-valued power: an integer 2**(2+nsteps) overflows for large nsteps.
     cnst = beta/(2.0_dp**(2+nsteps))

     do while (occ_err .gt. occ_tol)
       k = k + 1
       ! P0 = 0.5*II - cnst*(H0-mu0*II)
       call bml_copy(h0_bml, p0_bml)
       call prg_normalize_implicit_fermi(p0_bml, cnst, mu0)

       ! P(j) =  - cnst*(H(j)-mu(j)*II)
       ! (same normalization as P0, followed by removal of the 0.5*II term)
       do j = 1, order
         call bml_copy(h_bml(j), p_bml(j))
         call prg_normalize_implicit_fermi(p_bml(j), cnst, mu(j))
         call bml_scale_add_identity(p_bml(j), 1.0_dp, -0.5_dp, threshold)
       end do

       do i = 1, nsteps

         ! Calculate coefficient matrices

         ! C0 = P0^2
         call bml_multiply(p0_bml, p0_bml, c0_bml, 1.0_dp, 0.0_dp, threshold)
         ! C1 = P0*P1+P1*P0, B1 = 2(P1 - C1)
         call bml_multiply(p0_bml, p_bml(1), c_bml(1), 1.0_dp, 0.0_dp, threshold)
         call bml_multiply(p_bml(1), p0_bml, c_bml(1), 1.0_dp, 1.0_dp, threshold)
         call bml_copy(p_bml(1), b_bml(1))
         call bml_add(b_bml(1), c_bml(1), 2.0_dp, -2.0_dp, threshold)
         if (order > 1) then
           ! C2 = P1^2 + P0*P2 + P2*P0, B2 = 2(P2 - C2)
           call bml_multiply(p_bml(1), p_bml(1), c_bml(2), 1.0_dp, 0.0_dp, threshold)
           call bml_multiply(p0_bml, p_bml(2), c_bml(2), 1.0_dp, 1.0_dp, threshold)
           call bml_multiply(p_bml(2), p0_bml, c_bml(2), 1.0_dp, 1.0_dp, threshold)
           call bml_copy(p_bml(2), b_bml(2))
           call bml_add(b_bml(2), c_bml(2), 2.0_dp, -2.0_dp, threshold)
         end if
         if (order > 2) then
           ! C3 = P1*P2 + P2*P1 + P0*P3 + P3*P0, B3 = 2(P3 - C3)
           call bml_multiply(p_bml(1), p_bml(2), c_bml(3), 1.0_dp, 0.0_dp, threshold)
           call bml_multiply(p_bml(2), p_bml(1), c_bml(3), 1.0_dp, 1.0_dp, threshold)
           call bml_multiply(p0_bml, p_bml(3), c_bml(3), 1.0_dp, 1.0_dp, threshold)
           call bml_multiply(p_bml(3), p0_bml, c_bml(3), 1.0_dp, 1.0_dp, threshold)
           call bml_copy(p_bml(3), b_bml(3))
           call bml_add(b_bml(3), c_bml(3), 2.0_dp, -2.0_dp, threshold)
         endif
         ! T = 2P0^2 - 2P0 + I
         call bml_copy(c0_bml, t_bml)
         call bml_add(t_bml, p0_bml, 1.0_dp, -1.0_dp, threshold)
         call bml_scale_add_identity(t_bml, 2.0_dp, 1.0_dp, threshold)
         ! Find T-inverse (only at the first recursion step; reused afterwards)
         if (i .eq. 1) then
           call prg_conjgrad(t_bml, ti_bml, i_bml, tmp1_bml, tmp2_bml, tmp3_bml,0.01_dp, threshold)
           ! Rebuild the identity after the solve, as the original code does.
           call bml_identity_matrix(bml_type, bml_element_real, dp, n, m, i_bml)
         end if
         !call prg_newtonschulz(T_bml, Ti_bml, tmp1_bml, tmp2_bml, tmp3_bml, I_bml, lin_tol, threshold)
         ! Get next P0
         call bml_multiply(ti_bml, c0_bml, p0_bml, 1.0_dp, 0.0_dp, threshold)
         ! Get next P1
         call bml_multiply(b_bml(1), p0_bml, c_bml(1), 1.0_dp, 1.0_dp, threshold)
         call bml_multiply(ti_bml, c_bml(1), p_bml(1), 1.0_dp, 0.0_dp, threshold)
         if (order > 1) then
           ! Get next P2
           call bml_multiply(b_bml(2), p0_bml, c_bml(2), 1.0_dp, 1.0_dp, threshold)
           call bml_multiply(b_bml(1), p_bml(1), c_bml(2), 1.0_dp, 1.0_dp, threshold)
           call bml_multiply(ti_bml, c_bml(2), p_bml(2), 1.0_dp, 0.0_dp, threshold)
         end if
         if (order > 2) then
           ! Get next P3
           call bml_multiply(b_bml(3), p0_bml, c_bml(3), 1.0_dp, 1.0_dp, threshold)
           call bml_multiply(b_bml(2), p_bml(1), c_bml(3), 1.0_dp, 1.0_dp, threshold)
           call bml_multiply(b_bml(1), p_bml(2), c_bml(3), 1.0_dp, 1.0_dp, threshold)
           call bml_multiply(ti_bml, c_bml(3), p_bml(3), 1.0_dp, 0.0_dp, threshold)
         endif
       enddo

       ! Pmu = beta*P0(I-P0)
       call bml_copy(p0_bml, tmp1_bml)
       call bml_scale_add_identity(tmp1_bml, -1.0_dp, 1.0_dp, threshold)
       call bml_multiply(p0_bml, tmp1_bml, tmp2_bml, beta, 0.0_dp, threshold)

       pmu_trace = bml_trace(tmp2_bml)
       p0_trace = bml_trace(p0_bml)
       occ_err = abs(p0_trace-nocc)
       mu0 = mu0 + (nocc - p0_trace)/pmu_trace
       do i = 1, order
         p_trace(i) = bml_trace(p_bml(i))
         mu(i) = mu(i) - p_trace(i)/pmu_trace
         occ_err = occ_err + abs(p_trace(i))
       enddo

       write(*,*) "occ_err =", occ_err
       if (k .gt. 50) then
         write(*,*) "Chemical potential is not converging"
         exit
       endif

     enddo

     do i = 1, order
       call bml_deallocate(b_bml(i))
       call bml_deallocate(c_bml(i))
     end do

     call bml_deallocate(i_bml)
     call bml_deallocate(tmp1_bml)
     call bml_deallocate(tmp2_bml)
     ! tmp3_bml and c0_bml are allocated above and were previously never released.
     call bml_deallocate(tmp3_bml)
     call bml_deallocate(c0_bml)
     call bml_deallocate(t_bml)
     call bml_deallocate(ti_bml)
     deallocate(p_trace)
     deallocate(b_bml)
     deallocate(c_bml)

   end subroutine prg_implicit_fermi_response


   !> Estimate the first- to third-order density matrix response
   !! using finite differences.
   !!
   !! The density matrix is evaluated by prg_get_density_matrix for
   !! H(x) = H0 + sum_i x**i * H_list(i) and mu(x) = mu0 + sum_i x**i * mu_list(i)
   !! at x = lambda, lambda +/- h and lambda +/- 2h as required. The estimates
   !! are not returned: each one is multiplied by 1000 and printed with
   !! bml_print_matrix.
   !!
   !! \param H0_bml Input Hamiltonian matrix.
   !! \param H_list Input List of one to third order Hamiltonian perturbations
   !! \param mu0 Shifted chemical potential.
   !! \param mu_list List of first to third order perturbations in the chemical
   !!  potential.
   !! \param beta Input inverse temperature.
   !! \param order Calculate response up to this order.
   !! \param lambda Perturbation parameter
   !! \param h Finite difference step size
   !! \param threshold Threshold for matrix algebra.
   subroutine prg_finite_diff(H0_bml, H_list, mu0, mu_list, beta, order, lambda, h, threshold)

     implicit none

     type(bml_matrix_t), intent(in) :: h0_bml
     real(dp), intent(in) :: mu0
     type(bml_matrix_t), allocatable, intent(in) :: h_list(:)
     real(dp), allocatable, intent(in) :: mu_list(:)
     real(dp), intent(in) :: lambda, beta, threshold, h
     integer, intent(in) :: order

     character(20) :: bml_type
     integer :: n, m
     type(bml_matrix_t) :: d0_bml, d1minus_bml, d1plus_bml, d2plus_bml, d2minus_bml, tmp1_bml

     bml_type = bml_get_type(h0_bml)
     n = bml_get_n(h0_bml)
     m = bml_get_m(h0_bml)

     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, tmp1_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, d0_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, d1minus_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, d1plus_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, d2plus_bml)
     call bml_zero_matrix(bml_type, bml_element_real, dp, n, m, d2minus_bml)

     ! Calculate first order difference (forward difference)
     ! P1 = (F(lambda + h) - F(lambda))/h
     call density_at(lambda, d0_bml)
     call density_at(lambda+h, d1plus_bml)
     call bml_copy(d1plus_bml, tmp1_bml)
     call bml_add(tmp1_bml, d0_bml, 1.0_dp/h, -1.0_dp/h, threshold)

     call bml_scale(1000.0_dp, tmp1_bml)
     call bml_print_matrix("Finite diff - Order 1 * 1000", tmp1_bml, 0,10,0,10)

     if (order .gt. 1) then
       ! Calculate second order difference
       ! P2 = (0.5*F(lambda + h) - F(lambda) + 0.5*F(lambda - h))/h**2
       call density_at(lambda-h, d1minus_bml)
       call bml_copy(d0_bml, tmp1_bml)
       call bml_add(tmp1_bml, d1minus_bml, -1.0_dp/(h*h), 0.5_dp/(h*h), threshold)
       ! threshold is passed here as in every other bml_add of this routine.
       call bml_add(tmp1_bml, d1plus_bml, 1.0_dp, 0.5_dp/(h*h), threshold)

       call bml_scale(1000.0_dp, tmp1_bml)
       call bml_print_matrix("Finite diff - Order 2 * 1000", tmp1_bml, 0,10,0,10)
     end if

     if (order .gt. 2) then
       ! Calculate third order difference
       ! P3 = (F(lambda + 2h) - F(lambda - 2h))/(12*h**3)
       !      - (F(lambda + h) - F(lambda - h))/(6*h**3)
       call density_at(lambda-2*h, d2minus_bml)
       call density_at(lambda+2*h, d2plus_bml)
       call bml_copy(d2plus_bml, tmp1_bml)
       call bml_add(tmp1_bml, d2minus_bml, 1.0_dp/(12.0_dp*h**3), -1.0_dp/(12.0_dp*h**3), threshold)
       call bml_add(tmp1_bml, d1plus_bml, 1.0_dp, -1.0_dp/(6.0_dp*h**3), threshold)
       call bml_add(tmp1_bml, d1minus_bml, 1.0_dp, 1.0_dp/(6.0_dp*h**3), threshold)

       call bml_scale(1000.0_dp, tmp1_bml)
       call bml_print_matrix("Finite diff - Order 3 * 1000", tmp1_bml, 0,10,0,10)
     end if

     call bml_deallocate(tmp1_bml)
     call bml_deallocate(d0_bml)
     call bml_deallocate(d1minus_bml)
     call bml_deallocate(d1plus_bml)
     call bml_deallocate(d2plus_bml)
     call bml_deallocate(d2minus_bml)

   contains

     ! Density matrix at perturbation strength x: assembles H(x) in tmp1_bml and
     ! mu(x) in a local, then calls prg_get_density_matrix. tmp1_bml is overwritten.
     subroutine density_at(x, dens_bml)

       real(dp), intent(in) :: x
       type(bml_matrix_t), intent(inout) :: dens_bml

       real(dp) :: mu_x
       integer :: j

       call bml_copy(h0_bml, tmp1_bml)
       mu_x = mu0
       do j = 1, order
         call bml_add(tmp1_bml, h_list(j), 1.0_dp, x**j, threshold)
         mu_x = mu_x + x**j*mu_list(j)
       end do
       call prg_get_density_matrix(tmp1_bml, dens_bml, beta, mu_x, threshold)

     end subroutine density_at

   end subroutine prg_finite_diff


   !> Build the two matrices of one recursion step of order k from P.
   !!
   !! For k == 2:  b = P^2 and A = 2*P^2 - 2*P + I.
   !! Otherwise, with q = 1 + (k-2)/2 (integer division):
   !!   p2 = P^2,  y = P^2 - 2*P + I,  b = p2**q,  A = y**q + b.
   !!
   !! \param p_bml Input matrix P.
   !! \param A_bml Output system matrix.
   !! \param b_bml Output right-hand side.
   !! \param p2_bml Work matrix; holds P^2 when k /= 2.
   !! \param y_bml Work matrix; holds P^2 - 2*P + I when k /= 2.
   !! \param aux_bml Work matrix used while raising b to its power.
   !! \param aux1_bml Work matrix used while raising A to its power.
   !! \param k Order of the recursion step.
   !! \param threshold Threshold for matrix algebra.
   subroutine prg_setup_linsys(p_bml, A_bml, b_bml, p2_bml, y_bml, aux_bml, &
        aux1_bml, k, threshold)

     implicit none

     type(bml_matrix_t), intent(in) :: p_bml
     type(bml_matrix_t), intent(inout) :: A_bml, b_bml, p2_bml, y_bml, aux_bml, aux1_bml
     integer, intent(in) :: k
     real(dp), intent(in) :: threshold

     integer :: ipow, npow

     if (k .ne. 2) then

       ! Base factors: p2 = P^2 and y = P^2 - 2P + I
       call bml_multiply(p_bml, p_bml, p2_bml, 1.0_dp, 0.0_dp, threshold)
       call bml_copy(p2_bml, y_bml)
       call bml_add(y_bml, p_bml, 1.0_dp, -2.0_dp, threshold)
       call bml_scale_add_identity(y_bml, 1.0_dp, 1.0_dp, threshold)

       ! Start both products from their base factor ...
       call bml_copy(p2_bml, b_bml)
       call bml_copy(y_bml, a_bml)

       ! ... and multiply each by its base factor (k-2)/2 more times.
       npow = (k-2)/2
       do ipow = 1, npow
         call bml_multiply(b_bml, p2_bml, aux_bml, 1.0_dp, 0.0_dp, threshold)
         call bml_multiply(a_bml, y_bml, aux1_bml, 1.0_dp, 0.0_dp, threshold)
         call bml_copy(aux_bml, b_bml)
         call bml_copy(aux1_bml, a_bml)
       end do

       ! A = y**q + p2**q
       call bml_add(a_bml, b_bml, 1.0_dp, 1.0_dp, threshold)

     else

       ! b = P^2, A = 2*P^2 - 2*P + I
       call bml_multiply(p_bml, p_bml, b_bml, 1.0_dp, 0.0_dp, threshold)
       call bml_copy(b_bml, a_bml)
       call bml_add(a_bml, p_bml, 2.0_dp, -2.0_dp, threshold)
       call bml_scale_add_identity(a_bml, 1.0_dp, 1.0_dp, threshold)

     end if

   end subroutine prg_setup_linsys


   !> Refine an approximate inverse ai_bml of a_bml by the iteration
   !!   R = I - A*Ai,   Ai <- Ai + Ai*R,
   !! repeated while bml_fnorm(R) exceeds tol.
   !!
   !! The residual is evaluated before each update, so the loop always makes at
   !! least one pass. If the residual norm grows by more than 0.01 relative to
   !! the previous pass, the update is skipped, a message is printed and ai_bml
   !! is recomputed with prg_conjgrad (tolerance 0.00001) instead.
   !! There is no upper bound on the number of passes.
   !!
   !! \param a_bml Input matrix A.
   !! \param ai_bml Approximate inverse; starting guess on entry, refined on exit.
   !! \param r_bml Work matrix holding the residual I - A*Ai.
   !! \param tmp_bml Work matrix holding a copy of the previous Ai.
   !! \param d_bml Work matrix, only handed to prg_conjgrad.
   !! \param I_bml Identity matrix, only handed to prg_conjgrad.
   !! \param tol Residual norm at which the iteration stops.
   !! \param threshold Threshold for matrix algebra.
   !! \param num_iter Number of passes through the loop.
   subroutine prg_newtonschulz(a_bml, ai_bml, r_bml, tmp_bml, d_bml, I_bml, tol, threshold, num_iter)

     implicit none

     type(bml_matrix_t), intent(in) :: a_bml, I_bml
     type(bml_matrix_t), intent(inout) :: ai_bml, r_bml, tmp_bml, d_bml
     real(dp), intent(in) :: threshold, tol
     integer, intent(out) :: num_iter

     real(dp) :: resid, resid_before
     logical :: diverging

     resid = 100000.0_dp
     num_iter = 0

     do
       ! Same exit test as "do while (resid > tol)".
       if (.not. (resid > tol)) exit

       ! Keep the current Ai, then form R = I - A*Ai and its norm.
       call bml_copy(ai_bml, tmp_bml)
       call bml_multiply(a_bml, ai_bml, r_bml, 1.0_dp, 0.0_dp, threshold)
       call bml_scale_add_identity(r_bml, -1.0_dp, 1.0_dp, threshold)
       resid_before = resid
       resid = bml_fnorm(r_bml)

       diverging = (resid_before+0.01 < resid)
       if (.not. diverging) then
         ! Ai <- Ai + Ai_old*R
         call bml_multiply(tmp_bml, r_bml, ai_bml, 1.0_dp, 1.0_dp, threshold)
       else
         write(*,*) 'NS did not converge, calling conjugate gradient'
         call prg_conjgrad(a_bml, ai_bml, i_bml, r_bml, tmp_bml, d_bml, 0.00001_dp, threshold)
       end if

       num_iter = num_iter + 1
     end do

   end subroutine prg_newtonschulz


   !> Solve the system AX = B with preconditioned conjugate gradient.
   !!
   !! The preconditioner is the inverse of the diagonal of A (Jacobi).
   !! Progress is printed on every iteration, and the program is stopped if
   !! more than 100 iterations are needed.
   !!
   !! \param A_bml System matrix (not modified)
   !! \param p_bml On entry the initial guess for X, on exit the approximate solution
   !! \param p2_bml On entry the right-hand side B; overwritten with the residual B - A*X
   !! \param d_bml Workspace; holds the search direction
   !! \param wtmp_bml Workspace; holds A times the search direction
   !! \param cg_tol Iteration stops once the sum of squares of the residual is not above this value
   !! \param threshold Threshold for matrix algebra
   subroutine prg_pcg(A_bml, p_bml, p2_bml, d_bml, wtmp_bml, cg_tol, threshold)

     implicit none

     type(bml_matrix_t), intent(in) :: A_bml
     type(bml_matrix_t), intent(inout) :: p_bml, p2_bml, d_bml, wtmp_bml
     real(dp), intent(in) :: cg_tol, threshold

     type(bml_matrix_t) :: zres_bml, jacobi_bml
     real(dp), allocatable :: inv_diag(:)
     real(dp) :: alpha_k, beta_k
     real(dp) :: rz_prev, rz_curr
     character(20) :: mat_type
     integer :: niter, n_dim, m_dim

     mat_type = bml_get_type(p_bml)
     n_dim = bml_get_n(p_bml)
     m_dim = bml_get_m(p_bml)

     call bml_zero_matrix(mat_type, bml_element_real, dp, n_dim, m_dim, zres_bml)
     call bml_zero_matrix(mat_type, bml_element_real, dp, n_dim, m_dim, jacobi_bml)

     ! Jacobi preconditioner: diagonal matrix holding 1/A(i,i).
     allocate(inv_diag(n_dim))
     call bml_get_diagonal(a_bml, inv_diag)
     inv_diag = 1.0_dp/inv_diag
     call bml_set_diagonal(jacobi_bml, inv_diag)

     ! Initial residual r = B - A*X (stored in p2_bml), z = M^-1 r, d = z.
     call bml_multiply(a_bml, p_bml, p2_bml, -1.0_dp, 1.0_dp, threshold)
     call bml_multiply(jacobi_bml, p2_bml, zres_bml, 1.0_dp, 0.0_dp, threshold)
     rz_curr = bml_trace_mult(zres_bml, p2_bml)
     call bml_copy(zres_bml, d_bml)

     niter = 0

     do
       if (.not. (bml_sum_squares(p2_bml) .gt. cg_tol)) exit

       write(*,*) "r_norm", bml_sum_squares(p2_bml)

       niter = niter + 1

       ! From the second iteration on, refresh the direction: d = z + beta*d.
       if (niter > 1) then
         beta_k = rz_curr/rz_prev
         call bml_add(d_bml, zres_bml, beta_k, 1.0_dp, threshold)
       endif

       ! Step length along d, then update solution and residual.
       call bml_multiply(a_bml, d_bml, wtmp_bml, 1.0_dp, 0.0_dp, threshold)
       alpha_k = bml_trace_mult(p2_bml, zres_bml)/bml_trace_mult(d_bml, wtmp_bml)
       call bml_add(p_bml, d_bml, 1.0_dp, alpha_k, threshold)
       call bml_add(p2_bml, wtmp_bml, 1.0_dp, -alpha_k, threshold)

       ! Re-apply the preconditioner to the new residual.
       call bml_multiply(jacobi_bml, p2_bml, zres_bml, 1.0_dp, 0.0_dp, threshold)
       rz_prev = rz_curr
       rz_curr = bml_trace_mult(p2_bml, zres_bml)

       if (niter > 100) then
         write(*,*) "PCG is not converging"
         stop
       endif
     enddo

     write(*,*) "Number of iterations:", niter

     call bml_deallocate(zres_bml)
     call bml_deallocate(jacobi_bml)
     deallocate(inv_diag)

   end subroutine prg_pcg


   !> Solve the system AX = B with conjugate gradient.
   !!
   !! The program is stopped if more than 50 iterations are needed.
   !!
   !! \param A_bml System matrix (not modified)
   !! \param p_bml On entry the initial guess for X, on exit the approximate solution
   !! \param p2_bml Right-hand side B (not modified)
   !! \param tmp_bml Workspace; holds the residual B - A*X
   !! \param d_bml Workspace; holds the search direction
   !! \param w_bml Workspace; holds A times the search direction
   !! \param cg_tol Iteration stops once the sum of squares of the residual is not above this value
   !! \param threshold Threshold for matrix algebra
   subroutine prg_conjgrad(A_bml, p_bml, p2_bml, tmp_bml, d_bml, w_bml, cg_tol, threshold)

     implicit none

     type(bml_matrix_t), intent(in) :: A_bml, p2_bml
     type(bml_matrix_t), intent(inout) :: p_bml, tmp_bml, d_bml, w_bml
     real(dp), intent(in) :: cg_tol, threshold

     real(dp) :: alpha_k, beta_k
     real(dp) :: rr_prev, rr_curr
     integer :: niter

     ! Initial residual r = B - A*X, kept in tmp_bml.
     call bml_copy(p2_bml, tmp_bml)
     call bml_multiply(a_bml, p_bml, tmp_bml, -1.0_dp, 1.0_dp, threshold)
     rr_curr = bml_sum_squares(tmp_bml)

     niter = 0

     do
       if (.not. (rr_curr .gt. cg_tol)) exit

       niter = niter + 1

       ! First direction is the residual itself; afterwards d = r + beta*d.
       if (niter /= 1) then
         beta_k = rr_curr/rr_prev
         call bml_add(d_bml, tmp_bml, beta_k, 1.0_dp, threshold)
       else
         call bml_copy(tmp_bml, d_bml)
       endif

       ! Step length along d, then update solution and residual.
       call bml_multiply(a_bml, d_bml, w_bml, 1.0_dp, 0.0_dp, threshold)
       alpha_k = rr_curr/bml_trace_mult(d_bml, w_bml)

       call bml_add(p_bml, d_bml, 1.0_dp, alpha_k, threshold)
       call bml_add(tmp_bml, w_bml, 1.0_dp, -alpha_k, threshold)

       rr_prev = rr_curr
       rr_curr = bml_sum_squares(tmp_bml)

       if (niter > 50) then
         write(*,*) "Conjugate gradient is not converging"
         stop
       endif
     enddo

   end subroutine prg_conjgrad


   !> Calculate the density matrix with diagonalization.
   !!
   !! The Hamiltonian is diagonalized, each eigenvalue is mapped to its Fermi
   !! occupation, and the density matrix is assembled as V * diag(f) * V^T,
   !! where V holds the eigenvectors returned by bml_diagonalize.
   !!
   !! \param ham_bml Input hamiltonian
   !! \param p_bml Output density matrix; its type and dimensions are used for all temporaries
   !! \param beta Inverse temperature
   !! \param mu Chemical potential
   !! \param threshold Threshold for matrix algebra
   subroutine prg_get_density_matrix(ham_bml, p_bml, beta, mu, threshold)

     implicit none

     type(bml_matrix_t), intent(in) :: ham_bml
     type(bml_matrix_t), intent(inout) :: p_bml
     real(dp), intent(in) ::  beta, threshold
     real(dp), intent(in) :: mu

     character(20) :: bml_type
     integer :: N, M, i
     real(dp), allocatable ::  eigenvalues(:)
     type(bml_matrix_t) :: eigenvectors_bml,occupation_bml,aux_bml,aux1_bml

     bml_type = bml_get_type(p_bml)
     n = bml_get_n(p_bml)
     m = bml_get_m(p_bml)

     allocate(eigenvalues(n))

     call bml_zero_matrix(bml_type,bml_element_real,dp,n,m,eigenvectors_bml)
     call bml_zero_matrix(bml_type,bml_element_real,dp,n,m,occupation_bml)
     call bml_zero_matrix(bml_type,bml_element_real,dp,n,m,aux_bml)
     call bml_zero_matrix(bml_type,bml_element_real,dp,n,m,aux1_bml)

     call bml_diagonalize(ham_bml,eigenvalues,eigenvectors_bml)

     ! Replace every eigenvalue by its occupation number, in place.
     do i=1,n
       eigenvalues(i) = fermi(eigenvalues(i),mu,beta)
     enddo

     ! P = V * diag(f) * V^T
     call bml_set_diagonal(occupation_bml, eigenvalues)
     call bml_multiply(eigenvectors_bml, occupation_bml, aux_bml, 1.0_dp, 0.0_dp,threshold)
     call bml_transpose_new(eigenvectors_bml, aux1_bml)
     call bml_multiply(aux_bml, aux1_bml, p_bml, 1.0_dp, 0.0_dp, threshold)

     call bml_deallocate(eigenvectors_bml)
     call bml_deallocate(occupation_bml)
     call bml_deallocate(aux_bml)
     call bml_deallocate(aux1_bml)

     deallocate(eigenvalues)

   end subroutine prg_get_density_matrix


   !> Calculate the density matrix with diagonalization and converge chemical potential.
   !!
   !! The Hamiltonian is diagonalized once. Every outer step rebuilds the
   !! density matrix P = V * diag(f(mu)) * V^T from the stored eigenpairs,
   !! prints its leading block, and - while the occupation error is above
   !! occErrLimit - applies the Newton update
   !!   mu <- mu + (nocc - Tr[P]) / (beta * (Tr[P] - sum_ij P(i,j)**2)).
   !! If osteps is 0 the loop runs until the occupation error is within
   !! occErrLimit; if osteps > 0 exactly osteps steps are taken; if
   !! osteps < 0 no step is taken and p_bml and mu are left untouched.
   !! NOTE(review): the Newton denominator vanishes when P is idempotent;
   !! no guard is applied here - confirm callers use a finite temperature.
   !!
   !! \param ham_bml Input hamiltonian
   !! \param p_bml Output density matrix
   !! \param beta Inverse temperature
   !! \param mu Chemical potential
   !! \param nocc Number of occupied states
   !! \param osteps Outer loop steps to converge chemical potential
   !! \param occErrLimit Occupation error limit.
   !! \param threshold Threshold for matrix algebra
   subroutine prg_test_density_matrix(ham_bml, p_bml, beta, mu, nocc, osteps, occErrLimit, threshold)

     implicit none

     type(bml_matrix_t), intent(in) :: ham_bml
     type(bml_matrix_t), intent(inout) :: p_bml
     real(dp), intent(in) ::  beta, nocc, occerrlimit, threshold
     real(dp), intent(inout) :: mu
     integer, intent(in) :: osteps

     character(20) :: bml_type
     integer :: n, m, i, iter
     real(dp) :: trdpdmu, trp0, occerr
     real(dp), allocatable ::  eigenvalues(:), occupations(:)
     type(bml_matrix_t) :: eigenvectors_bml,occupation_bml,aux_bml,aux1_bml

     bml_type = bml_get_type(p_bml)
     n = bml_get_n(p_bml)
     m = bml_get_m(p_bml)

     allocate(eigenvalues(n))
     allocate(occupations(n))

     call bml_zero_matrix(bml_type,bml_element_real,dp,n,m,eigenvectors_bml)
     call bml_zero_matrix(bml_type,bml_element_real,dp,n,m,occupation_bml)
     call bml_zero_matrix(bml_type,bml_element_real,dp,n,m,aux_bml)
     call bml_zero_matrix(bml_type,bml_element_real,dp,n,m,aux1_bml)

     ! The Hamiltonian does not change while mu is iterated, so the
     ! eigen-decomposition and the transposed eigenvectors (kept in
     ! aux1_bml) are computed once, outside the loop.
     call bml_diagonalize(ham_bml,eigenvalues,eigenvectors_bml)
     call bml_transpose_new(eigenvectors_bml, aux1_bml)

     occerr = 1000.0_dp
     iter = 0

     do while ((osteps .eq. 0 .and. occerr .gt. occerrlimit) .or. &
          (osteps .gt. 0 .and. iter .lt. osteps))

       iter = iter + 1

       ! Occupations for the current chemical potential.
       do i=1,n
         occupations(i) = fermi(eigenvalues(i),mu,beta)
       enddo

       ! P = V * diag(f) * V^T
       call bml_set_diagonal(occupation_bml, occupations)
       call bml_multiply(eigenvectors_bml, occupation_bml, aux_bml, 1.0_dp, 0.0_dp,threshold)
       call bml_multiply(aux_bml, aux1_bml, p_bml, 1.0_dp, 0.0_dp, threshold)

       call bml_print_matrix("test density",p_bml,0,10,0,10)

       ! Occupation Tr[P] and Newton denominator beta*(Tr[P] - sum p(i,j)**2).
       trp0 = bml_trace(p_bml)
       trdpdmu = beta * (trp0 - bml_sum_squares(p_bml))

       occerr = abs(trp0 - nocc)

       if (occerr .gt. occerrlimit) then
         mu = mu + (nocc - trp0)/trdpdmu
       end if

     enddo

     ! The first-order occupation correction
     !   P <- P + ((nocc - Tr[P])/trdpdmu) * beta * P*(I - P)
     ! is disabled, so its operands are not computed here.

     call bml_deallocate(eigenvectors_bml)
     call bml_deallocate(occupation_bml)
     call bml_deallocate(aux_bml)
     call bml_deallocate(aux1_bml)

     deallocate(eigenvalues)
     deallocate(occupations)

   end subroutine prg_test_density_matrix


   !> Gives the Fermi distribution value for energy e.
   !!
   !! Evaluates 1/(1 + exp(beta*(e - mu))). For positive arguments the
   !! algebraically identical form exp(-x)/(1 + exp(-x)) is used, so exp is
   !! only ever called with a non-positive argument and cannot overflow.
   !!
   !! \param e Energy
   !! \param mu Chemical potential
   !! \param beta Inverse temperature
   pure real(dp) function fermi(e,mu,beta)

     real(dp), intent(in) :: e, mu, beta

     real(dp) :: x, decay

     x = beta*(e-mu)

     if (x > 0.0_dp) then
       decay = exp(-x)
       fermi = decay/(1.0_dp+decay)
     else
       fermi = 1.0_dp/(1.0_dp+exp(x))
     end if

   end function fermi


 end module prg_implicit_fermi_mod

prg_densitymatrix_mod
Module to obtain the density matrix by diagonalizing an orthogonalized Hamiltonian.
Definition: prg_densitymatrix_mod.F90:5

prg_densitymatrix_mod::dp
integer, parameter dp
Definition: prg_densitymatrix_mod.F90:19

prg_densitymatrix_mod::fermi
real(dp) function fermi(e, ef, kbt)
Gives the Fermi distribution value for energy e.
Definition: prg_densitymatrix_mod.F90:1005

prg_ewald_mod
Definition: prg_ewald_mod.F90:2

prg_implicit_fermi_mod
Definition: prg_implicit_fermi_mod.F90:6

prg_implicit_fermi_mod::fermi
real(dp) function fermi(e, mu, beta)
Gives the Fermi distribution value for energy e.
Definition: prg_implicit_fermi_mod.F90:1193

prg_implicit_fermi_mod::prg_implicit_fermi
subroutine, public prg_implicit_fermi(h_bml, p_bml, nsteps, k, nocc, mu, beta, method, osteps, occErrLimit, threshold, tol)
Recursive Implicit Fermi Dirac for finite temperature.
Definition: prg_implicit_fermi_mod.F90:211

prg_implicit_fermi_mod::prg_implicit_fermi_save_inverse
subroutine, public prg_implicit_fermi_save_inverse(Inv_bml, h_bml, p_bml, nsteps, nocc, mu, beta, occErrLimit, threshold, tol, SCF_IT, occiter, totns)
Recursive Implicit Fermi Dirac for finite temperature.
Definition: prg_implicit_fermi_mod.F90:48

prg_implicit_fermi_mod::prg_pcg
subroutine prg_pcg(A_bml, p_bml, p2_bml, d_bml, wtmp_bml, cg_tol, threshold)
Solve the system AX = B with preconditioned conjugate gradient (inverse-diagonal preconditioner).
Definition: prg_implicit_fermi_mod.F90:935

prg_implicit_fermi_mod::prg_implicit_fermi_first_order_response
subroutine, public prg_implicit_fermi_first_order_response(H0_bml, H1_bml, P0_bml, P1_bml, Inv_bml, nsteps, mu0, beta, nocc, threshold)
Calculate first order density matrix response to perturbations using Implicit Fermi Dirac.
Definition: prg_implicit_fermi_mod.F90:441

prg_implicit_fermi_mod::prg_get_density_matrix
subroutine prg_get_density_matrix(ham_bml, p_bml, beta, mu, threshold)
Calculate the density matrix with diagonalization.
Definition: prg_implicit_fermi_mod.F90:1057

prg_implicit_fermi_mod::prg_implicit_fermi_response
subroutine, public prg_implicit_fermi_response(H0_bml, H1_bml, H2_bml, H3_bml, P0_bml, P1_bml, P2_bml, P3_bml, nsteps, mu0, mu, beta, nocc, occ_tol, lin_tol, order, threshold)
Calculate density matrix response to perturbations using Implicit Fermi Dirac.
Definition: prg_implicit_fermi_mod.F90:549

prg_implicit_fermi_mod::prg_setup_linsys
subroutine prg_setup_linsys(p_bml, A_bml, b_bml, p2_bml, y_bml, aux_bml, aux1_bml, k, threshold)
Set up linear system for Implicit Fermi Dirac.
Definition: prg_implicit_fermi_mod.F90:845

prg_implicit_fermi_mod::prg_finite_diff
subroutine, public prg_finite_diff(H0_bml, H_list, mu0, mu_list, beta, order, lambda, h, threshold)
Calculate density matrix response from perturbations in the Hamiltonian.
Definition: prg_implicit_fermi_mod.F90:727

prg_implicit_fermi_mod::prg_test_density_matrix
subroutine, public prg_test_density_matrix(ham_bml, p_bml, beta, mu, nocc, osteps, occErrLimit, threshold)
Calculate the density matrix with diagonalization and converge chemical potential.
Definition: prg_implicit_fermi_mod.F90:1113

prg_implicit_fermi_mod::prg_conjgrad
subroutine prg_conjgrad(A_bml, p_bml, p2_bml, tmp_bml, d_bml, w_bml, cg_tol, threshold)
Solve the system AX = B with conjugate gradient.
Definition: prg_implicit_fermi_mod.F90:1007

prg_implicit_fermi_mod::prg_newtonschulz
subroutine prg_newtonschulz(a_bml, ai_bml, r_bml, tmp_bml, d_bml, I_bml, tol, threshold, num_iter)
Find the inverse of the matrix A with Newton-Schulz iteration.
Definition: prg_implicit_fermi_mod.F90:889

prg_implicit_fermi_mod::prg_implicit_fermi_zero
subroutine, public prg_implicit_fermi_zero(h_bml, p_bml, nsteps, mu, method, threshold, tol)
Recursive Implicit Fermi Dirac for zero temperature.
Definition: prg_implicit_fermi_mod.F90:348

prg_normalize_mod
The prg_normalize module.
Definition: prg_normalize_mod.F90:5

prg_normalize_mod::prg_normalize_implicit_fermi
subroutine, public prg_normalize_implicit_fermi(h_bml, cnst, mu)
Normalize a Hamiltonian matrix prior to running the implicit fermi dirac algorithm.
Definition: prg_normalize_mod.F90:84

prg_parallel_mod
The parallel module.
Definition: prg_parallel_mod.F90:6

prg_timer_mod
The timer module.
Definition: prg_timer_mod.F90:23