PROGRESS  master
prg_pulaycomponent_mod.F90
Go to the documentation of this file.
1 
6 
7  use bml
8  use iso_c_binding
9 
10 #ifdef USE_NVTX
11  use prg_nvtx_mod
12 #endif
13 
14  implicit none
15 
16  private
17 
18  integer,parameter :: dp = kind(1.0d0)
19 
21 
22 contains
23 
35  subroutine prg_pulaycomponent0(rho_bml,ham_bml,pcm_bml,threshold,M,&
36  &verbose)
37  !&bml_type,verbose)
38 
39  implicit none
40 
41  type(bml_matrix_t), intent(in) :: rho_bml
42  type(bml_matrix_t), intent(in) :: ham_bml
43  type(bml_matrix_t) :: aux_bml
44  type(bml_matrix_t), intent(inout) :: pcm_bml
45  integer, intent(in) :: m
46  integer :: norb, verbose
47  real(dp), intent(in) :: threshold
48  !character(20), intent(in) :: bml_type
49  character(20) :: bml_type
50 
51  if(verbose.eq.2) write(*,*)"In prg_PulayComponent0 ..."
52 
53  norb = bml_get_n(rho_bml)
54  bml_type = bml_get_deep_type(rho_bml)
55 
56  call bml_zero_matrix(bml_type,bml_element_real,dp,norb ,norb,aux_bml)
57 
58  if(bml_get_n(pcm_bml).le.0)then !If pcm is not allocated
59  call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,pcm_bml)
60  else
61  ! re-allocate will break the c pointer to pcm (for the c wrapper)
62  !call bml_deallocate(pcm_bml) !If pcm is allocated then we set it to 0
63  !call bml_zero_matrix(bml_type,bml_element_real,dp,nOrb,nOrb,pcm_bml)
64  call bml_scale(0.0_dp, pcm_bml)
65  endif
66 
67  call bml_multiply(rho_bml, ham_bml, aux_bml, 1.0d0, 1.0d0,threshold) !D*H
68 
69  call bml_multiply(aux_bml,rho_bml,pcm_bml, 1.0d0, 1.0d0,threshold) !(D*H)*D
70 
71  call bml_scale(0.5_dp, pcm_bml)
72 
73  call bml_deallocate(aux_bml)
74 
75  end subroutine prg_pulaycomponent0
76 
89  subroutine prg_pulaycomponentt(rho_bml,ham_bml,zmat_bml,pcm_bml,threshold &
90  &,M,bml_type,verbose)
91 
92  implicit none
93 
94  type(bml_matrix_t), intent(in) :: rho_bml
95  type(bml_matrix_t), intent(in) :: ham_bml
96  type(bml_matrix_t), intent(in) :: zmat_bml
97  type(bml_matrix_t) :: aux_bml
98  type(bml_matrix_t) :: aux1_bml
99  type(bml_matrix_t), intent(inout) :: pcm_bml
100  integer, intent(in) :: m
101  integer :: norb, verbose
102  real(dp), intent(in) :: threshold
103  character(20), intent(in) :: bml_type
104 
105  if(verbose.eq.2) write(*,*)"In prg_PulayComponentT ..."
106 
107  norb = bml_get_n(rho_bml)
108 
109  if(bml_get_n(pcm_bml).le.0)then !If pcm is not allocated.
110  call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,pcm_bml)
111  else
112  call bml_deallocate(pcm_bml) !If pcm is allocated then we set it to 0.
113  call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,pcm_bml)
114  endif
115 
116  call bml_zero_matrix(bml_type,bml_element_real,dp,norb ,norb,aux_bml)
117  call bml_zero_matrix(bml_type,bml_element_real,dp,norb ,norb,aux1_bml)
118 
119  if(bml_get_n(zmat_bml).lt.0)then
120  stop 'ERROR: zmat_bml not allocated'
121  endif
122 
123  call bml_multiply(zmat_bml,zmat_bml,aux_bml,1.0d0,1.0d0,threshold) !aux=Z*Z
124 
125  call bml_multiply(aux_bml,ham_bml,pcm_bml,1.0d0,1.0d0,threshold) !Z*Z*H
126 
127  call bml_multiply(pcm_bml,rho_bml,aux1_bml,1.0d0,1.0d0,threshold) !aux1=Z*Z*H*D
128 
129  call bml_scale(0.0_dp, pcm_bml)
130 
131  call bml_multiply(ham_bml,aux_bml,pcm_bml,1.0d0,1.0d0,threshold) !H*Z*Z
132 
133  call bml_scale(0.0_dp, aux_bml) !We set aux to 0 to recicle the variable
134 
135  call bml_multiply(rho_bml,pcm_bml,aux_bml,1.0d0,1.0d0,threshold) !D*H*Z*Z
136 
137  call bml_add_deprecated(0.5d0,aux_bml,0.5d0,aux1_bml)
138 
139  call bml_copy(aux_bml,pcm_bml)
140 
141  call bml_deallocate(aux_bml)
142  call bml_deallocate(aux1_bml)
143 
144 
145  end subroutine prg_pulaycomponentt
146 
147 
158  subroutine prg_get_pulayforce(nats,zmat_bml,ham_bml,rho_bml,&
159  dSx_bml,dSy_bml,dSz_bml,hindex,FPUL,threshold)
160  implicit none
161  real(dp), allocatable, intent(inout) :: fpul(:,:)
162  integer, intent(in) :: nats
163  type(bml_matrix_t), intent(in) :: dsx_bml, dsy_bml, dsz_bml
164  type(bml_matrix_t), intent(in) :: rho_bml, ham_bml, zmat_bml
165  integer, intent(in) :: hindex(:,:)
166  integer :: i_a, i_b, i , j, norb
167  real(dp), intent(in) :: threshold
168  type(bml_matrix_t) :: xtmp_bml, ytmp_bml, ztmp_bml, sihd_bml
169  type(bml_matrix_t) :: aux_bml
170  real(dp), allocatable :: diagxtmp(:), diagytmp(:), diagztmp(:)
171  real(dp) :: partrace,partracex,partracey,partracez
172 #ifdef USE_OFFLOAD
173  type(c_ptr) :: xtmp_bml_c_ptr, ytmp_bml_c_ptr, ztmp_bml_c_ptr
174  integer :: ld
175  real(c_double), pointer :: xtmp_bml_ptr(:,:), ytmp_bml_ptr(:,:), ztmp_bml_ptr(:,:)
176 #endif
177 
178  write(*,*)"In prg_get_pulayforce ..."
179 
180  if(.not.allocated(fpul))then
181  allocate(fpul(3,nats))
182  endif
183 
184  fpul = 0.0_dp
185 
186  norb = bml_get_n(rho_bml)
187 
188  !SIHD = 2*Z*Z'*H*D;
189  call bml_copy_new(zmat_bml,sihd_bml)
190  call bml_copy_new(zmat_bml,aux_bml)
191  call bml_transpose(aux_bml) !Z'
192  call bml_multiply(zmat_bml,aux_bml,sihd_bml,2.0_dp,0.0_dp,threshold) !2*Z*Z'
193  call bml_multiply(sihd_bml,ham_bml,aux_bml,1.0_dp,0.0_dp,threshold) !2*Z*Z'*H
194  call bml_multiply(aux_bml,rho_bml,sihd_bml,1.0_dp,0.0_dp,threshold) !2*Z*Z'*D
195  call bml_deallocate(aux_bml)
196 
197  call bml_copy_new(rho_bml,xtmp_bml)
198  call bml_multiply(dsx_bml,sihd_bml,xtmp_bml,1.0_dp,0.0_dp,threshold)
199  call bml_copy_new(rho_bml,ytmp_bml)
200  call bml_multiply(dsy_bml,sihd_bml,ytmp_bml,1.0_dp,0.0_dp,threshold)
201  call bml_copy_new(rho_bml,ztmp_bml)
202  call bml_multiply(dsz_bml,sihd_bml,ztmp_bml,1.0_dp,0.0_dp,threshold)
203 
204 #ifdef USE_OFFLOAD
205  xtmp_bml_c_ptr = bml_get_data_ptr_dense(xtmp_bml)
206  ytmp_bml_c_ptr = bml_get_data_ptr_dense(ytmp_bml)
207  ztmp_bml_c_ptr = bml_get_data_ptr_dense(ztmp_bml)
208  ld = bml_get_ld_dense(xtmp_bml)
209  call c_f_pointer(xtmp_bml_c_ptr,xtmp_bml_ptr,shape=[ld,norb])
210  call c_f_pointer(ytmp_bml_c_ptr,ytmp_bml_ptr,shape=[ld,norb])
211  call c_f_pointer(ztmp_bml_c_ptr,ztmp_bml_ptr,shape=[ld,norb])
212  !$acc enter data copyin(FPUL(1:3,1:nats),hindex(1:2,1:nats))
213  !$acc parallel loop deviceptr(Xtmp_bml_ptr,Ytmp_bml_ptr,Ztmp_bml_ptr) &
214  !$acc private(i,j,partracex,partracey,partracez)
215  do i = 1,nats
216  partracex = 0.0_dp
217  partracey = 0.0_dp
218  partracez = 0.0_dp
219  !$acc loop reduction(partracex,partracey,partracez)
220  do j=hindex(1,i),hindex(2,i)
221  partracex = partracex + xtmp_bml_ptr(j,j)
222  partracey = partracey + ytmp_bml_ptr(j,j)
223  partracez = partracez + ztmp_bml_ptr(j,j)
224  enddo
225  !$acc end loop
226  fpul(1,i) = partracex;
227  fpul(2,i) = partracey;
228  fpul(3,i) = partracez;
229 
230  enddo
231  !$acc end loop
232  !$acc exit data copyout(FPUL(1:3,1:nats)) delete(hindex(1:2,1:nats))
233 #else
234  allocate(diagxtmp(norb))
235  call bml_get_diagonal(xtmp_bml,diagxtmp)
236 
237  allocate(diagytmp(norb))
238  call bml_get_diagonal(ytmp_bml,diagytmp)
239 
240  allocate(diagztmp(norb))
241  call bml_get_diagonal(ztmp_bml,diagztmp)
242 
243  !$omp parallel do default(none) private(i) &
244  !$omp private(I_A,I_B,j,partrace) &
245  !$omp shared(hindex,diagxtmp,diagytmp,diagztmp,FPUL,nats)
246  do i = 1,nats
247  i_a = hindex(1,i);
248  i_b = hindex(2,i);
249 
250  partrace = 0.0_dp
251  do j=i_a,i_b
252  partrace = partrace + diagxtmp(j)
253  enddo
254  fpul(1,i) = partrace;
255 
256  partrace = 0.0_dp
257  do j=i_a,i_b
258  partrace = partrace + diagytmp(j)
259  enddo
260  fpul(2,i) = partrace;
261 
262  partrace = 0.0_dp
263  do j=i_a,i_b
264  partrace = partrace + diagztmp(j)
265  enddo
266  fpul(3,i) = partrace;
267 
268  enddo
269  !$omp end parallel do
270 
271  deallocate(diagxtmp)
272  deallocate(diagytmp)
273  deallocate(diagztmp)
274 #endif
275  call bml_deallocate(xtmp_bml)
276  call bml_deallocate(ytmp_bml)
277  call bml_deallocate(ztmp_bml)
278  call bml_deallocate(sihd_bml)
279 
280  end subroutine prg_get_pulayforce
281 
282 end module prg_pulaycomponent_mod
Produces a matrix to get the Pulay Component of the forces.
subroutine, public prg_pulaycomponentt(rho_bml, ham_bml, zmat_bml, pcm_bml, threshold, M, bml_type, verbose)
At , .
subroutine, public prg_pulaycomponent0(rho_bml, ham_bml, pcm_bml, threshold, M, verbose)
At , .
subroutine, public prg_get_pulayforce(nats, zmat_bml, ham_bml, rho_bml, dSx_bml, dSy_bml, dSz_bml, hindex, FPUL, threshold)
Pulay Force FPUL from .