!{\src2tex{textfont=tt}}
!!****f* abinit/prep_getghc
!! NAME
!! prep_getghc
!!
!! FUNCTION
!! this routine prepares the data to the call of getghc.
!!
!! COPYRIGHT
!! Copyright (C) 1998-2007 ABINIT group (MT)
!! this file is distributed under the terms of the
!! gnu general public license, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! for the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!!  blocksize= size of block for FFT
!!  cwavef(2,npw*nspinor*ndat)=planewave coefficients of wavefunction.
!!  dimffnl=second dimension of ffnl (1+number of derivatives)
!!  dtfil <type(datafiles_type)>=variables related to files
!!  ffnl(npw,dimffnl,lmnmax,ntypat)=nonlocal form factors on basis sphere.
!!  gs_hamk <type(gs_hamiltonian_type)>=all data for the hamiltonian at k
!!  gvnlc=matrix elements <G|Vnonlocal|C>
!!  kg_k(3,npw_k)=reduced planewave coordinates.
!!  kinpw(npw)=(modified) kinetic energy for each plane wave (hartree)
!!  icall = order of call of this routine in lobpcgccwf
!!  lmnmax=if useylm=1, max number of (l,m,n) comp. over all type of psps
!!        =if useylm=0, max number of (l,n)   comp. over all type of psps
!!  matblk=dimension of the array ph3d
!!  mgfft=maximum size of 1d ffts
!!  mpi_enreg=informations about mpi parallelization
!!  mpsang= 1+maximum angular momentum for nonlocal pseudopotentials
!!  mpssoang= 1+maximum (spin*angular momentum) for nonlocal pseudopotentials
!!  natom=number of atoms in cell.
!!  nband_k=number of bands at this k point for that spin polarization
!!  nbdblock=
!!  npw_k=number of plane waves at this k point
!!  nspinor=number of spinorial components of the wavefunctions
!!  ntypat=number of types of atoms in unit cell.
!!  nvloc=final dimension of vlocal (usually 1, but 4 for non-collinear)
!!  n4,n5,n6 used for dimensionning of vlocal
!!  ph3d(2,npw,matblk)=3-dim structure factors, for each atom and plane wave.
!!  prtvol=control print volume and debugging output
!!  vlocal(n4,n5,n6,nvloc)= local potential in real space, on the augmented fft grid
!!
!! OUTPUT
!!  gwavef=(2,npw*nspinor*ndat)=matrix elements <G|H|C>.
!!  swavef=(2,npw*nspinor*ndat)=matrix elements <G|S|C>.
!!
!! SIDE EFFECTS
!!
!! PARENTS
!!      lobpcgccwf,lobpcgwf
!!
!! CHILDREN
!!      fourwf,sphereboundary,timab,xallgather_mpi,xallgatherv_mpi
!!      xalltoallv_mpi,xcomm_init
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

subroutine prep_getghc(cwavef,dimffnl,dtfil,ffnl,gs_hamk,gvnlc,gwavef,swavef,iblock,icall,istwf_k,kg_k,&
& kinpw,lmnmax,matblk,blocksize,mgfft,mpi_enreg,mpsang,mpssoang,natom,nbdblock,nband_k,npw_k,&
& nspinor,ntypat,nvloc,n4,n5,n6,ph3d,prtvol,sij_opt,vlocal)

 use defs_basis
 use defs_datatypes

!This section has been created automatically by the script Abilint (TD). Do not modify these by hand.
#ifdef HAVE_FORTRAN_INTERFACES
 use interfaces_01manage_mpi
 use interfaces_12ffts
 use interfaces_14wfs, except_this_one => prep_getghc
 use interfaces_lib01hidempi
#else
 use defs_xfuncmpi
#endif
!End of the abilint section

 implicit none

#if defined MPI_FFT
          include 'mpif.h'
#endif
!Arguments ------------------------------------
 type(gs_hamiltonian_type) :: gs_hamk
 integer :: sij_opt
 integer :: blocksize
 integer :: dimffnl,iblock,icall,istwf_k,lmnmax,matblk,mgfft,mpsang,mpssoang,n4,n5
 integer :: n6,natom,nband_k,nbdblock,npw_k,nspinor,ntypat,nvloc,prtvol
 type(datafiles_type) :: dtfil
 type(mpi_type) :: mpi_enreg
 integer :: kg_k(3,npw_k)
 real(dp) :: ffnl(npw_k,dimffnl,lmnmax,ntypat),gvnlc(2,npw_k*nspinor*blocksize)
 real(dp) :: kinpw(npw_k),ph3d(2,npw_k,matblk)
 real(dp) :: vlocal(n4,n5,n6,nvloc),cwavef(2,npw_k*nspinor*blocksize),gwavef(2,npw_k*nspinor*blocksize)
 real(dp) :: swavef(2,npw_k*nspinor*blocksize)

!Local variables-------------------------------
 integer :: oldspaceComm,spacecomm=0
 integer :: ier,ipw,npw_k3
 integer :: old_me_g0,old_num_group_fft,old_paral_compil_fft,old_paral_level,tim_getghc
 integer :: old_ngfft(18)
 real(dp), allocatable :: dummy2(:,:)
 real(dp) :: tsec(2)

!local variables for mpialltoallv
 real(dp), allocatable :: swavef_alltoall(:,:)
 real(dp), allocatable :: cwavef_alltoall(:,:),gwavef_alltoall(:,:),gvnlc_alltoall(:,:),&
 ffnl_little(:,:,:,:),ffnl_little_gather(:,:,:,:),&
 ph3d_little(:,:,:),ph3d_little_gather(:,:,:)
 integer, allocatable,save :: kg_k_gather(:,:),kg_k_gather_all(:,:),rdispls_all(:),npw_per_proc(:)
 real(dp), allocatable,save :: kinpw_gather(:),ffnl_gather(:,:,:,:),vlocal_allgather(:,:,:,:),&
 ph3d_gather(:,:,:)
 integer,save :: iproc,ndatarecv,ndatarecvloc,npw_tot
 integer,  allocatable :: recvcounts(:)
 integer,  allocatable :: sendcounts(:),sdispls(:),rdispls(:)
 integer,  allocatable :: sendcountsloc(:),sdisplsloc(:),recvcountsloc(:),rdisplsloc(:)
!no_abirules
!correspondence with abinit. here for real wf but in complex mode
!this is the index of a given band

! *************************************************************************

 tim_getghc=6
 old_paral_level= mpi_enreg%paral_level
 mpi_enreg%paral_level=3
 call xcomm_init(mpi_enreg,spaceComm)
 if(mpi_enreg%mode_para=='b') spaceComm=mpi_enreg%comm_band

 allocate(sendcountsloc(blocksize))
 allocate(sdisplsloc(blocksize))
 allocate(sdispls(blocksize))
 allocate(sendcounts(blocksize))
 allocate(recvcountsloc(blocksize))
 allocate(rdisplsloc(blocksize))
 allocate(rdispls(blocksize))
 allocate(recvcounts(blocksize))
 call timab(546,1,tsec)
 call xallgather_mpi(npw_k,recvcounts,spaceComm,ier)
 call timab(546,2,tsec)
 rdispls(1)=0
 do iproc=2,blocksize
  rdispls(iproc)=rdispls(iproc-1)+recvcounts(iproc-1)
 end do
 ndatarecv=rdispls(blocksize)+recvcounts(blocksize)

 if (icall==1 .and. iblock==1) then
  if (allocated(ffnl_gather)) deallocate(ffnl_gather)
  allocate(ffnl_gather(ndatarecv,dimffnl,lmnmax,ntypat))
  allocate(ffnl_little(dimffnl,lmnmax,ntypat,npw_k))
  allocate(ffnl_little_gather(dimffnl,lmnmax,ntypat,ndatarecv))
  do ipw=1,npw_k
   ffnl_little(:,:,:,ipw)=ffnl(ipw,:,:,:)
  end do
  recvcountsloc(:)=recvcounts(:)*dimffnl*lmnmax*ntypat
  rdisplsloc(:)=rdispls(:)*dimffnl*lmnmax*ntypat
  call timab(546,1,tsec)
  call xallgatherv_mpi(ffnl_little,npw_k*dimffnl*lmnmax*ntypat,ffnl_little_gather,&
  & recvcountsloc(:),rdisplsloc,spaceComm,ier)
  call timab(546,2,tsec)
  do ipw=1,ndatarecv
   ffnl_gather(ipw,:,:,:)=ffnl_little_gather(:,:,:,ipw)
  end do
  deallocate(ffnl_little,ffnl_little_gather)

  if (allocated(kinpw_gather)) deallocate(kinpw_gather)
  allocate(kinpw_gather(ndatarecv))
  recvcountsloc(:)=recvcounts(:)
  rdisplsloc(:)=rdispls(:)
  call timab(546,1,tsec)
  call xallgatherv_mpi(kinpw,npw_k,kinpw_gather,recvcountsloc(:),rdisplsloc,spaceComm,ier)
  call timab(546,2,tsec)

  if (allocated(ph3d_gather)) deallocate(ph3d_gather)
  allocate(ph3d_gather(2,ndatarecv,matblk))
  allocate(ph3d_little(2,matblk,npw_k),ph3d_little_gather(2,matblk,ndatarecv))
  recvcountsloc(:)=recvcounts(:)*2*matblk
  rdisplsloc(:)=rdispls(:)*2*matblk
  do ipw=1,npw_k
   ph3d_little(:,:,ipw)=ph3d(:,ipw,:)
  end do
  call timab(546,1,tsec)
  call xallgatherv_mpi(ph3d_little,npw_k*2*matblk,ph3d_little_gather,recvcountsloc(:),rdisplsloc,spaceComm,ier)
  call timab(546,2,tsec)
  do ipw=1,ndatarecv
   ph3d_gather(:,ipw,:)=ph3d_little_gather(:,:,ipw)
  end do
  deallocate(ph3d_little_gather,ph3d_little)

  if (allocated(kg_k_gather)) deallocate(kg_k_gather)
  allocate(kg_k_gather(3,ndatarecv))
  recvcountsloc(:)=recvcounts(:)*3
  rdisplsloc(:)=rdispls(:)*3
  call timab(546,1,tsec)
  call xallgatherv_mpi(kg_k(1,:),npw_k,kg_k_gather(1,:),recvcounts(:),rdispls,spaceComm,ier)
  call xallgatherv_mpi(kg_k(2,:),npw_k,kg_k_gather(2,:),recvcounts(:),rdispls,spaceComm,ier)
  call xallgatherv_mpi(kg_k(3,:),npw_k,kg_k_gather(3,:),recvcounts(:),rdispls,spaceComm,ier)
! I now recollect all the kg to have a common sphereboundary.
! First get the dimension of the whole kg array
  oldspacecomm=mpi_enreg%comm_fft
! Get dimension information
  allocate(npw_per_proc(mpi_enreg%nproc_fft),rdispls_all(mpi_enreg%nproc_fft))
  call xallgather_mpi(ndatarecv,npw_per_proc,oldspacecomm,ier)
  rdispls_all(1)=0
  do iproc=2,mpi_enreg%nproc_fft
   rdispls_all(iproc)=rdispls_all(iproc-1)+npw_per_proc(iproc-1)
  end do
  npw_tot=rdispls_all(mpi_enreg%nproc_fft)+npw_per_proc(mpi_enreg%nproc_fft)
! Transfer the kg on each proc to the whole array
  allocate(kg_k_gather_all(3,npw_tot))
  call xallgatherv_mpi&
&  (kg_k_gather(1,:),ndatarecv,kg_k_gather_all(1,:),npw_per_proc(:),rdispls_all,oldspaceComm,ier)
  call xallgatherv_mpi&
&  (kg_k_gather(2,:),ndatarecv,kg_k_gather_all(2,:),npw_per_proc(:),rdispls_all,oldspaceComm,ier)
  call xallgatherv_mpi&
&  (kg_k_gather(3,:),ndatarecv,kg_k_gather_all(3,:),npw_per_proc(:),rdispls_all,oldspaceComm,ier)
!  call leave_new("COLL")
  call timab(546,2,tsec)
  call sphereboundary(gs_hamk%gbound,istwf_k,kg_k_gather_all,mgfft,npw_tot)
  deallocate(kg_k_gather_all,npw_per_proc,rdispls_all)
 end if !End of the icall=1 and iblock=1 conditions

 sendcounts(:)=npw_k
 do iproc=1,blocksize
  sdispls(iproc)=(iproc-1)*npw_k
 end do

 allocate(cwavef_alltoall(2,ndatarecv*nspinor))
 allocate(gwavef_alltoall(2,ndatarecv*nspinor))
 allocate(swavef_alltoall(2,ndatarecv*nspinor))
 allocate(gvnlc_alltoall(2,ndatarecv*nspinor))
 recvcountsloc(:)=recvcounts(:)*2*nspinor
 rdisplsloc(:)=rdispls(:)*2*nspinor
 sendcountsloc(:)=sendcounts(:)*2*nspinor
 sdisplsloc(:)=sdispls(:)*2*nspinor
 call timab(545,1,tsec)
 call xalltoallv_mpi(cwavef,sendcountsloc,sdisplsloc,cwavef_alltoall,&
&         recvcountsloc,rdisplsloc,spaceComm,ier)
 call timab(545,2,tsec)
 if(mpi_enreg%mode_para /= 'b') then !I collect on this processor the whole local potential, and perform scalar fft
  if (.not.allocated(vlocal_allgather) .and. icall==1 .and. iblock==1) allocate(vlocal_allgather(n4,n5,n6,nvloc))
  if (allocated(vlocal_allgather)  .and. icall==1 .and. iblock==1) deallocate(vlocal_allgather)
  if (.not.allocated(vlocal_allgather)) then
   allocate(vlocal_allgather(n4,n5,n6,nvloc))
   call timab(546,1,tsec)
   call xallgather_mpi(vlocal(:,:,n6/mpi_enreg%nproc_fft*mpi_enreg%me_fft+1:n6/mpi_enreg%nproc_fft*(mpi_enreg%me_fft+1),:)&
&  ,n4*n5*n6*nvloc/mpi_enreg%nproc_fft,vlocal_allgather,spaceComm,ier)
   call timab(546,2,tsec)
  end if
  old_ngfft(:)=gs_hamk%ngfft(:)
  gs_hamk%ngfft(11)=0;gs_hamk%ngfft(10)=1
  old_num_group_fft=mpi_enreg%num_group_fft
  mpi_enreg%num_group_fft=0
  old_paral_compil_fft=mpi_enreg%paral_compil_fft
  mpi_enreg%paral_compil_fft=0
  if (gs_hamk%istwf_k==2) old_me_g0=mpi_enreg%me_g0
  if (gs_hamk%istwf_k==2) mpi_enreg%me_g0=1
  mpi_enreg%me_fft=0;mpi_enreg%nproc_fft=1

  call getghc(cwavef_alltoall,dimffnl,ffnl_gather,dtfil%filstat,gwavef_alltoall(:,:),&
&  swavef_alltoall(:,:),gs_hamk,gvnlc_alltoall,kg_k_gather,&
&  kinpw_gather,lmnmax,matblk,mgfft,mpi_enreg,mpsang,mpssoang,natom,1,ndatarecv,nspinor,ntypat,&
&  nvloc,n4,n5,n6,ph3d_gather,prtvol,sij_opt,tim_getghc,vlocal_allgather)

  gs_hamk%ngfft(:)=old_ngfft(:)
  mpi_enreg%paral_compil_fft=old_paral_compil_fft
  mpi_enreg%num_group_fft=old_num_group_fft
  mpi_enreg%nproc_fft=old_ngfft(10)
  mpi_enreg%me_fft=old_ngfft(11)
 else !case band fft (use vlocal as fft_distributed, and the fft_parallel mode)

  call getghc(cwavef_alltoall,dimffnl,ffnl_gather,dtfil%filstat,gwavef_alltoall,&
&  swavef_alltoall(:,:),gs_hamk,gvnlc_alltoall,kg_k_gather,&
&  kinpw_gather,lmnmax,matblk,mgfft,mpi_enreg,mpsang,mpssoang,natom,1,ndatarecv,nspinor,ntypat,&
&  nvloc,n4,n5,n6,ph3d_gather,prtvol,sij_opt,tim_getghc,vlocal)

 end if
 if (gs_hamk%istwf_k==2) mpi_enreg%me_g0=old_me_g0
 call timab(545,1,tsec)
 if (sij_opt==1) then
  call xalltoallv_mpi(swavef_alltoall,recvcountsloc,rdisplsloc,swavef,&
  &         sendcountsloc,sdisplsloc,spaceComm,ier)
 endif
 call xalltoallv_mpi(gwavef_alltoall,recvcountsloc,rdisplsloc,gwavef,&
 &         sendcountsloc,sdisplsloc,spaceComm,ier)
 call xalltoallv_mpi(gvnlc_alltoall,recvcountsloc,rdisplsloc,gvnlc,&
 &         sendcountsloc,sdisplsloc,spaceComm,ier)
 call timab(545,2,tsec)

 mpi_enreg%paral_level= old_paral_level
 deallocate(sendcounts,recvcounts,sdispls,rdispls)
 deallocate(sendcountsloc,sdisplsloc)
 deallocate(recvcountsloc,rdisplsloc)
 deallocate(cwavef_alltoall,gwavef_alltoall,gvnlc_alltoall)
 deallocate(swavef_alltoall)
end subroutine prep_getghc
!!***
