!{\src2tex{textfont=tt}}
!!****f* ABINIT/sigma
!! NAME
!! sigma
!!
!! FUNCTION
!!
!! Calculate self-energy operator matrix elements
!!
!! COPYRIGHT
!! Copyright (C) 1999-2007 ABINIT group (GMR, VO, LR, RWG, MT, MG)
!! This file is distributed under the terms of the
!! GNU General Public License, see ~abinit/COPYING
!! or http://www.gnu.org/copyleft/gpl.txt .
!! For the initials of contributors, see ~abinit/doc/developers/contributors.txt .
!!
!! INPUTS
!!  acell(3)=length scales of primitive translations (bohr)
!!  dtfil <type(datafiles_type)>=variables related to files
!!  dtset <type(dataset_type)>=all input variables for this dataset
!!  mpi_enreg=informations about MPI parallelization (to be completed)
!!  rprim(3,3)=dimensionless real space primitive translations
!!
!! OUTPUT
!!  output is written in a file
!!
!! PARENTS
!!      driver,drivergw
!!
!! NOTES
!!
!! files used:
!! input:
!! 10: LDA band structure and material info for sigma operator
!! 12: Epsilon-twiddle**-1 (plasmon-pole form) file EPSM1
!! stdin: parameters of calculation (incl. which
!!        k-points to calc. Sigma for) file sig.in
!! output:
!! 21: gw file, available for exciton calculation
!!
!! CHILDREN
!!      calc_wf_qp,cigfft,clcqpg,cmevxclda,cppm1par,cppm2par,cppm3par,cppm4par
!!      crho,csigme,distrb2,fermi,fftwfn,findk,ham_changebasis,hartrham
!!      hdr_clean,identk,identq,lattice,leave_new,memerr,metric,mkrdim,pclock
!!      rdgw,rdkss,rdlda,rdldaabinit,rdqps,rdscr,setmesh,setshells
!!      setup_little_group,surot,testlda,testscr,timab,write_sigma_results
!!      write_sigma_results_header,wrqps,wrtout,xcomm_init,xmaster_init
!!      xme_init,xsum_mpi
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

subroutine sigma(acell,dtfil,dtset,mpi_enreg,rprim)

 use defs_basis
 use defs_datatypes

!This section has been created automatically by the script Abilint (TD). Do not modify these by hand.
#ifdef HAVE_FORTRAN_INTERFACES
 use interfaces_01manage_mpi
 use interfaces_12geometry
 use interfaces_14iowfdenpot
 use interfaces_15gw
 use interfaces_lib01hidempi
#else
 use defs_xfuncmpi
#endif
!End of the abilint section

 implicit none

!Arguments ------------------------------------
!scalars
 type(MPI_type),intent(inout) :: mpi_enreg
 type(datafiles_type),intent(in) :: dtfil
 type(dataset_type),intent(inout) :: dtset
!arrays
 real(dp),intent(in) :: acell(3),rprim(3,3)

!Local variables-------------------------------
!scalars
 integer,parameter :: eunit=12,iunit=10,unitem1=24
 integer,save :: counter=0
 integer :: fform,i,i1,i2,i3,ib,ibr,ierr,ig,ik,ikcalc,ikibz,io,iout,is
 integer :: isppol,istat,j,master,max_band_proc,me,min_band_proc,mpsang,mqpt
 integer :: natom,nbnds_per_proc,nbr,nel=0,ngfft1,ngfft1a,ngfft2,ngfft2a,ngfft3
 integer :: ngfft3a,ngr,ninv,nkbzx,nkibzr,nop,nopr,npwvec,nq1,nq2,nq3,nqbzx
 integer :: nqibzx,nqpt_computed,nqshft,nr,nrb,nscf,nshr,nsppol,ntypat,ppmsize
 integer :: spaceComm,tim_fourdp
 real(dp) :: absimsig,bzvol,condbottom,domegas,efermi,efermi_qp,oldefermi
 real(dp) :: omega_m_e_resig,omegaplasma,outrim0,qptrlen,ucvol,valencetop
 logical,parameter :: implemnented=.false.
 logical :: DEBUG,ltemp,nonlocal,parallelism_is_on_bands
 logical :: parallelism_is_on_kpoints,update_energies=.false.,min_found,max_found
 character(len=500) :: message
 character(len=fnlen) :: filnam
 type(epsilonm1_parameters) :: ep
 type(epsilonm1_results) :: er
 type(hdr_type) :: hdr
 type(little_group) :: lt_k
 type(sigma_parameters) :: sp
 type(sigma_results) :: sr
!arrays
 integer :: dsifkpt(3),qptrlatt(3,3),vacuum(3)
 integer,allocatable :: grottb(:,:,:),grottbm1(:,:,:),gvec(:,:),igfft(:,:,:,:),distrb(:)
 integer,allocatable :: irottb(:,:),ktab(:),ktabi(:),ktabo(:),ktabr(:,:)
 integer,allocatable :: nband_t(:),nbv(:),qtab(:),qtabi(:),qtabo(:),symafm(:)
 integer,allocatable :: typat(:)
 real(dp) :: a1(3),a2(3),a3(3),b1(3),b2(3),b3(3),gmet(3,3),gprimd(3,3)
 real(dp) :: rmet(3,3),rprimd(3,3),tsec(2)
 real(dp),allocatable :: en(:,:,:),en_qp(:,:,:),kbz(:,:),kibz(:,:),mcint(:,:,:)
 real(dp),allocatable :: occ(:,:,:),occ_qp(:,:,:),op(:,:,:),qpoint(:,:),qbz(:,:)
 real(dp),allocatable :: qbz_sym(:,:),qpg(:,:),qratio(:,:,:),rho(:,:)
 real(dp),allocatable :: rho_p(:,:),rho_qp(:,:),spqpt(:,:),sr_gwenergy(:,:,:)
 real(dp),allocatable :: temp1(:),tmpshifts(:,:),vkb(:,:,:,:),vkbd(:,:,:,:)
 real(dp),allocatable :: vkbsign(:,:),wtk(:),wtq(:),xred(:,:)
 complex,allocatable :: eigpot(:,:,:),hbare(:,:,:,:),hlda(:,:,:,:)
 complex,allocatable :: m_lda_to_qp(:,:,:,:),temp_omegatw(:,:),vhartr(:,:,:,:)
 complex,allocatable :: vxc(:,:,:,:),wfg(:,:,:,:),wfr(:,:,:,:)
 character(len=80) :: titem1(2),title(2)

!******************************************************************
!BEGIN EXECUTABLE SECTION
!start clock
  call pclock(0)
!Init mpi_comm
  call xcomm_init(mpi_enreg,spaceComm)
!Init me
  call xme_init(mpi_enreg,me)
!Init master
  call xmaster_init(mpi_enreg,master)
!End of parallelization initialisation

 call timab(401,1,tsec)
 call timab(402,1,tsec)

 write(message,'(7a)') &
& ' SIGMA: Calculation of the GW corrections',ch10,ch10,&
& ' Based on a program developped by R.W. Godby, V. Olevano, G. Onida, and L. Reining.',ch10,&
& ' Incorporated in ABINIT by V. Olevano, G.-M. Rignanese, and M. Torrent.',ch10
 call wrtout(6,message,'COLL')
 call wrtout(ab_out,message,'COLL')

 !start clock
 call pclock(0)

!defaults

 if(dtset%localrdwf==0)then
  nonlocal=.false.! only master has an access to files
 else
  nonlocal=.true. ! all proc have an access to files
 end if


 if(dtset%gwpara==1)then
  parallelism_is_on_kpoints=.true.
  parallelism_is_on_bands=.false.
 else if(dtset%gwpara==2)then
  parallelism_is_on_bands=.true.
  parallelism_is_on_kpoints=.false.
 else if(dtset%gwpara==3)then
 if(implemnented)then
  parallelism_is_on_kpoints=.true.
  parallelism_is_on_bands=.true.
 else
   write(message, '(a,a,a,a,a,a,a)' ) ch10,&
&   ' sigma: WARNING -',ch10,&
&   '  at the moment gwpara  3 is not yet implemented ',ch10,&
&   ' gwpara 2 will be imposed',ch10
   call wrtout(6,message,'COLL')
   call leave_new('COLL')
   parallelism_is_on_bands=.true.
 end if
 else if(dtset%gwpara>3)then
  write(message, '(a,a,a,a,a)' ) ch10,&
&  ' sigma: ERROR -',ch10,&
&  '  gwpara can only be 1, 2, 3  ',ch10
  call wrtout(6,message,'COLL')
  call leave_new('COLL')
 end if

 sp%nb=dtset%nband(1)
 mpi_enreg%parareel=0
 mpi_enreg%paralbd=0
 mpi_enreg%paral_level=2

 if(mpi_enreg%nproc==1)then
  parallelism_is_on_kpoints=.false.
  parallelism_is_on_bands=.false.
  nbnds_per_proc=sp%nb
  min_band_proc=1
  max_band_proc=sp%nb
 end if

 if(parallelism_is_on_bands)then
  allocate(nband_t(sp%nb))
  nband_t(:)=sp%nb
    allocate(mpi_enreg%proc_distrb(sp%nb,sp%nb,1))
    call distrb2(sp%nb,nband_t,sp%nb,1,mpi_enreg)
      nbnds_per_proc=0
      min_found=.false.
      max_found=.false.
      do ib=1,sp%nb
       if(mpi_enreg%proc_distrb(ib,1,1)==me)nbnds_per_proc=nbnds_per_proc+1
      end do
      do ib=1,sp%nb
      if(mpi_enreg%proc_distrb(ib,1,1)==me)min_found=.true.
      if(min_found)exit
      end do
      min_band_proc=ib
      do ib=sp%nb,1,-1
      if(mpi_enreg%proc_distrb(ib,1,1)==me)max_found=.true.
       if(max_found)exit
      end do
      max_band_proc=ib
      if(nbnds_per_proc==0)then
       write(message,'(a,a,a,a,a,a,a,a)')ch10,&
&  ' para: BUG -',ch10,&
&  ' one or more PROC has zero number of bands ',ch10,&
&  ' this is not allowd at the moment',ch10,&
&  ' use less number of PROCs'
  call wrtout(6,message,'PERS')
  call leave_new('PERS')
 end if
  deallocate(nband_t)
 else if(parallelism_is_on_kpoints)then
! memory is not parallelized for this option
  nbnds_per_proc=sp%nb
  min_band_proc=1
  max_band_proc=sp%nb
 end if
 sp%gwcalctyp=dtset%gwcalctyp
 write(message,'(a,i3)')' GW calculation type: ', sp%gwcalctyp
 call wrtout(6,message,'COLL')

 sp%splitsigc=dtset%splitsigc
 sp%ppmodel=dtset%ppmodel
 !in case of H-F, SEX or COHSEX use Hybertsen-Louie ppm (only $\omega = 0 $)
 !fake screening in case of H-F !
 if(mod(sp%gwcalctyp,10)==5.or.mod(sp%gwcalctyp,10)==6.or.mod(sp%gwcalctyp,10)==7) sp%ppmodel=2

 if(mod(sp%gwcalctyp,10)<5) then
  !if ppm or numerical integration (contour deformation)
  sp%nomegasrd=dtset%nomegasrd
  sp%omegasrdmax=dtset%omegasrdmax
  sp%deltae=(2*sp%omegasrdmax)/(sp%nomegasrd-1)
 else
  sp%nomegasrd=1
  sp%omegasrdmax=0.
  sp%deltae=0.
 end if

 write(message,'(a,i4,2a,f10.6,2a,f10.6)')&
& ' number of point to evaluate derivative: ',sp%nomegasrd,ch10,&
& ' omegasrdmax [eV] ',sp%omegasrdmax*Ha_eV,ch10,               &
& ' deltae [eV] ',sp%deltae*Ha_eV
 call wrtout(6,message,'COLL')

 sp%nomegasr=dtset%nfreqsp
 sp%omegasrmax=dtset%freqspmax

 if(sp%nomegasr>0) then
  write(message,'(a)')' calculating spectral functions'
  call wrtout(6,message,'COLL')
  domegas = 2*sp%omegasrmax / (sp%nomegasr-1)
  allocate(sr%omega(sp%nomegasr))
  do io=1, sp%nomegasr
   sr%omega(io) = - sp%omegasrmax + domegas * (io-1)
  end do
 else
  !sp%nomegasr=1 ! to not disturb calculate at least in 0
  !allocate(sr%omega(sp%nomegasr))
  !sr%omega(1)=0
 end if

 write(message,'(a,i4,2a,f10.6)')        &
& ' nomegasr ',sp%nomegasr,ch10,         &
& ' omegasrmax [eV] ',sp%omegasrmax*Ha_eV
 call wrtout(6,message,'COLL')

 sp%nb=dtset%nband(1)
 sp%zcut=dtset%zcut
 write(message,'(a,f10.6)')' zcut for avoiding poles in sigma [eV] ',sp%zcut*Ha_eV
 call wrtout(6,message,'COLL')

 !Compute dimensional primitive translations rprimd
 call mkrdim(acell,rprim,rprimd)

 !Obtain dimensional translations in reciprocal space gprimd, metrics and unit cell volume, from rprimd.
 !Also output rprimd, gprimd and ucvol
 call metric(gmet,gprimd,ab_out,rmet,rprimd,ucvol)

 !Define consistently npw, nsh, and ecut
 call setshells(dtset%ecutwfn,dtset%npwwfn,dtset%nshwfn,dtset%nsym,gmet,&
& gprimd,dtset%symrel,'wfn',ucvol)
 call setshells(dtset%ecutsigx,dtset%npwsigx,dtset%nshsigx,dtset%nsym,gmet,&
& gprimd,dtset%symrel,'mat',ucvol)

 sp%npwwfn=dtset%npwwfn
 sp%npwx=dtset%npwsigx
 npwvec=sp%npwwfn
 if(sp%npwx>sp%npwwfn) npwvec=sp%npwx

 !read parameters of the KSS or QPLDA o STA file and verifify them
 !MG060914 testlda now outputs the value nsppol read in the hdr of the _KSS file
 call testlda(dtfil,nop,nkibzr,nbr,ngr,nshr,i1,i2,ntypat,natom,mpsang,nsppol,mpi_enreg,nonlocal)
 if(i2/=0.and.i2/=1.and.i2/=2.and.i2/=502.and.i2/=602) then
  write(message,'(3a)')&
&  ' sigma: ERROR -',ch10,&
&  ' unknown format found in the screening file'
  call wrtout(6,message,'COLL')
  call leave_new('COLL')
 end if

 sp%nk=nkibzr
 sp%nsppol=nsppol

 ninv=2
 inquire(file='nop.in',exist=ltemp)
 if(ltemp) then
  open(99,file='nop.in')
  read(99,*) nop, ninv
  close(99)
 end if
 sp%nop=nop

 if(npwvec>ngr) then
  write(message,'(3a)')&
&  ' sigma: WARNING - ',ch10,&
&  ' number of g-vectors found less then required'
  call wrtout(6,message,'COLL')

  npwvec=ngr
  if(sp%npwwfn>ngr) sp%npwwfn=ngr
  if(sp%npwx>ngr) sp%npwx=ngr
  write(message,'(3(a,i8,a))')&
&  '         calculation will proceed with npwvec = ',npwvec,ch10,  &
   '         calculation will proceed with npwsigx= ',sp%npwx,ch10,&
   '                                       npwwfn = ',sp%npwwfn,ch10
  call wrtout(6,message,'COLL')
 end if

 if(sp%nb>nbr) then
  write(message,'(5a,i4,a)')&
&  ' sigma: WARNING - ',ch10,                            &
&  ' number of bands found less then required     ',ch10,&
&  '     calculation will proceed with nbnds= ',nbr,ch10
  call wrtout(6,message,'COLL')
  sp%nb=nbr
 end if

 !allocate LDA electronic structure variables
 allocate(op(3,3,nop),stat=istat)
 allocate(kibz(3,sp%nk),stat=istat)
 allocate(gvec(3,npwvec),stat=istat)
 if(istat/=0) stop 'out of memory in gvec'
 allocate(en(sp%nk,min_band_proc:max_band_proc,sp%nsppol),stat=istat)
 if(istat/=0) stop 'out of memory in en'
 allocate(occ(sp%nk,min_band_proc:max_band_proc,sp%nsppol),stat=istat)
 if(istat/=0) stop 'out of memory in occ'

 allocate(wfg(sp%npwwfn,min_band_proc:max_band_proc,sp%nk,sp%nsppol),stat=istat)
 if(istat/=0) then
  call memerr('sigma','wfg',dtset%npwwfn*nbnds_per_proc*sp%nk*sp%nsppol,'spc')
 end if
 wfg(:,:,:,:)=(0.,0.)
 en(:,:,:)=zero
 occ(:,:,:)=zero

 !read in LDA band structure for sigma operator
 !MG sp%nsppol==2 does not work if we are using the STA or QPLDA file format
 if(i2==0.or.i2==1) then

  write(message,'(2a)')' sigma: will call rdlda ',ch10
  call wrtout(6,message,'COLL')
  if (sp%nsppol==2) then
   write(message,'(4a)')ch10,&
&   ' sigma: ERROR- ',ch10,&
&   ' nsppol=2 and QPLDA file format are not compatible'
   call wrtout(6,message,'COLL')
   call leave_new('COLL')
  end if

  call rdlda(iunit,nop,sp%nb,sp%nk,npwvec,sp%npwwfn,nopr,nbr,sp%nk,ngr,&
&  title,i1,i2,i3,a1,a2,a3,op,gvec,kibz,en,occ,wfg)

 else if(i2==2) then

  write(message,'(2a)')' sigma: will call rdldaabinit ',ch10
  call wrtout(6,message,'COLL')
  if (sp%nsppol==2) then
   write(message,'(4a)')ch10,&
&   ' sigma: ERROR- ',ch10,&
&   ' nsppol=2 and old wf file format are not compatible'
   call wrtout(6,message,'COLL')
   call leave_new('COLL')
  end if

  call rdldaabinit(iunit,nop,sp%nb,sp%nk,npwvec,sp%npwwfn,&
&  title,a1,a2,a3,op,gvec,kibz,en,occ,wfg)

 else if(i2==502.or.i2==602) then
  write(message,'(2a)')' sigma: will call rdkss ',ch10
  call wrtout(6,message,'COLL')
  allocate(typat(natom),xred(3,natom))
  allocate(vkbsign(mpsang,ntypat))
  allocate(vkb(sp%npwwfn,ntypat,mpsang,sp%nk),stat=istat)
  if(istat/=0) stop 'out of memory in vkb'
  allocate(vkbd(sp%npwwfn,ntypat,mpsang,sp%nk),stat=istat)
  if(istat/=0) stop 'out of memory in vkbd'
  !MG 060914 added sp%nsppol as input variable and modified the shape of the output arrays
  call rdkss(dtfil,hdr,nop,sp%nb,sp%nk,npwvec,sp%nsppol,sp%npwwfn,&
&   title,a1,a2,a3,op,gvec,kibz,en,occ,wfg,&
&   ntypat,natom,mpsang,typat,xred,vkbsign,vkb,vkbd,nel,mpi_enreg,nonlocal,min_band_proc,max_band_proc)
  deallocate(typat,xred,vkbsign,vkb,vkbd)
 end if

 call pclock(10)

!Read screening (epsilon^-1) file
 call testscr(dtfil,ep%nq,ep%nomega,ep%npwe,ep%npwwfn,ep%nb,titem1,fform,mpi_enreg,nonlocal)

 sp%nq=ep%nq
 sp%npwc=ep%npwe
 er%nq=ep%nq
 er%nomega=ep%nomega
 if(sp%npwc>sp%npwx) sp%npwc=sp%npwx
 er%npwe=sp%npwc
 allocate(qpoint(3,sp%nq))

 if(fform==1002) then
  allocate(er%omega(ep%nomega))
  allocate(er%epsm1(sp%npwc,sp%npwc,ep%nomega,sp%nq),stat=istat)
  if(istat/=0) then
   call memerr('sigma','er%epsm1',sp%npwc*sp%npwc*ep%nomega*sp%nq,'spc')
  end if

  call rdscr(dtfil,sp%npwc,sp%nq,ep%nomega,qpoint,er%omega,er%epsm1,mpi_enreg,nonlocal)

  if(ep%nomega==2) then
   ep%nomegaer=1
   ep%nomegaei=1
  else
   ep%nomegaer=1
   do io=1,ep%nomega
    !MG this coding works only if the first frequencies are real,
    !   do not modify the corresponding part in screening.F90
    if(real(er%omega(io))>0.001*Ha_eV) ep%nomegaer=io
   end do
   ep%nomegaei=ep%nomega-ep%nomegaer
  end if

 else if(fform==1003) then
  write(message,'(3a)')&
&  ' sigma: ERROR- ',ch10,&
&  ' em1 file format not any more compatible'
  call wrtout(6,message,'COLL')
  call leave_new('COLL')
 else
  write(message,'(3a)')&
&  ' sigma : ERROR -',ch10,&
&  ' unknown epsilon^-1 file format'
  call wrtout(6,message,'COLL')
  call leave_new('COLL')
 end if !of if fform

 write(message,'(2(a,i4))')' nomegaer= ',ep%nomegaer,' nomegaei= ',ep%nomegaei
 call wrtout(6,message,'COLL')

 call pclock(70)

!Calculate b1, b2, b3 and ucvol, bzvol
!MG unlike the main abinit code, here the reciprocal vectors
!   are defined such as a_i b_j = 2pi delta_ij
 call lattice(a1,a2,a3,b1,b2,b3,ucvol,bzvol)

!Set real space mesh
 call setmesh(gmet,gvec,ngfft1,ngfft2,ngfft3,ngfft1a,ngfft2a,ngfft3a,npwvec,sp%npwwfn,nr,1)

 !NOTE : The following call to setmesh.F90 must be used if method=2 or method=3 in setmesh.F90
 !write(std_out,*) ' npwx = ',sp%npwx,' npwc = ',sp%npwc
 !call setmesh(gmet,gvec,ngfft1,ngfft2,ngfft3,ngfft1a,ngfft2a,ngfft3a,max(sp%npwx,sp%npwc),sp%npwwfn,nr)

 nrb=ngfft1*ngfft2*ngfft3

 !set up tables for FFT igfft
 allocate(igfft(npwvec,5,5,5),stat=istat)
 if(istat/=0) stop 'out of memory in igfft'
 call cigfft(npwvec,ngfft1a,ngfft1,ngfft2,ngfft3,gvec,igfft)
 call pclock(20)

 call timab(402,2,tsec)
 call timab(403,1,tsec)

!Calculate LDA wavefunctions in real space using FFT
 allocate(wfr(nr,min_band_proc:max_band_proc,sp%nk,sp%nsppol),stat=istat)
 if(istat/=0) then
  call memerr('sigma','wfr',nr*nbnds_per_proc*sp%nk*sp%nsppol,'spc')
 end if
 wfr(:,:,:,:)=(0.,0.)

 tim_fourdp=5
 !MG fftwfn has been modified in order to deal with the nsppol=2 case
 call fftwfn(sp%npwwfn,sp%nb,sp%nk,nr,sp%nsppol,wfg,wfr,igfft(:,3,3,3),ngfft1,ngfft1a,&
& ngfft2,ngfft3,tim_fourdp,mpi_enreg,min_band_proc,max_band_proc,parallelism_is_on_bands)

 call pclock(30)
 call timab(403,2,tsec)
 call timab(404,1,tsec)

 !set up table indicating rotations of r-points
 allocate(irottb(nr,nop),grottb(npwvec,2,nop),grottbm1(npwvec,2,nop),stat=istat)
 if(istat/=0) stop ' out of memory in rotations tables'
  irottb(:,:)=0
  grottb(:,:,:)=0
  grottbm1(:,:,:)=0

 call surot(op,nop,ninv,ngfft1,ngfft1a,ngfft2,ngfft3,nr,npwvec,gvec,grottb,irottb,grottbm1)
 call pclock(40)

 !set up required k-points in whole BZ; nkbzx maximum number of them
 nkbzx=sp%nk*nop*ninv
 allocate(kbz(3,nkbzx),wtk(sp%nk),stat=istat)
 if(istat/=0) stop ' out of memory in kbz'
 allocate(ktab(nkbzx),ktabi(nkbzx),ktabo(nkbzx),ktabr(nr,nkbzx),stat=istat)
 if(istat/=0) stop ' out of memory in k-points tables'

 call identk(kibz,sp%nk,nkbzx,nr,nop,ninv,irottb,op,kbz,ktab,ktabr,ktabi,ktabo,sp%nkbz,wtk)
 call pclock(50)

 !MG060926 using an array to store the valence band index take into account the case
 !         in which the number of occupied bands in a semi-conductor is not the same for both the spins
 allocate (nbv(sp%nsppol))
 !MG FIXME Check this part
 call fermi(hdr,sp%nb,sp%nk,dtset%fixmom,sp%nsppol,wtk,en,occ,nel,nbv,efermi,mpi_enreg,min_band_proc,&
&  max_band_proc,parallelism_is_on_bands)

 if(dtset%nkptgw==0) then
  sp%nkcalc=sp%nk
  allocate(sp%xkcalc(3,sp%nkcalc),sp%kcalc(sp%nkcalc))
  allocate(sp%minbnd(sp%nkcalc),sp%maxbnd(sp%nkcalc))
  sp%xkcalc(:,:)=kibz(:,:)
  sp%minbnd(:)=1
  !maximum value over spins
  sp%maxbnd(:)=maxval(nbv)*2
  if(maxval(nbv)*2>sp%nb) sp%maxbnd(:)=sp%nb
 else
  sp%nkcalc=dtset%nkptgw
  allocate(sp%xkcalc(3,sp%nkcalc),sp%kcalc(sp%nkcalc))
  allocate(sp%minbnd(sp%nkcalc),sp%maxbnd(sp%nkcalc))
  sp%xkcalc(:,:)=dtset%kptgw(:,:)
  sp%minbnd(:)=dtset%bdgw(1,:)
  sp%maxbnd(:)=dtset%bdgw(2,:)
  if(any(sp%maxbnd(:)>sp%nb)) then
   write(message,'(5a)')&
&   ' sigma : ERROR - ',ch10,&
&   ' at least one band where the GW corrections are required exceeds the number of treated bands',ch10, &
&   ' increase the number of bands in the input file (or in the KSS file) '
   call wrtout(6,message,'COLL')
   call leave_new('COLL')
  end if
 end if

 !MG if option==1 two kpoints are considered equivalent if they differ by a
 !reciprocal G vector, actually option is 0, but should be changed FIXME
 !verbose (last argument set to 0)
 call findk(sp%nkcalc,sp%nkbz,sp%xkcalc,kbz,sp%kcalc,0,1)
 call pclock(60)

! calculate the density
 allocate(rho(nr,sp%nsppol),stat=istat)
 if(istat/=0) stop 'out of memory'

!MG060926 in case of nsppol==2 crho reports the total charge in the first half
!         and the spin up charge density in the second half
!         if the  wavefunctions are not in memory we can use density.F90
!         presently the wfr are supposed to be in memory
 call crho(irottb,sp%nb,ninv,sp%nkbz,sp%nk,sp%nk,nop,nr,nrb,sp%nsppol,&
& occ,omegaplasma,rho,ucvol,wfr,wtk,mpi_enreg,&
& min_band_proc,max_band_proc,parallelism_is_on_bands)

 !vxc(b1,b2,k,s)=<b1,k,s| v_{xc} | b2,k,s>
 allocate(vxc(sp%nb,sp%nb,sp%nk,sp%nsppol),stat=istat)
 if (istat/=0) then
  call memerr('sigma','vxc',sp%nb*sp%nb*sp%nk*sp%nsppol,'spc')
 end if

!MG note that matrix elements of vxc are calculated without including
!   the non linear core correction
 vxc(:,:,:,:)=czero

!Now cmexcclda works if nsppol==2
 call cmevxclda(dtset,dtset%ixc,mpi_enreg,sp%nb,ngfft1,ngfft2,ngfft3,sp%nk,&
& nr,nrb,sp%nsppol,rho,rprimd,wfr,&
& vxc,min_band_proc,max_band_proc,parallelism_is_on_bands)

 if(dtset%gwcalctyp>=20) then

! calculate LDA Hartree potential
  allocate(vhartr(sp%nb,sp%nb,sp%nk,sp%nsppol),stat=istat)
  if (istat/=0) then
   call memerr('sigma','vhartr',sp%nb*sp%nb*sp%nk*sp%nsppol,'spc')
  end if

  write(message,'(a)')' *************** LDA Energies *******************'
  call wrtout(6,message,'COLL')
! using only first half of rho(nr,sp%nsppol)
  call hartrham(dtset,sp,gmet,ucvol,ngfft1,ngfft2,ngfft3,nr,&
&  npwvec,gvec,kibz,wtk,sp%nsppol,wfr,wfg,rho(:,1),occ,en,vhartr,mpi_enreg,&
&  min_band_proc,max_band_proc,parallelism_is_on_bands)

  end if

 !set up q-points in whole BZ
 nqbzx=sp%nq*nop*ninv

 allocate(qbz(3,nqbzx),qtab(nqbzx),qtabo(nqbzx),qtabi(nqbzx),stat=istat)
 if(istat/=0) stop 'out of memory in q-points tables'
 qtab(:)=0
 qtabo(:)=0
 qtabi(:)=0
 qbz(:,:)=0
 call identq(qpoint,sp%nq,nqbzx,op,nop,ninv,qbz,qtab,qtabi,qtabo,sp%nqbz)
 call pclock(80)

!MG added to implement the RIM technique
! allocate(spqpt(3,sp%nqbz),wtq(sp%nqbz),tmpshifts(3,8))
! ! spqpt=qbz
! dsifkpt(:) = 1
! wtq(:) = one
! vacuum(:) = 0
! tmpshifts(:,:) = zero
! iout=6
! nqshft=dtset%nshiftk
! tmpshifts(:,1:nqshft) = dtset%shiftk(:,1:nqshft)
! qptrlatt=dtset%kptrlatt
! write(*,*) 'calling getkgrid'
! call getkgrid(dsifkpt,iout,3,spqpt,3,qptrlatt,qptrlen, &
!  &   hdr%nsym,sp%nqbz,nqpt_computed,nqshft,hdr%nsym,rprimd,tmpshifts,hdr%symafm, &
!  &   dtset%symrel,dtset%tnons,vacuum,wtq)
! write(*,*)'after'
! if (nqshft /= 1) then
!  write (*,*) 'Error: multiple qpt shifts not treated yet',&
!&        ' -- should be possible ', nqshft
!  stop
! end if
! write(*,*)qptrlatt
! nq1=qptrlatt(1,1)
! nq2=qptrlatt(2,2)
! nq3=qptrlatt(3,3)
! write(*,*)sp%nqbz,nqpt_computed
! deallocate (spqpt,wtq,tmpshifts)
! allocate (mcint(sp%nq,sp%npwx,sp%npwx))
! write(*,*)'calling rim'
! call rim0(b1,b2,b3,sp%nqbz,nq1,nq2,nq3,ucvol,outrim0)
 !call rim1(npwvec,sp%npwx,gvec,q,sp%nq,b1,b2,b3,nq1,nq2,nq3,ucvol,mcint)
! write(*,*)'ok'
! stop
!!END MG


 !calculate |q+G| for all q(IBZ) and G
 allocate(qpg(sp%npwx,sp%nq),stat=istat)
 if(istat/=0) stop 'out of memory in qpg'
 !MG NOTE1 qratio is only used for ppmodel==2
 !   NOTE2 qratio is evaluated in clcqpg even though ppmodel\=2
 !   Maybe here we can save a bit of memory
 allocate(qratio(sp%npwc,sp%npwc,sp%nq),stat=istat)
 if(istat/=0) then
  call memerr('sigma','qratio',sp%npwc*sp%npwc*sp%nq,'dp')
 end if
 call clcqpg(sp%npwx,sp%npwc,gvec,qpoint,sp%nq,b1,b2,b3,qpg,qratio)
 call pclock(90)

 if(sp%ppmodel==1.or.sp%ppmodel==2)then
  ppmsize=sp%npwc**2*sp%nq+sp%npwc**2*sp%nq
  allocate(er%bigomegatwsq(sp%npwc,sp%npwc,sp%nq),er%omegatw(sp%npwc,sp%npwc,sp%nq),stat=istat)
 else if(sp%ppmodel==3)then
  ppmsize=sp%npwc*sp%nq+sp%npwc*sp%nq
  allocate(er%bigomegatwsq(sp%npwc,1,sp%nq),er%omegatw(sp%npwc,1,sp%nq),stat=istat)
 else if(sp%ppmodel==4)then
  ppmsize=sp%npwc**2*sp%nq+sp%npwc*sp%nq
  allocate(er%bigomegatwsq(sp%npwc,sp%npwc,sp%nq),er%omegatw(sp%npwc,1,sp%nq),stat=istat)
 end if
 !MG NOTE the dimension of the array depends on the ppmodel used
 !        all these complex arrays are single-precision
 if(istat/=0) then
  call memerr('sigma','bigomegatwsq/omegatw',ppmsize,'spc')
 end if

 !MG eigenpot is used only if ppmodel==3, anyway it is passed to csigme
 !   Also here we can save a bit of memory
 allocate(eigpot(sp%npwc,sp%npwc,sp%nq),stat=istat)
 if(istat/=0) then
  call memerr('sigma','eigpot',sp%npwc**2*sp%nq,'spc')
 end if

 !calculate plasmonpole model parameters
 !only the total charge is needed ==> using first half of rho(nr,sp%nsppol)
 if(sp%ppmodel==1)then

  call cppm1par(sp%npwc,sp%nq,ep%nomega,er%epsm1,er%omega,er%bigomegatwsq, &
&  er%omegatw,omegaplasma)

 else if(sp%ppmodel==2)then

  call cppm2par(sp%npwc,sp%nq,ep%nomega,er%epsm1,er%bigomegatwsq, &
&  er%omegatw,ngfft1,ngfft2,ngfft3,gvec,qratio,rho(:,1),nr,qpoint,b1,b2,b3)

 else if(dtset%ppmodel==3)then

  call cppm3par(sp%npwc,sp%nq,ep%nomega,er%epsm1,er%bigomegatwsq, &
&  er%omegatw,ngfft1,ngfft2,ngfft3,gvec,rho(:,1),nr,eigpot,qpoint,b1,b2,b3)

 else if(dtset%ppmodel==4)then

  call cppm4par(sp%npwc,sp%nq,er%epsm1,ep%nomega,er%bigomegatwsq, &
&  er%omegatw,ngfft1,ngfft2,ngfft3,gvec,rho(:,1),nr,qpoint,b1,b2,b3)

 else
  write(message,'(a,a,a,a,a)')ch10,&
&  ' sigma: BUG -',ch10,&
&  ' The ppmodel can only be 1, 2, 3 or 4 ',ch10
  call wrtout(6,message,'COLL')
  call leave_new('COLL')
 end if

 if(dtset%ppmodel==1.or.dtset%ppmodel==2)then
  write(6,*)'omega twiddle minval ',minval(abs(er%omegatw(:,:,:)))*Ha_eV
 else
  allocate(temp_omegatw(sp%npwc,sp%nq))
  do ig=1,sp%npwc
   temp_omegatw(ig,:)=er%omegatw(ig,1,:)
  end do
  write(6,*)'omega twiddle minval ',minval(abs(temp_omegatw(:,:)))*Ha_eV
  deallocate(temp_omegatw)
 end if

 sr%nk=sp%nk
 sr%nb=sp%nb
 sr%nomega=sp%nomegasr

 deallocate(qratio)

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! FBruneval 05/12/15
! read QP wavefunctions of the previous step
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

! Read self-consistent transformation matrices and QP energies
 allocate(m_lda_to_qp(min_band_proc:max_band_proc,min_band_proc:max_band_proc,sp%nk,sp%nsppol),stat=istat)
 if(istat/=0) then
  call memerr('sigma','m_lda_to_qp',nbnds_per_proc*nbnds_per_proc*sr%nk*sp%nsppol,'spc')
 end if
 m_lda_to_qp(:,:,:,:)=czero
 allocate(en_qp(sp%nk,min_band_proc:max_band_proc,sp%nsppol),stat=istat)
 if(istat/=0) then
  call memerr('sigma','en_qp',sp%nk*nbnds_per_proc*sp%nsppol,'dp')
 end if

 allocate(rho_p(nr,nsppol))
 rho_p(:,:)=rho(:,:)

 call rdqps(dtfil,sp%gwcalctyp,sr%nk,nbnds_per_proc,sp%nsppol,kibz,nscf,nr,en,en_qp,m_lda_to_qp,rho_p,&
& min_band_proc,max_band_proc,mpi_enreg)
!compute the QP wfr, write them into wfr

 call calc_wf_qp(sp%nk,nbnds_per_proc,nr,          sp%nsppol,m_lda_to_qp,wfr,min_band_proc,max_band_proc)
 call calc_wf_qp(sp%nk,nbnds_per_proc,dtset%npwwfn,sp%nsppol,m_lda_to_qp,wfg,min_band_proc,max_band_proc)
!compute QP occupation numbers
 allocate(occ_qp(sp%nk,min_band_proc:max_band_proc,sp%nsppol))
 occ_qp(:,:,:)=occ(:,:,:)

 call fermi(hdr,sp%nb,sp%nk,dtset%fixmom,sp%nsppol,wtk,en_qp,occ_qp,nel,nbv,efermi_qp,&
& mpi_enreg,min_band_proc,max_band_proc,parallelism_is_on_bands)

!compute QP density
 allocate(rho_qp(nr,sp%nsppol))
!MG060926 in case of nsppol==2 crho reports the total and the spin up charge density
 call crho(irottb,sp%nb,ninv,sp%nkbz,sp%nk,sp%nk,nop,nr,nrb,&
& sp%nsppol,occ_qp,omegaplasma,rho_qp,ucvol,wfr,wtk,&
& mpi_enreg,min_band_proc,max_band_proc,parallelism_is_on_bands)

 deallocate(irottb)

!FB061217: simple mixing of the densities in order to damp oscillations in the Hartree potential
 rho_qp(:,:)=rho_p(:,:)+dtset%rhoqpmix*(rho_qp(:,:)-rho_p(:,:))
 deallocate(rho_p)

!MG NOTE
! Since 3 plasmon pole models depend on the Fourier components of the density
! I think that in case of self-consistency we should calculate here the ppm
! coefficients using rho_qp
! We can introduce a logical flag to avoid the calculation of the ppm parameters
! with the KS density and calculate here the coefficients
!END MG NOTE

 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 ! FBruneval 05/12/15
 ! setup hartree hamiltonian
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

 !2)  setup lda hamiltonian
 allocate(hlda(sr%nb,sr%nb,sr%nk,sp%nsppol),stat=istat)
 if (istat/=0) then
  call memerr('sigma','hlda',sr%nb*sr%nb*sr%nk*sp%nsppol,'spc')
 end if
 hlda(:,:,:,:)=czero

 !MG060923 added external loop on spin  hlda(b1,b2,k,s)=<b1,k,s| H_s | b2,k,s>
 !H_s depends on the spin index
 do is=1,sp%nsppol
  do ik=1,sp%nk
   do ib=1,sp%nb
    if(parallelism_is_on_bands)then
     if(minval(abs(mpi_enreg%proc_distrb(ib,:,:)-mpi_enreg%me))/=0) cycle
    end if
    hlda(ib,ib,ik,is)=en(ik,ib,is)
   end do ! ib
  end do ! ik
! XG070104 : should take this xsum_mpi out of the loop, but a specific xsum_mpi_xxxx has to be created
  if(parallelism_is_on_bands)call xsum_mpi(hlda(:,:,:,is),spaceComm,ierr)
 end do ! is

 allocate(sr%hhartree(sr%nb,sr%nb,sr%nk,sp%nsppol)) ! to check whether this can be para


 if(dtset%gwcalctyp<20) then

  !obtain the LDA Hartree Hamiltonian
  sr%hhartree(:,:,:,:)=hlda(:,:,:,:)-vxc(:,:,:,:)

 else

! 4) calculate the bare Hamiltonian
  allocate(hbare(sr%nb,sr%nb,sr%nk,sp%nsppol),stat=istat)
  if (istat/=0) then
   call memerr('sigma','hbare',sr%nb*sr%nb*sr%nk*sp%nsppol,'spc')
  end if

! Computes Hbare = -p^2/2 + vpsp_loc + vpsp_nonloc on lda wfs basis
  hbare(:,:,:,:)=hlda(:,:,:,:)-vhartr(:,:,:,:)-vxc(:,:,:,:)


! 5)  change the basis from LDA to QP
! MG060923 added nsppol in the list of input variables
  call ham_changebasis(sr%nb,sr%nk,sp%nsppol,m_lda_to_qp,hbare)

! 6)  calculate the QP Hartree potential
  write(message,'(a)')' *************** QP Energies *******************'
  call wrtout(6,message,'COLL')
! MG060923 added nsppol in the list of input variables
! only the total charge density is necessary ==> rho_qp(:,1)
  call hartrham(dtset,sp,gmet,ucvol,ngfft1,ngfft2,ngfft3,nr,&
&  npwvec,gvec,kibz,wtk,sp%nsppol,wfr,wfg,rho_qp(:,1),occ_qp,en_qp,vhartr,mpi_enreg,&
&  min_band_proc,max_band_proc,parallelism_is_on_bands)

! 7) obtain the QP Hartree Hamiltonian
! MG060923 modified shape of sr%hhartree to take into account the spin
  sr%hhartree(:,:,:,:)=hbare(:,:,:,:)+vhartr(:,:,:,:)

 end if ! self-consistent

 deallocate(wfg)

!8) prepare the storage of the new wavefunctions and new energies
!MG060923 modified shape of sr%hhartree to take into account the spin
 allocate(sr%eigvec_qp(min_band_proc:max_band_proc,min_band_proc:max_band_proc,sr%nk,sp%nsppol))
 allocate(sr%en_qp_diago(sr%nb,sr%nk,sp%nsppol))
 sr%eigvec_qp(:,:,:,:)=czero
 sr%en_qp_diago(:,:,:)=zero
!Initialize with LDA wavefunctions and en energies
 sr%eigvec_qp(:,:,:,:)=czero
 do ib=min_band_proc,max_band_proc
  sr%eigvec_qp(ib,ib,:,:)=cone
  sr%en_qp_diago(ib,:,:)=en(:,ib,:)
 end do

 if(allocated(vhartr))deallocate(vhartr)
 if(allocated(hbare))deallocate(hbare)
 if(allocated(hlda))deallocate(hlda)

!store < nks | Vxc | nks >
!NOTE here the matrix elements of vxc in the KS basis set are stored
!     not the elements in the QP basis set!, why?
 allocate(sr%vxcme(sr%nb,sr%nk,sp%nsppol))
 do ib=1,sr%nb
  sr%vxcme(ib,:,:)=vxc(ib,ib,:,:)
 end do

 deallocate(vxc)
 call pclock(100)

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

 allocate(sr%sigcme(sr%nb,sr%nk,sr%nomega,sp%nsppol),stat=istat)
 if(istat/=0) stop 'out of memory sigcme'
 allocate(sr%sigxme(sr%nb,sr%nk,sp%nsppol),stat=istat)
 if(istat/=0) stop 'out of memory sigxme'
 allocate(sr%sigxcme(sr%nb,sr%nk,sr%nomega,sp%nsppol),stat=istat)
 if(istat/=0) stop 'out of memory sigxcme'
 allocate(sr%sigcmee0(sr%nb,sr%nk,sp%nsppol),sr%ze0(sr%nb,sr%nk,sp%nsppol),stat=istat)
 if(istat/=0) stop 'out of memory sigcmee0, ze0'
 allocate(sr%dsigmee0(sr%nb,sr%nk,sp%nsppol),sr%sigmee(sr%nb,sr%nk,sp%nsppol),stat=istat)
 if(istat/=0) stop 'out of memory dsigmee0, sigmee'
 allocate(sr%e0(sr%nb,sr%nk,sp%nsppol),sr%degw(sr%nb,sr%nk,sp%nsppol),sr%egw(sr%nb,sr%nk,sp%nsppol),stat=istat)
 if(istat/=0) stop 'out of memory e0,egw,degw'

 allocate(sr%e0gap(sr%nk,sp%nsppol),sr%degwgap(sr%nk,sp%nsppol),sr%egwgap(sr%nk,sp%nsppol))

 sr%nomegasrd=sp%nomegasrd

 !MG since we need to evaluate $\Sigma(E)$ around the KS\QP eigenvalue
 !   whose value depends on the spin, a spin index has been added
 allocate(sr%omegasrd(sr%nb,sr%nk,sr%nomegasrd,sp%nsppol))

 allocate(sr%sigcmesrd(sr%nb,sr%nk,sr%nomegasrd,sp%nsppol))
 allocate(sr%sigxcmesrd(sr%nb,sr%nk,sr%nomegasrd,sp%nsppol))

   sr%e0(:,:,:)=zero
   sr%egw(:,:,:)=(zero,zero)
   sr%e0gap(:,:)=zero
   sr%sigcme(:,:,:,:)=(zero,zero)
   sr%sigxme(:,:,:)=(zero,zero)
   sr%sigxcme(:,:,:,:)=(zero,zero)
   sr%sigcmee0(:,:,:)=(zero,zero)
   sr%ze0(:,:,:)=(zero,zero)
   sr%dsigmee0(:,:,:)=(zero,zero)
   sr%sigmee(:,:,:)=(zero,zero)
   sr%omegasrd(:,:,:,:)=(zero,zero)
   sr%sigcmesrd(:,:,:,:)=(zero,zero)
   sr%sigxcmesrd(:,:,:,:)=(zero,zero)
   sr%degw(:,:,:)=(zero,zero)


 if(parallelism_is_on_kpoints)then
  allocate(nband_t(sp%nkbz*1))
  nband_t(:)=sp%nb
  mpi_enreg%parareel=0
  mpi_enreg%paralbd=0
  if (mpi_enreg%parareel==0)then
   mpi_enreg%paral_level=2
   allocate(mpi_enreg%proc_distrb(sp%nkbz,sp%nb,1))
   call distrb2(sp%nb,nband_t,sp%nkbz,1,mpi_enreg)
  end if
  deallocate(nband_t)
 end if ! parallelism_is_on_kpoints

 do is=1,sp%nsppol
  do ik=1,sr%nk
   do ib=1,sr%nb
    if(parallelism_is_on_bands)then
     if(minval(abs(mpi_enreg%proc_distrb(ib,:,:)-mpi_enreg%me))/=0) cycle
    end if
    sr%e0(ib,ik,is)=en_qp(ik,ib,is)
    sr%egw(ib,ik,is)=en_qp(ik,ib,is) ! first guess for gw energy
   end do
   if(parallelism_is_on_bands)then
    call xsum_mpi(sr%egw(:,ik,is),spaceComm,ierr)
   end if
!  MG060926 calculating gap for each spin, so it is not the *minimum* gap
   sr%e0gap(ik,is)=sr%e0(nbv(is)+1,ik,is)-sr%e0(nbv(is),ik,is)
  end do
 end do
 if(parallelism_is_on_bands)then
  call xsum_mpi(sr%e0,spaceComm,ierr)
 end if
 ep%soenergy=dtset%soenergy

 if(ep%soenergy>0.1d-4) then
  write(message,'(5a,f10.5,a)')&
&  ' performing a first self-consistency',ch10,&
&  ' update of the energies in G by a scissor operator',ch10, &
&  ' applying a scissor operator of [eV] ',ep%soenergy*Ha_eV,ch10
  call wrtout(06,message,'COLL') ; call wrtout(ab_out,message,'COLL')
  do is=1,sp%nsppol
   if(sr%nb>=nbv(is)+1)then
    sr%egw(nbv(is)+1:sr%nb,:,is)=sr%egw(nbv(is)+1:sr%nb,:,is)+ep%soenergy
   end if
  end do
 else if(update_energies) then
  allocate(sr_gwenergy(sr%nb,sr%nk,sp%nsppol),stat=istat)
  if(istat/=0) stop 'out of memory sr_gwenergy'
  write(message,'(3a)')&
&  ' performing a first self-consistency',ch10,&
   ' update of the energies in G by a previous GW calculation'
  call wrtout(06,message,'COLL')
  call wrtout(ab_out,message,'COLL')
  !MG added the spin variable
  call rdgw(nkibzr,sr%nb,nbv,sp%nsppol,kibz,sr_gwenergy)
  do is=1,sp%nsppol
   do ik=1,sr%nk
    do ib=1,sr%nb
    sr%egw(ib,ik,is) = sr_gwenergy(ik,ib,is)
    end do
   end do
  end do
  deallocate(sr_gwenergy)
 end if

 call write_sigma_results_header(sp,ep)

 filnam=trim(dtfil%filnam_ds(4))//'_GW'
 open(21,file=filnam,status='unknown',form='formatted')
 write(21,*) sp%nkcalc

 filnam=trim(dtfil%filnam_ds(4))//'_SIG'
 open(22,file=filnam,status='unknown',form='formatted')

 filnam=trim(dtfil%filnam_ds(4))//'_SGR'
 open(23,file=filnam,status='unknown',form='formatted')

 call timab(404,2,tsec)
 call timab(405,1,tsec)

!change this if you do not want to use symmetries in the calculation of \Sigma
 lt_k%sym_flag=dtset%symsigma
 if (lt_k%sym_flag/=0) then
  write(*,*)' switching on symmetrization'
  lt_k%nop=nop
  lt_k%ninv=ninv
  lt_k%nkbz=sp%nqbz
  allocate(lt_k%ibzq(sp%nqbz),lt_k%ltq(2,nop),lt_k%wtksym(2,nop,sp%nqbz))
  allocate(lt_k%tab(sp%nqbz),lt_k%tabi(sp%nqbz),lt_k%tabo(sp%nqbz))
  lt_k%tab=0
  lt_k%tabi=0
  lt_k%tabo=0
 end if

 call pclock(110)

 !calculate self-energy and save on disc for each k-point
 !MG here we have what I call the stupid input file BUG (or stupid man BUG):
 !if one calculates the GW corrections in the same k-point twice the results
 !for the correlation part of sigma are different, since in the second
 !calculation the starting point is updated and is different from the LDA value
 do ikcalc=1,sp%nkcalc

  write(message,'(a,i5)')' sigma : loop over k point, treating k point number',ikcalc
  call wrtout(6,message,'COLL')

  ikibz=ktab(sp%kcalc(ikcalc))

!MG TESTING SYMMETRIZATION IN CHI
  if (lt_k%sym_flag/=0) then
   call setup_little_group(sp%xkcalc(:,ikcalc),gmet,nop,op,ninv,sp%nqbz,qbz,lt_k%ibzq,lt_k%ltq,&
&         lt_k%tab,lt_k%tabo,lt_k%tabi,lt_k%wtksym)
   end if
!DEBUG
!   DEBUG=.false.
!   if (DEBUG==.true. .and. lt_k%sym_flag==.true.) then
!    call test_sym(sp,ep,sp%kcalc(ikcalc),sp%minbnd(ikcalc),sp%maxbnd(ikcalc),&
!&    npwvec,gvec,kibz,ktab,ktabr,ktabi,kbz,ngfft1,ngfft1a,ngfft2,&
!&    ngfft3,igfft,nr,nop,op,qbz,grottb,wfr_qp,lt_k)
!     cycle
!   end if
!ENDDEBUG

  call csigme(sp,ep,sp%kcalc(ikcalc),sp%minbnd(ikcalc),sp%maxbnd(ikcalc),&
&   qpoint,gvec,npwvec,op,nop,kibz,ktab,ktabr,ktabi,kbz,ngfft1,ngfft1a,ngfft2,&
&   ngfft3,igfft,nr,en_qp,occ_qp,qbz,qtab,qtabi,qtabo,qpg,ucvol,grottb,  &
&   wfr,efermi_qp,er,sr,mpi_enreg,dtset%ppmodel,eigpot,b1,b2,b3,      &
&   nbnds_per_proc,min_band_proc,max_band_proc,&
&   parallelism_is_on_kpoints,parallelism_is_on_bands,lt_k)

! Calculating direct gap for each spin
  do is=1,sp%nsppol
   if(sp%maxbnd(ikcalc)>=nbv(is)+1)then
    sr%egwgap(ikibz,is)=sr%egw(nbv(is)+1,ikibz,is)-sr%egw(nbv(is),ikibz,is)
    sr%degwgap(ikibz,is)=sr%degw(nbv(is)+1,ikibz,is)-sr%degw(nbv(is),ikibz,(is))
   else
!   The "gap" cannot be computed
    sr%e0gap(ikibz,is)=zero
    sr%egwgap(ikibz,is)=zero
    sr%degwgap(ikibz,is)=zero
   end if
  end do

  !print out final results
  call write_sigma_results(sp,sr,ikcalc,ikibz,en)
!     call write_sigma_results(sp,sr,ikcalc,ikibz)

  !write out scf data
  !call write_scf(dtfil,sp%gwcalctyp,sr%nk,sr%nb,ikibz,kibz(:,ikibz),&
  !&   nscf,sr%en_qp_diago(:,ikibz),sr%eigvec_qp(:,:,ikibz))
  call pclock(200+ikcalc)
 end do !ikcalc

 if(sp%gwcalctyp>=10)then 
  call fermi(hdr,sp%nb,sp%nk,dtset%fixmom,sp%nsppol,wtk,sr%en_qp_diago,occ_qp,nel,nbv,efermi_qp,&
&  mpi_enreg,min_band_proc,max_band_proc,parallelism_is_on_bands)
  write(message,'(a,es16.6,a,es16.6,a)') &
&  ' New Fermi energy : ',efermi_qp,' Ha ,',efermi_qp*Ha_eV,' eV'
  call wrtout(6,message,'COLL')
  call wrtout(ab_out,message,'COLL')
 endif

 deallocate(igfft,wfr,grottb,grottbm1,ktabr,qpg,eigpot)

 write(message,'(a,i5)')' sigma : finished the k point loop'
 call wrtout(6,message,'COLL')

!MG FIXME here I should introduce the spin.
!   to retain backwards compatibility I suggest
!   to add the Abinit header in the _QPS file,
!   in such a way the nsppol variable could be read from hdr%nsppol
!   if the header exists, otherwise the file is in the old format

 !write out scf data
 call wrqps(dtfil,sp%gwcalctyp,sr%nk,sr%nb,sp%nsppol,&
&  kibz,nscf,nr,sr%en_qp_diago,sr%eigvec_qp,m_lda_to_qp,rho_qp,min_band_proc,max_band_proc)

 write(message,'(a,i5)')' sigma : SCF data written '
 call wrtout(6,message,'COLL')

!MG interpolation of sigma matrix elements
! FIXME still experimental
!   call rdkss4interp(dtfil,sp,sr,npwvec,sp%npwwfn,kibz,kbz,nr,wfr_qp,ktab,ktabr,ktabi,&
!&  hdr,ngfft1,ngfft1a,ngfft2,ngfft2a,ngfft3,ngfft3a,mpi_enreg)

 call timab(405,2,tsec)

 close(21)
 close(22)

! close(unitem1)

 call pclock(9999)

!DEBUG
!write(6,*)' sigma : debug, stop '
!write(6,*)allocated(sr%sigxme),allocated(sr%sigcme),allocated(sr%sigxcme)
!write(6,*)allocated(sr%vxcme),allocated(sr%e0)
!write(6,*)allocated(sr%omega)
!write(6,*)allocated(sr%sigcmee0),allocated(sr%dsigmee0),allocated(sr%sigmee)
!write(6,*)allocated(sr%degw),allocated(sr%egw)
!write(6,*)allocated(sr%e0gap),allocated(sr%degwgap),allocated(sr%egwgap)
!write(6,*)allocated(sp%kcalc),allocated(sp%xkcalc),allocated(sp%minbnd),allocated(sp%maxbnd)
!stop
!ENDDEBUG

 deallocate(op,kibz,gvec,en,occ,occ_qp,qpoint,wtk,ktab,ktabi,ktabo,kbz)
 deallocate(qtab,qtabo,qtabi)
 deallocate(m_lda_to_qp,en_qp)
!MT 2006-0209 ???
! if(allocated(er%omega))deallocate(er%omega)
! if(allocated(er%epsm1))deallocate(er%epsm1)
 if(associated(er%omega))deallocate(er%omega)
 if(associated(er%epsm1))deallocate(er%epsm1)
 deallocate(sr%sigcme)
!MT 2006-0209 ???
! if(allocated(sr%sigxcme))deallocate(sr%sigxcme)
 if(associated(sr%sigxcme))deallocate(sr%sigxcme)
 deallocate(sr%sigxme)
 deallocate(sr%omegasrd)
 deallocate(sr%vxcme,sr%e0,sr%hhartree,sr%eigvec_qp,sr%en_qp_diago)
 if(sp%nomegasr>0) deallocate(sr%omega)
 deallocate(sr%sigcmee0,sr%dsigmee0,sr%sigcmesrd,sr%sigxcmesrd,sr%sigmee,sr%ze0)
 deallocate(sr%degw,sr%egw)
 deallocate(sr%e0gap,sr%degwgap,sr%egwgap)
 deallocate(sp%kcalc,sp%xkcalc,sp%minbnd,sp%maxbnd)
 deallocate(nbv)
 deallocate(rho,rho_qp)
 if (lt_k%sym_flag/=0) then
  deallocate (lt_k%ibzq,lt_k%ltq,lt_k%wtksym)
  deallocate (lt_k%tab,lt_k%tabi,lt_k%tabo)
 end if
!MT 2006-0209 ???
! if(allocated(er%bigomegatwsq))deallocate(er%bigomegatwsq)
! if(allocated(er%omegatw))deallocate(er%omegatw)
 if(associated(er%bigomegatwsq))deallocate(er%bigomegatwsq)
 if(associated(er%omegatw))deallocate(er%omegatw)
!MG FIXME according to g95 there are arrays that are still allocated
!   should fix this problem to avoid memory leaks in case of
!   calculations with datasets
!MT 2006-0209 ???
! if(allocated(mpi_enreg%proc_distrb))deallocate(mpi_enreg%proc_distrb)
 if(associated(mpi_enreg%proc_distrb))deallocate(mpi_enreg%proc_distrb)

!the extra if statement added by RShaltaf
 if( mpi_enreg%nproc==1 .or. (.not.nonlocal) .or. me==0)then
  if(i2==502.or.i2==602)then
   call hdr_clean(hdr)
  end if
 end if
 call timab(401,2,tsec)
 write(message,'(a)')' sigma ended'
 call wrtout(06,message,'COLL')

 end subroutine sigma
!!***
