////////////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2011 The Regents of the University of California
//
// This file is part of Qbox
//
// Qbox is distributed under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 2 of
// the License, or (at your option) any later version.
// See the file COPYING in the root directory of this distribution
// or .
//
////////////////////////////////////////////////////////////////////////////////
//
// Bisection.C
//
////////////////////////////////////////////////////////////////////////////////
#include "Bisection.h"
#include
#include
#include "jade.h"
#include "FourierTransform.h"
using namespace std;
////////////////////////////////////////////////////////////////////////////////
int walsh(int l, int n, int i)
{
// walsh function at level l at position i in a grid of n points
assert(i>=0);
assert(i= n/2 ) return 0;
else return 1;
}
else if ( l == 1 )
{
assert(n%4==0);
if ( i >= n/4 && i < (3*n)/4 ) return 0;
else return 1;
}
else if ( l == 2 )
{
assert(n%8==0);
if ( (i >= n/8 && i < 3*n/8) || (i >= 5*n/8 && i < 7*n/8) ) return 0;
else return 1;
}
else if ( l == 3 )
{
assert(n%16==0);
if ( (i >= n/16 && i < 3*n/16) ||
(i >= 5*n/16 && i < 7*n/16) ||
(i >= 9*n/16 && i < 11*n/16) ||
(i >= 13*n/16 && i < 15*n/16) ) return 0;
else return 1;
}
else if ( l == 4 )
{
assert(n%32==0);
if ( (i >= n/32 && i < 3*n/32) ||
(i >= 5*n/32 && i < 7*n/32) ||
(i >= 9*n/32 && i < 11*n/32) ||
(i >= 13*n/32 && i < 15*n/32) ||
(i >= 17*n/32 && i < 19*n/32) ||
(i >= 21*n/32 && i < 23*n/32) ||
(i >= 25*n/32 && i < 27*n/32) ||
(i >= 29*n/32 && i < 31*n/32) ) return 0;
else return 1;
}
else
assert(false);
}
////////////////////////////////////////////////////////////////////////////////
Bisection::Bisection(const SlaterDet& sd, int nlevels[3]) : ctxt_(sd.context())
{
// localization vectors are long int
assert(sizeof(long int) >= 4);
nst_ = sd.nst();
nstloc_ = sd.nstloc();
// number of bisection levels in each direction
nlevels_[0]=nlevels[0];
nlevels_[1]=nlevels[1];
nlevels_[2]=nlevels[2];
// number of subdivisions required in each direction
// ndiv = 2^nlevel
ndiv_[0] = 1 << nlevels[0];
ndiv_[1] = 1 << nlevels[1];
ndiv_[2] = 1 << nlevels[2];
// largest number of levels
nlevelsmax_=max(nlevels[0],max(nlevels[1],nlevels[2]));
// real-space grid size for wave functions
np_[0] = sd.basis().np(0);
np_[1] = sd.basis().np(1);
np_[2] = sd.basis().np(2);
// adapt the grid dimensions to the levels of bisection
for ( int idim = 0; idim < 3; idim++ )
{
for ( int j=1; j<=nlevels[idim]; j++ )
{
int base = 1 << j;
if ( np_[idim] % base != 0 ) np_[idim] += base/2;
}
}
while (!sd.basis().factorizable(np_[0])) np_[0] += (1<np2_loc();
np012loc_ = ft_->np012loc();
// xy projector index: xy_proj[i+j*np0] is the xy projector
// associated with a point located at i + j*np0 + k*np0*np1
xy_proj_.resize(np01_);
for ( int i = 0; i < np_[0]; i++ )
for ( int j = 0; j < np_[1]; j++ )
{
int i_slice_x= ( i * ndiv_[0] ) / np_[0];
int i_slice_y= ( j * ndiv_[1] ) / np_[1];
int xy_proj_index = i_slice_x + ndiv_[0] * i_slice_y;
xy_proj_[i+np_[0]*j] = xy_proj_index;
}
// nmat_: number of A matrices
nmat_ = nlevels[0] + nlevels[1] + nlevels[2];
// each projector uses two bits of the localization vector
// check that sizeof(long int) is sufficient
// number of bits in a long int is 8*sizeof(long int)
assert(2*nmat_ <= 8*sizeof(long int));
// allocate A matrices
amat_.resize(nmat_);
const ComplexMatrix &c = sd.c();
for ( int i = 0; i < nmat_; i++ )
amat_[i] = new DoubleMatrix(c.context(),c.n(),c.n(),c.nb(),c.nb());
// allocate rotation matrix
u_ = new DoubleMatrix(c.context(),c.n(),c.n(),c.nb(),c.nb());
// matrices of real space wave functions in subdomains
rmat_.resize( ndiv_[0]*ndiv_[1] );
{
// xyproj_rsize: real-space size of xy projectors
vector xyproj_rsize( ndiv_[0]*ndiv_[1] , 0 );
for ( int ixy = 0; ixy < np01_; ixy++ )
xyproj_rsize[xy_proj_[ixy]] += np2loc_;
// max_xyproj_rsize: maximum real-space size of xy projectors
vector max_xyproj_rsize( ndiv_[0]*ndiv_[1] , 0 );
MPI_Allreduce((void*)&xyproj_rsize[0] , (void*)&max_xyproj_rsize[0],
(int)xyproj_rsize.size(), MPI_INT ,MPI_MAX , ctxt_.comm());
// allocate matrices rmat_[i]
for ( int i = 0; i < rmat_.size(); i++ )
{
int n = c.n();
int nb = c.nb();
int m = max_xyproj_rsize[i] * c.context().nprow();
int mb = max_xyproj_rsize[i];
rmat_[i] = new DoubleMatrix(c.context(),m,n,mb,nb);
rmat_[i]->clear();
}
}
localization_.resize(nst_);
}
////////////////////////////////////////////////////////////////////////////////
Bisection::~Bisection(void)
{
delete ft_;
for ( int i = 0; i < nmat_; i++ ) delete amat_[i];
delete u_;
for ( int i = 0; i < rmat_.size(); i++ ) delete rmat_[i];
}
////////////////////////////////////////////////////////////////////////////////
void Bisection::compute_transform(const SlaterDet& sd, int maxsweep, double tol)
{
// compute the transformation with tolerance tol
map tmap;
// check that basis is real
// jade is not implemented for complex matrices
assert( sd.basis().real() );
const ComplexMatrix& c = sd.c();
const vector& occ = sd.occ();
assert(occ.size() == nst_);
#ifdef TIMING
tmap["wf FFT"].start();
#endif
// Fourier transform states and save real-space values in
// rmat_ matrices
vector > wftmp(np012loc_,0.0);
for ( int n = 0; n < nstloc_; n++ )
{
ft_->backward(c.cvalptr(c.mloc()*n),&wftmp[0]);
// pointers to rmat
vector p_rmat( ndiv_[0]*ndiv_[1] );
for (int iproj=0; iprojmloc();
p_rmat[iproj] = rmat_[iproj]->valptr(index);
}
// copy wf to rmat arrays
for ( int iz = 0; iz < np2loc_; iz++ )
{
for ( int ixy = 0; ixy < np01_; ixy++ )
{
int xy_proj = xy_proj_[ixy];
(*p_rmat[xy_proj]) = wftmp[ixy + iz*np01_].real();
p_rmat[xy_proj]++;
}
}
}
#ifdef TIMING
tmap["wf FFT"].stop();
#endif
// compute the x/y A matrices
#ifdef TIMING
tmap["xy products"].start();
#endif
// clear a matrices
for ( int i = 0; i < nmat_; i++ )
amat_[i]->clear();
int size=amat_[0]->size();
// allocate matrix for products of projected parts
DoubleMatrix products(c.context(),c.n(),c.n(),c.nb(),c.nb());
for ( int i_proj=0; i_projvalptr(0);
for ( int i=0; ivalptr(0);
double fac = 1.0 / (np_[0]*np_[1]*np_[2]);
for ( int i = 0; i < size; i++ )
coeff[i] *= fac;
imat++;
}
// else: matrix A is a projector in the z direction
else if ( ilevelreal matrix product
DoubleMatrix c_proxy(c);
DoubleMatrix c_pz_proxy(c_pz);
for ( int ilevel=0; ilevel p_rmat( ndiv_[0]*ndiv_[1] );
for ( int iproj=0; iprojmloc();
p_rmat[iproj] = rmat_[iproj]->valptr(index);
}
// save values of wf*walsh in rmat arrays
for ( int iz = 0; iz < np2loc_; iz++ )
{
int izglobal = ft_->np2_first() + iz;
double walsh_z = walsh(ilevel, np_[2], izglobal);
for ( int ixy = 0; ixy < np01_; ixy++ )
{
int i = ixy + iz * np01_;
int xy_proj = xy_proj_[ixy];
wftmp[i] = (*p_rmat[xy_proj]) * walsh_z;
p_rmat[xy_proj]++;
}
}
ft_->forward(&wftmp[0],c_pz.valptr(c_pz.mloc()*n));
}
// compute the product
// factor -2.0 in next line: G and -G
amat_[imat]->gemm('t','n',2.0,c_pz_proxy,c_proxy,0.0);
// rank-1 update using first row of cd_proxy() and c_proxy
amat_[imat]->ger(-1.0,c_pz_proxy,0,c_proxy,0);
imat++;
}
}
}
#ifdef TIMING
tmap["z products"].stop();
#endif
#ifdef DEBUG
// check the values of the amat matrices
#ifdef TIMING
tmap["check_amat"].start();
#endif
check_amat(c);
#ifdef TIMING
tmap["check_amat"].stop();
#endif
#endif
// set to zero matrix elements of the matrices amat_[i] if they couple
// states with differing occupation numbers
trim_amat(occ);
#ifdef DEBUG_PRINT_MAT
for ( int k = 0; k < amat_.size(); k++ )
{
cout << "A(k=" << k << "):" << endl;
cout << *amat_[k];
}
#endif
// joint approximate diagonalization of the matrices amat (jade)
#ifdef TIMING
tmap["jade"].start();
#endif
// diagonal values adiag_[k][i]
// adiag_ is resized by jade
// diagonalize projectors
int nsweep = jade(maxsweep,tol,amat_,*u_,adiag_);
#ifdef TIMING
if ( ctxt_.onpe0() )
cout << "Bisection::compute_transform: nsweep=" << nsweep
<< " maxsweep=" << maxsweep << " tol=" << tol << endl;
#endif
#ifdef TIMING
tmap["jade"].stop();
#endif
#ifdef DEBUG_PRINT_MAT
cout << "U:" << endl;
cout << *u_;
#endif
for ( int imat=0; imat::iterator i = tmap.begin(); i != tmap.end(); i++ )
{
double time = (*i).second.real();
double tmin = time;
double tmax = time;
ctxt_.dmin(1,1,&tmin,1);
ctxt_.dmax(1,1,&tmax,1);
if ( ctxt_.myproc()==0 )
{
cout << ""
<< endl;
}
}
#endif
}
////////////////////////////////////////////////////////////////////////////////
void Bisection::compute_localization(double epsilon)
{
// compute localization vector for a threshold epsilon
for ( int n = 0; n < nst_; n++ )
{
localization_[n] = 0;
for ( int imat = 0; imat < nmat_; imat++ )
{
if ( adiag_[imat][n] < epsilon )
localization_[n] += 1<<(2*imat);
else if ( adiag_[imat][n] > 1.0-epsilon)
localization_[n] += 1<<(2*imat+1);
else
localization_[n] += (1<<(2*imat)) + (1<<(2*imat+1));
}
}
#ifdef DEBUG
// print localization vector and number of overlaps (including self)
// for each state
if ( ctxt_.onpe0() )
{
int sum = 0;
for ( int i = 0; i < nst_; i++ )
{
int count = 0;
for ( int j = 0; j < nst_; j++ )
{
if ( overlap(i,j) )
count++;
}
cout << "localization[" << i << "]: "
<< localization_[i] << " "
<< bitset<30>(localization_[i]) << " overlaps: "
<< count << endl;
sum += count;
}
cout << "total overlaps: " << sum << " / " << nst_*nst_
<< " = " << ((double) sum)/(nst_*nst_) << endl;
}
#endif
// broadcast localization to all tasks to ensure consistency
MPI_Bcast( (void *) &localization_[0], localization_.size(),
MPI_LONG, 0, ctxt_.comm() );
}
////////////////////////////////////////////////////////////////////////////////
void Bisection::forward(SlaterDet& sd)
{
forward(*u_,sd);
}
////////////////////////////////////////////////////////////////////////////////
void Bisection::forward(DoubleMatrix& u, SlaterDet& sd)
{
// apply the bisection transformation to the SlaterDet sd
// apply the rotation u to sd.c()
ComplexMatrix& c = sd.c();
ComplexMatrix cp(c);
DoubleMatrix cp_proxy(cp);
DoubleMatrix c_proxy(c);
c_proxy.gemm('n','n',1.0,cp_proxy,u,0.0);
}
////////////////////////////////////////////////////////////////////////////////
void Bisection::backward(SlaterDet& sd)
{
backward(*u_,sd);
}
////////////////////////////////////////////////////////////////////////////////
void Bisection::backward(DoubleMatrix& u, SlaterDet& sd)
{
// apply the inverse bisection transformation to SlaterDet sd
// apply rotation u^T to sd
ComplexMatrix& c = sd.c();
ComplexMatrix cp(c);
DoubleMatrix cp_proxy(cp);
DoubleMatrix c_proxy(c);
c_proxy.gemm('n','t',1.0,cp_proxy,u,0.0);
}
////////////////////////////////////////////////////////////////////////////////
bool Bisection::check_amat(const ComplexMatrix &c)
{
// allocate memory for copy of the wave functions
ComplexMatrix cd(c.context(),c.m(),c.n(),c.mb(),c.nb());
// precompute values of Walsh functions in three directions
// wx[l][i] = value of level l Walsh function at position i in direction x
vector > > w(3);
for ( int idir=0; idir<3; idir++ )
{
w[idir].resize(nlevels_[idir]);
//
for ( int l = 0; l < nlevels_[idir]; l++ )
{
w[idir][l].resize(np_[idir]);
for ( int i = 0; i < np_[idir]; i++ )
{
w[idir][l][i] = walsh(l,np_[idir],i);
}
}
}
// compute matrices B_k =
vector bmat(nmat_);
for ( int k = 0; k < bmat.size(); k++ )
{
bmat[k] = new DoubleMatrix(c.context(),c.n(),c.n(),c.nb(),c.nb());
}
DoubleMatrix c_proxy(c);
DoubleMatrix cd_proxy(cd);
vector > wftmp(ft_->np012loc());
complex *f = &wftmp[0];
// compute matrices A at all levels
for ( int l=0 , imat=0; l < nlevelsmax_; l++ )
{
// x direction
if ( lbackward(cd.cvalptr(cd.mloc()*n),&wftmp[0]);
for ( int i = 0; i < np_[0]; i++ )
{
if ( w[0][l][i] == 0 )
{
for ( int j = 0; j < np_[1]; j++ )
for ( int k = 0; k < ft_->np2_loc(); k++ )
f[i + np_[0] * ( j + np_[1] * k )] = 0.0;
}
}
ft_->forward(&wftmp[0],cd.valptr(cd.mloc()*n));
}
// factor -2.0 in next line: G and -G
bmat[imat]->gemm('t','n',2.0,cd_proxy,c_proxy,0.0);
// rank-1 update using first row of cd_proxy() and c_proxy
bmat[imat]->ger(-1.0,cd_proxy,0,c_proxy,0);
imat++;
}
// y direction
if ( lbackward(cd.cvalptr(cd.mloc()*n),&wftmp[0]);
for ( int j = 0; j < np_[1]; j++ )
{
if ( w[1][l][j] == 0 )
{
for ( int i = 0; i < np_[0]; i++ )
for ( int k = 0; k < ft_->np2_loc(); k++ )
f[i + np_[0] * ( j + np_[1] * k )] = 0.0;
}
}
ft_->forward(&wftmp[0],cd.valptr(cd.mloc()*n));
}
// factor -2.0 in next line: G and -G
bmat[imat]->gemm('t','n',2.0,cd_proxy,c_proxy,0.0);
// rank-1 update using first row of cd_proxy() and c_proxy
bmat[imat]->ger(-1.0,cd_proxy,0,c_proxy,0);
imat++;
}
// z direction
if ( lbackward(cd.cvalptr(cd.mloc()*n),&wftmp[0]);
for ( int k = 0; k < ft_->np2_loc(); k++ )
{
int kglobal = ft_->np2_first() + k;
const int istart = k * np_[0] * np_[1];
if ( w[2][l][kglobal] == 0 )
{
for ( int ij = 0; ij < np_[0]*np_[1]; ij++ )
f[istart+ij] = 0.0;
}
}
ft_->forward(&wftmp[0],cd.valptr(cd.mloc()*n));
}
// factor -2.0 in next line: G and -G
bmat[imat]->gemm('t','n',2.0,cd_proxy,c_proxy,0.0);
// rank-1 update using first row of cd_proxy() and c_proxy
bmat[imat]->ger(-1.0,cd_proxy,0,c_proxy,0);
imat++;
}
} // for l
// testing the matrices
for ( int imat=0; imatvalptr(0);
double *b=bmat[imat]->valptr(0);
int ncoeff=amat_[imat]->mloc()*amat_[imat]->nloc();
for ( int i=0; i1e-10)
{
cout << "error > 1.e-10 for matrix " << imat << endl;
}
}
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
void Bisection::trim_amat(const vector& occ)
{
// set to zero the matrix elements of the matrices amat_[k] if they couple
// states with differing occupation numbers
const double trim_tol = 1.e-6;
// check if all occupation numbers are the same
double occ_max = 0.0, occ_min = 2.0;
for ( int i = 0; i < occ.size(); i++ )
{
occ_max = max(occ_max, occ[i]);
occ_min = min(occ_min, occ[i]);
}
// return if all occupation numbers are equal
if ( fabs(occ_max-occ_min) < trim_tol )
return;
const int mloc = amat_[0]->mloc();
const int nloc = amat_[0]->nloc();
// loop over elements local to this task
for ( int i = 0; i < mloc; i++ )
{
const int iglobal = amat_[0]->iglobal(i);
for ( int j = 0; j < nloc; j++ )
{
const int jglobal = amat_[0]->jglobal(j);
const int ival = i + mloc * j;
if ( fabs(occ[iglobal] - occ[jglobal]) > trim_tol )
for ( int k = 0; k < amat_.size(); k++ )
(*amat_[k])[ival] = 0.0;
}
}
}
////////////////////////////////////////////////////////////////////////////////
bool Bisection::overlap(int i, int j) const
{
return overlap(localization_,i,j);
}
////////////////////////////////////////////////////////////////////////////////
bool Bisection::overlap(const vector& loc_, int i, int j) const
{
// overlap: return true if the functions i and j overlap according
// to the localization vector loc_
long int loc_i = loc_[i];
long int loc_j = loc_[j];
while ( loc_i!=0 && loc_j!=0 )
{
// get the weight of projections for each state
bool p_right_i = loc_i & 1;
bool p_left_i = loc_i & 2;
bool p_right_j = loc_j & 1;
bool p_left_j = loc_j & 2;
// return false as soon as the states are found to be separated
if ( !( ( p_right_i && p_right_j ) || ( p_left_i && p_left_j ) ) )
return false;
loc_i >>= 2;
loc_j >>= 2;
}
// return true if the states overlap
return true;
}
////////////////////////////////////////////////////////////////////////////////
double Bisection::pair_fraction(void) const
{
// pair_fraction: return fraction of pairs having non-zero overlap
// count pairs (i,j) having non-zero overlap for i != j only
int sum = 0;
for ( int i = 1; i < nst_; i++ )
{
int count = 0;
for ( int j = i+1; j < nst_; j++ )
{
if ( overlap(i,j) )
count++;
}
sum += count;
}
// add overlap with self: (i,i)
sum += nst_;
return ((double) sum)/((nst_*(nst_+1))/2);
}
////////////////////////////////////////////////////////////////////////////////
double Bisection::size(int i) const
{
// size: return fraction of the domain on which state i is non-zero
long int loc = localization_[i];
double size = 1.0;
// process all projectors
while ( loc != 0 )
{
// weight of projections
bool p_right = loc & 1;
bool p_left = loc & 2;
if ( (p_right && !p_left) || (!p_right && p_left) )
size *= 0.5;
loc >>= 2;
}
// return true if the states overlap
return size;
}
////////////////////////////////////////////////////////////////////////////////
double Bisection::total_size(void) const
{
// total_size: return normalized sum of sizes
double sum = 0.0;
for ( int i = 0; i < nst_; i++ )
sum += size(i);
return sum / nst_;
}