...
 
Commits (16)
......@@ -22,18 +22,24 @@
#include <iostream>
#include <cassert>
#include <cstring> // memset
using namespace std;
#if USE_GATHER_SCATTER
extern "C" {
// zgthr: x(i) = y(indx(i))
void zgthr_(int* n, complex<double>* y, complex<double>* x, int *indx);
// zsctr: y(indx(i)) = x(i)
void zsctr_(int* n, complex<double>* x, int* indx, complex<double>* y);
}
#endif
////////////////////////////////////////////////////////////////////////////////
BasisMapping::BasisMapping (const Basis &basis) : basis_(basis)
BasisMapping::BasisMapping (const Basis &basis, int np0, int np1, int np2) :
basis_(basis), np0_(np0), np1_(np1), np2_(np2)
{
nprocs_ = basis_.npes();
myproc_ = basis_.mype();
np0_ = basis.np(0);
np1_ = basis.np(1);
np2_ = basis.np(2);
np2_loc_.resize(nprocs_);
np2_first_.resize(nprocs_);
......@@ -127,6 +133,9 @@ BasisMapping::BasisMapping (const Basis &basis) : basis_(basis)
rdispl[iproc] = rdispl[iproc-1] + rcounts[iproc-1];
}
// check if the basis_ fits in the grid np0, np1, np2
assert(basis_.fits_in_grid(np0_,np1_,np2_));
if ( basis_.real() )
{
// compute index arrays ip_ and im_ for mapping vector->zvec
......@@ -388,19 +397,18 @@ BasisMapping::BasisMapping (const Basis &basis) : basis_(basis)
}
////////////////////////////////////////////////////////////////////////////////
void BasisMapping::transpose_fwd(const complex<double> *zvec,
complex<double> *ct)
void BasisMapping::transpose_bwd(const complex<double> *zvec,
complex<double> *ct) const
{
// Transpose zvec to ct
// scatter zvec to sbuf for transpose
#if USE_GATHER_SCATTER
// zsctr: y(indx(i)) = x(i)
// void zsctr_(int* n, complex<double>* x, int* indx, complex<double>* y);
{
complex<double>* y = &sbuf[0];
complex<double>* y = const_cast<complex<double>*>(&sbuf[0]);
complex<double>* x = const_cast<complex<double>*>(zvec);
int n = zvec_.size();
zsctr_(&n,x,&ipack_[0],y);
int n = zvec_size();
zsctr_(&n,x,const_cast<int*>(&ipack_[0]),y);
}
#else
const int len = zvec_size();
......@@ -420,39 +428,36 @@ void BasisMapping::transpose_fwd(const complex<double> *zvec,
// segments of z-vectors are now in sbuf
// transpose
#if USE_MPI
int status = MPI_Alltoallv((double*)&sbuf[0],&scounts[0],&sdispl[0],
MPI_DOUBLE,(double*)&rbuf[0],&rcounts[0],&rdispl[0],MPI_DOUBLE,
basis_.comm());
if ( status != 0 )
if ( nprocs_ == 1 )
{
cout << " BasisMapping: status = " << status << endl;
MPI_Abort(basis_.comm(),2);
assert(sbuf.size()==rbuf.size());
rbuf.swap(sbuf);
}
#else
assert(sbuf.size()==rbuf.size());
rbuf = sbuf;
#endif
// copy from rbuf to ct
// scatter index array iunpack
else
{
const int len = np012loc_;
double* const pv = (double*) ct;
for ( int i = 0; i < len; i++ )
int status =
MPI_Alltoallv((double*)&sbuf[0],(int*)&scounts[0],(int*)&sdispl[0],
MPI_DOUBLE,(double*)&rbuf[0],(int*)&rcounts[0],(int*)&rdispl[0],
MPI_DOUBLE, basis_.comm());
if ( status != 0 )
{
pv[2*i] = 0.0;
pv[2*i+1] = 0.0;
cout << " BasisMapping: status = " << status << endl;
MPI_Abort(basis_.comm(),2);
}
}
// clear ct
memset((void*)ct,0,np012loc_*sizeof(complex<double>));
// copy from rbuf to ct
// using scatter index array iunpack
#if USE_GATHER_SCATTER
// zsctr(n,x,indx,y): y(indx(i)) = x(i)
{
complex<double>* y = ct;
complex<double>* x = &rbuf[0];
complex<double>* x = const_cast<complex<double>*>(&rbuf[0]);
int n = rbuf.size();
zsctr_(&n,x,&iunpack_[0],y);
zsctr_(&n,x,const_cast<int*>(&iunpack_[0]),y);
}
#else
{
......@@ -475,19 +480,18 @@ void BasisMapping::transpose_fwd(const complex<double> *zvec,
}
////////////////////////////////////////////////////////////////////////////////
void BasisMapping::transpose_bwd(const complex<double> *ct,
complex<double> *zvec)
void BasisMapping::transpose_fwd(const complex<double> *ct,
complex<double> *zvec) const
{
// transpose back distributed array ct into zvec
// transpose ct to zvec
// gather ct into rbuf
#if USE_GATHER_SCATTER
// zgthr: x(i) = y(indx(i))
// void zgthr_(int* n, complex<double>* y, complex<double>* x, int*indx);
{
complex<double>* y = const_cast<complex<double>*>(ct);
complex<double>* x = &rbuf[0];
complex<double>* x = const_cast<complex<double>*>(&rbuf[0]);
int n = rbuf.size();
zgthr_(&n,y,x,&iunpack_[0]);
zgthr_(&n,y,x,const_cast<int*>(&iunpack_[0]));
}
#else
const int rbuf_size = rbuf.size();
......@@ -505,27 +509,34 @@ void BasisMapping::transpose_bwd(const complex<double> *ct,
#endif
// transpose
#if USE_MPI
int status = MPI_Alltoallv((double*)&rbuf[0],&rcounts[0],&rdispl[0],
MPI_DOUBLE,(double*)&sbuf[0],&scounts[0],&sdispl[0],MPI_DOUBLE,
basis_.comm());
assert ( status == 0 );
#else
assert(sbuf.size()==rbuf.size());
sbuf = rbuf;
#endif
if ( nprocs_ == 1 )
{
assert(sbuf.size()==rbuf.size());
sbuf.swap(rbuf);
}
else
{
int status =
MPI_Alltoallv((double*)&rbuf[0],(int*)&rcounts[0],(int*)&rdispl[0],
MPI_DOUBLE,(double*)&sbuf[0],(int*)&scounts[0],(int*)&sdispl[0],
MPI_DOUBLE, basis_.comm());
if ( status != 0 )
{
cout << " BasisMapping: status = " << status << endl;
MPI_Abort(basis_.comm(),2);
}
}
// segments of z-vectors are now in sbuf
// gather sbuf into zvec_
#if USE_GATHER_SCATTER
// zgthr: x(i) = y(indx(i))
// void zgthr_(int* n, complex<double>* y, complex<double>* x, int*indx);
{
complex<double>* y = &sbuf[0];
complex<double>* y = const_cast<complex<double>*>(&sbuf[0]);
complex<double>* x = zvec;
int n = zvec_.size();
zgthr_(&n,y,x,&ipack_[0]);
int n = zvec_size();
zgthr_(&n,y,x,const_cast<int*>(&ipack_[0]));
}
#else
const int len = zvec_size();
......@@ -545,17 +556,14 @@ void BasisMapping::transpose_bwd(const complex<double> *ct,
////////////////////////////////////////////////////////////////////////////////
void BasisMapping::vector_to_zvec(const complex<double> *c,
complex<double> *zvec)
complex<double> *zvec) const
{
// clear zvec
memset((void*)&zvec[0],0,zvec_size()*sizeof(complex<double>));
// map coefficients from the basis order to a zvec
const int ng = basis_.localsize();
const int len = zvec_size();
double* const pz = (double*) zvec;
for ( int i = 0; i < len; i++ )
{
pz[2*i] = 0.0;
pz[2*i+1] = 0.0;
}
const int ng = basis_.localsize();
const double* const pc = (const double*) c;
if ( basis_.real() )
{
......@@ -584,9 +592,46 @@ void BasisMapping::vector_to_zvec(const complex<double> *c,
pz[2*ip+1] = b;
}
}
////////////////////////////////////////////////////////////////////////////////
void BasisMapping::doublevector_to_zvec(const complex<double> *c1,
const complex<double> *c2, complex<double> *zvec) const
{
assert(basis_.real());
// clear zvec
memset((void*)&zvec[0],0,zvec_size()*sizeof(complex<double>));
// map two real functions to zvec
double* const pz = (double*) &zvec[0];
const int ng = basis_.localsize();
const double* const pc1 = (double*) &c1[0];
const double* const pc2 = (double*) &c2[0];
#pragma omp parallel for
for ( int ig = 0; ig < ng; ig++ )
{
// const double a = c1[ig].real();
// const double b = c1[ig].imag();
// const double c = c2[ig].real();
// const double d = c2[ig].imag();
// zvec_[ip] = complex<double>(a-d, b+c);
// zvec_[im] = complex<double>(a+d, c-b);
const double a = pc1[2*ig];
const double b = pc1[2*ig+1];
const double c = pc2[2*ig];
const double d = pc2[2*ig+1];
const int ip = ip_[ig];
const int im = im_[ig];
pz[2*ip] = a - d;
pz[2*ip+1] = b + c;
pz[2*im] = a + d;
pz[2*im+1] = c - b;
}
}
////////////////////////////////////////////////////////////////////////////////
void BasisMapping::zvec_to_vector(const complex<double> *zvec,
complex<double> *c)
complex<double> *c) const
{
const int ng = basis_.localsize();
const double* const pz = (const double*) zvec;
......@@ -601,3 +646,35 @@ void BasisMapping::zvec_to_vector(const complex<double> *zvec,
pc[2*ig+1] = pz1;
}
}
////////////////////////////////////////////////////////////////////////////////
void BasisMapping::zvec_to_doublevector(const complex<double> *zvec,
complex<double> *c1, complex<double> *c2 ) const
{
// Mapping of zvec onto two real functions
assert(basis_.real());
const int ng = basis_.localsize();
const double* const pz = (double*) &zvec[0];
double* const pc1 = (double*) &c1[0];
double* const pc2 = (double*) &c2[0];
#pragma omp parallel for
for ( int ig = 0; ig < ng; ig++ )
{
// const double a = 0.5*zvec_[ip].real();
// const double b = 0.5*zvec_[ip].imag();
// const double c = 0.5*zvec_[im].real();
// const double d = 0.5*zvec_[im].imag();
// c1[ig] = complex<double>(a+c, b-d);
// c2[ig] = complex<double>(b+d, c-a);
const int ip = ip_[ig];
const int im = im_[ig];
const double a = pz[2*ip];
const double b = pz[2*ip+1];
const double c = pz[2*im];
const double d = pz[2*im+1];
pc1[2*ig] = 0.5 * ( a + c );
pc1[2*ig+1] = 0.5 * ( b - d );
pc2[2*ig] = 0.5 * ( b + d );
pc2[2*ig+1] = 0.5 * ( c - a );
}
}
......@@ -38,32 +38,41 @@ class BasisMapping
std::vector<int> np2_first_; // np2_first_[iproc], iproc=0, nprocs_-1
std::vector<int> scounts, sdispl, rcounts, rdispl;
std::vector<std::complex<double> > sbuf, rbuf;
mutable std::vector<std::complex<double> > sbuf, rbuf;
std::vector<int> ip_, im_;
std::vector<int> ipack_, iunpack_;
public:
BasisMapping (const Basis &basis);
BasisMapping (const Basis &basis, int np0, int np1, int np2);
int np0(void) const { return np0_; }
int np1(void) const { return np1_; }
int np2(void) const { return np2_; }
int np2loc(void) const { return np2_loc_[myproc_]; }
int np2_loc(void) const { return np2_loc_[myproc_]; }
int np2_loc(int iproc) const { return np2_loc_[iproc]; }
int np2_first(void) const { return np2_first_[myproc_]; }
int np2_first(int iproc) const { return np2_first_[iproc]; }
int np012loc(void) const { return np012loc_; }
int nvec(void) const { return nvec_; }
int zvec_size(void) const { return nvec_ * np2_; }
// map a function c(G) to zvec_
void vector_to_zvec(const std::complex<double> *c,
std::complex<double> *zvec);
std::complex<double> *zvec) const;
// map two real functions c1(G) and c2(G) to zvec_
void doublevector_to_zvec(const std::complex<double> *c1,
const std::complex<double> *c2,std::complex<double> *zvec) const;
// map zvec_ to a function c(G)
void zvec_to_vector(const std::complex<double> *zvec,
std::complex<double> *c);
std::complex<double> *c) const;
// map zvec_ to two real functions c1(G) and c2(G)
void zvec_to_doublevector(const std::complex<double> *zvec,
std::complex<double> *c1, std::complex<double> *c2) const;
void transpose_fwd(const std::complex<double> *zvec,
std::complex<double> *ct);
void transpose_bwd(const std::complex<double> *ct,
std::complex<double> *zvec);
void transpose_bwd(const std::complex<double> *zvec,
std::complex<double> *ct) const;
void transpose_fwd(const std::complex<double> *ct,
std::complex<double> *zvec) const;
};
#endif
......@@ -406,7 +406,7 @@ void Bisection::compute_transform(const SlaterDet& sd, int maxsweep, double tol)
jade(maxsweep,tol,amat_,*u_,adiag_);
#ifdef TIMING
if ( ctxt_.onpe0() )
cout << "Bisection::compute_transform: nsweep=" << nsweep
cout << "Bisection::compute_transform:"
<< " maxsweep=" << maxsweep << " tol=" << tol << endl;
#endif
......
......@@ -82,15 +82,15 @@ CPSampleStepper::~CPSampleStepper(void)
void CPSampleStepper::step(int niter)
{
const bool onpe0 = s_.ctxt_.onpe0();
// CP dynamics is allowed only for all doubly occupied states
// check if states are all doubly occupied
const bool wf_double_occ = (s_.wf.nel() == 2 * s_.wf.nst());
if ( !wf_double_occ )
// check that there are no fractionally occupied states
// next line: (3-nspin) = 2 if nspin==1 and 1 if nspin==2
if ( s_.wf.nel() != (( 3 - s_.wf.nspin() ) * s_.wf.nst()) )
{
if ( s_.ctxt_.onpe0() )
{
cout << " CPSampleStepper::step:"
" not all states doubly occupied: cannot run" << endl;
" fractionally occupied or empty states: cannot run" << endl;
}
return;
}
......
......@@ -1007,6 +1007,7 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf,
// if using bisection, localize the wave functions
if ( use_bisection_ )
{
tmb.reset();
tmb.start();
int maxsweep = 50;
if ( s_.ctrl.debug.find("BISECTION_MAXSWEEP") != string::npos )
......
......@@ -45,11 +45,8 @@
#endif
#endif
#if USE_MPI
#include <mpi.h>
#endif
#include "Timer.h"
#include "BasisMapping.h"
class Basis;
......@@ -57,26 +54,15 @@ class FourierTransform
{
private:
MPI_Comm comm_;
const Basis& basis_;
int nprocs_, myproc_;
const BasisMapping bm_;
int np0_,np1_,np2_;
int ntrans0_,ntrans1_,ntrans2_;
const int np0_,np1_,np2_;
const int nvec_;
int nvec_;
bool basis_fits_in_grid_;
int ntrans0_,ntrans1_,ntrans2_;
std::vector<int> np2_loc_; // np2_loc_[iproc], iproc=0, nprocs_-1
std::vector<int> np2_first_; // np2_first_[iproc], iproc=0, nprocs_-1
std::vector<std::complex<double> > zvec_;
std::vector<int> scounts, sdispl, rcounts, rdispl;
std::vector<std::complex<double> > sbuf, rbuf;
std::vector<int> ifftp_, ifftm_;
std::vector<int> ipack_, iunpack_;
void init_lib(void);
#if USE_ESSL_FFT
......@@ -107,11 +93,10 @@ class FourierTransform
#error "Must define USE_FFTW2, USE_FFTW3, USE_ESSL_FFT or FFT_NOLIB"
#endif
void vector_to_zvec(const std::complex<double>* c);
void zvec_to_vector(std::complex<double>* c);
void doublevector_to_zvec(const std::complex<double>* c1,
const std::complex<double> *c2);
void zvec_to_doublevector(std::complex<double>* c1, std::complex<double>* c2);
void fxy(std::complex<double>* val);
void fxy_inv(std::complex<double>* val);
void fz(void);
void fz_inv(void);
void fwd(std::complex<double>* val);
void bwd(std::complex<double>* val);
......@@ -119,7 +104,6 @@ class FourierTransform
FourierTransform (const Basis &basis, int np0, int np1, int np2);
~FourierTransform ();
MPI_Comm comm(void) const { return comm_; }
// backward: Fourier synthesis, compute real-space function
// forward: Fourier analysis, compute Fourier coefficients
......@@ -139,13 +123,13 @@ class FourierTransform
int np0() const { return np0_; }
int np1() const { return np1_; }
int np2() const { return np2_; }
int np2_loc() const { return np2_loc_[myproc_]; }
int np2_loc(int iproc) const { return np2_loc_[iproc]; }
int np2_first() const { return np2_first_[myproc_]; }
int np2_first(int iproc) const { return np2_first_[iproc]; }
int np2_loc(void) const { return bm_.np2_loc(); }
int np2_loc(int iproc) const { return bm_.np2_loc(iproc); }
int np2_first(void) const { return bm_.np2_first(); }
int np2_first(int iproc) const { return bm_.np2_first(iproc); }
long int np012() const { return ((long int)np0_) * np1_ * np2_; }
int np012loc(int iproc) const { return np0_ * np1_ * np2_loc_[iproc]; }
int np012loc() const { return np0_ * np1_ * np2_loc_[myproc_]; }
int np012loc(int iproc) const { return np0_ * np1_ * np2_loc(iproc); }
int np012loc(void) const { return np0_ * np1_ * np2_loc(); }
int index(int i, int j, int k) const
{ return i + np0_ * ( j + np1_ * k ); }
int i(int ind) const { return ind % np0_; }
......@@ -153,11 +137,7 @@ class FourierTransform
int k(int ind) const { return (ind / np0_) / np1_ + np2_first(); }
void reset_timers(void);
Timer tm_f_map, tm_f_fft, tm_f_pack, tm_f_mpi, tm_f_zero, tm_f_unpack,
tm_b_map, tm_b_fft, tm_b_pack, tm_b_mpi, tm_b_zero, tm_b_unpack,
tm_f_xy, tm_f_z, tm_f_x, tm_f_y,
tm_b_xy, tm_b_z, tm_b_x, tm_b_y,
tm_init, tm_b_com, tm_f_com;
Timer tm_fwd, tm_bwd, tm_map_fwd, tm_map_bwd, tm_trans_fwd, tm_trans_bwd,
tm_fxy, tm_fxy_inv, tm_fz, tm_fz_inv, tm_init;
};
#endif
......@@ -32,8 +32,9 @@ using namespace std;
////////////////////////////////////////////////////////////////////////////////
MLWFTransform::MLWFTransform(const SlaterDet& sd) : sd_(sd),
cell_(sd.basis().cell()), ctxt_(sd.context()), bm_(BasisMapping(sd.basis())),
maxsweep_(50), tol_(1.e-8)
cell_(sd.basis().cell()), ctxt_(sd.context()),
bm_(BasisMapping(sd.basis(),sd.basis().np(0),sd.basis().np(1),
sd.basis().np(2))), maxsweep_(50), tol_(1.e-8)
{
a_.resize(6);
adiag_.resize(6);
......@@ -105,7 +106,7 @@ void MLWFTransform::update(void)
const int np1 = bm_.np1();
const int np2 = bm_.np2();
const int np01 = np0 * np1;
const int np2loc = bm_.np2loc();
const int np2loc = bm_.np2_loc();
const int nvec = bm_.nvec();
for ( int n = 0; n < c.nloc(); n++ )
{
......@@ -132,7 +133,7 @@ void MLWFTransform::update(void)
// x direction
// map zvec to ct
bm_.transpose_fwd(&zvec[0],&ct[0]);
bm_.transpose_bwd(&zvec[0],&ct[0]);
for ( int iz = 0; iz < np2loc; iz++ )
{
......@@ -142,10 +143,10 @@ void MLWFTransform::update(void)
compute_sincos(np0,&ct[ibase],&ct_cos[ibase],&ct_sin[ibase]);
}
}
// transpose back ct_cos to zvec_cos
bm_.transpose_bwd(&ct_cos[0],&zvec_cos[0]);
// transpose back ct_sin to zvec_sin
bm_.transpose_bwd(&ct_sin[0],&zvec_sin[0]);
// transpose ct_cos to zvec_cos
bm_.transpose_fwd(&ct_cos[0],&zvec_cos[0]);
// transpose ct_sin to zvec_sin
bm_.transpose_fwd(&ct_sin[0],&zvec_sin[0]);
// map back zvec_cos to sdcos and zvec_sin to sdsin
bm_.zvec_to_vector(&zvec_cos[0],&fcx[0]);
......@@ -167,10 +168,10 @@ void MLWFTransform::update(void)
zcopy(&len,&csin_tmp[0],&one,&ct_sin[ibase],&stride);
}
}
// transpose back ct_cos to zvec_cos
bm_.transpose_bwd(&ct_cos[0],&zvec_cos[0]);
// transpose back ct_sin to zvec_sin
bm_.transpose_bwd(&ct_sin[0],&zvec_sin[0]);
// transpose ct_cos to zvec_cos
bm_.transpose_fwd(&ct_cos[0],&zvec_cos[0]);
// transpose ct_sin to zvec_sin
bm_.transpose_fwd(&ct_sin[0],&zvec_sin[0]);
// map back zvec_cos and zvec_sin
bm_.zvec_to_vector(&zvec_cos[0],&fcy[0]);
......
--------------------------------------------------------------------------------
rel1_71_1
--------------------------------------------------------------------------------
0b54b01 Merge branch 'develop'
04bf383 Fix timing msg in Bisection.C
8ad14a3 Fix bisection timer for nspin==2
4b2de68 Fix constness in BasisMapping
1a2fb01 Enable spin-polarized CP dynamics
2aad1f5 Add cstdlib header for abs()
2d6f542 Use vector swap in BasisMapping for single-node
e9d9631 Use memset to zero arrays in BasisMapping
90f6ddc Update gather/scatter in BasisMapping for MKL
4f1caa4 Fix timers in testFourierTransform
2311fdd Add fxy and fz functions in FourierTransform
984676c Cleanup FourierTransform and BasisMapping
8dc7100 Rewrite BasisMapping and FourierTransform
57f8d91 Switch names of transpose fns in BasisMapping
--------------------------------------------------------------------------------
rel1_71_0
--------------------------------------------------------------------------------
a26cbfa Update constraints before ionic step
......
......@@ -19,5 +19,5 @@
#include "release.h"
std::string release(void)
{
return std::string("rel1_71_0");
return std::string("rel1_71_1");
}
......@@ -62,7 +62,7 @@ int main(int argc, char **argv)
<< " np1=" << basis.np(1)
<< " np2=" << basis.np(2) << endl;
cout << " basis.size=" << basis.size() << endl;
BasisMapping bmap(basis);
BasisMapping bmap(basis,basis.np(0),basis.np(1),basis.np(2));
cout << " zvec_size=" << bmap.zvec_size() << endl;
cout << " np012loc=" << bmap.np012loc() << endl;
......@@ -80,9 +80,9 @@ int main(int argc, char **argv)
<< f[i] << endl;
}
bmap.vector_to_zvec(&f[0],&zvec[0]);
bmap.transpose_fwd(&zvec[0],&ct[0]);
bmap.transpose_bwd(&zvec[0],&ct[0]);
for ( int k = 0; k < bmap.np2loc(); k++ )
for ( int k = 0; k < bmap.np2_loc(); k++ )
for ( int j = 0; j < bmap.np1(); j++ )
for ( int i = 0; i < bmap.np0(); i++ )
{
......@@ -94,7 +94,7 @@ int main(int argc, char **argv)
// transpose back to zvec
for ( int i = 0; i < zvec.size(); i++ )
zvec[i] = 0.0;
bmap.transpose_bwd(&ct[0],&zvec[0]);
bmap.transpose_fwd(&ct[0],&zvec[0]);
// transpose back to array f2
vector<complex<double> > f2(basis.localsize());
......