////////////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2008 The Regents of the University of California
//
// This file is part of Qbox
//
// Qbox is distributed under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 2 of
// the License, or (at your option) any later version.
// See the file COPYING in the root directory of this distribution
// or .
//
////////////////////////////////////////////////////////////////////////////////
//
// BasisMapping.C
//
////////////////////////////////////////////////////////////////////////////////
#include "Basis.h"
#include "Context.h"
#include "BasisMapping.h"
#include
#include
using namespace std;
////////////////////////////////////////////////////////////////////////////////
BasisMapping::BasisMapping (const Basis &basis) : basis_(basis)
{
nprocs_ = basis_.npes();
myproc_ = basis_.mype();
np0_ = basis.np(0);
np1_ = basis.np(1);
np2_ = basis.np(2);
np2_loc_.resize(nprocs_);
np2_first_.resize(nprocs_);
// Block-cyclic distribution for np2
// Partition np2 into nprocs_ intervals and
// store local sizes in np2_loc_[iproc]
// Use same block distribution as in ScaLAPACK
// Blocks 0,...,nprocs_-2 have size np2_block_size
// Block nprocs_-1 may have a smaller size
if ( np2_ % nprocs_ == 0 )
{
// all blocks have equal size
const int np2_block_size = np2_ / nprocs_;
for ( int iproc = 0; iproc < nprocs_; iproc++ )
np2_loc_[iproc] = np2_block_size;
}
else
{
// first k-1 blocks have same size, k_th block is smaller, others zero
const int np2_block_size = np2_ / nprocs_ + 1;
const int k = np2_ / np2_block_size;
for ( int iproc = 0; iproc < k; iproc++ )
np2_loc_[iproc] = np2_block_size;
np2_loc_[k] = np2_ - k * np2_block_size;
for ( int iproc = k+1; iproc < nprocs_; iproc++ )
np2_loc_[iproc] = 0;
}
np012loc_ = np0_ * np1_ * np2_loc_[myproc_];
np2_first_[0] = 0;
for ( int iproc = 1; iproc < nprocs_; iproc++ )
{
np2_first_[iproc] = np2_first_[iproc-1] + np2_loc_[iproc-1];
}
// number of local z vectors
if ( basis_.real() )
{
if ( myproc_ == 0 )
// rod(0,0) is mapped to only one z vector
nvec_ = 2 * basis_.nrod_loc() - 1;
else
nvec_ = 2 * basis_.nrod_loc();
}
else
{
nvec_ = basis_.nrod_loc();
}
// allocate send buffer
sbuf.resize(nvec_ * np2_);
// allocate receive buffer
if ( basis_.real() )
rbuf.resize((2 * basis_.nrods() - 1) * np2_loc_[myproc_]);
else
rbuf.resize(basis_.nrods() * np2_loc_[myproc_]);
// compute send/receive counts and displacements in units of sizeof(double)
scounts.resize(nprocs_);
sdispl.resize(nprocs_);
rcounts.resize(nprocs_);
rdispl.resize(nprocs_);
if ( basis_.real() )
{
for ( int iproc = 0; iproc < nprocs_; iproc++ )
{
scounts[iproc] = 2 * nvec_ * np2_loc_[iproc];
int nvec_iproc = iproc == 0 ? 2*basis_.nrod_loc(iproc)-1 :
2 * basis_.nrod_loc(iproc);
rcounts[iproc] = 2 * nvec_iproc * np2_loc_[myproc_];
}
}
else
{
for ( int iproc = 0; iproc < nprocs_; iproc++ )
{
scounts[iproc] = 2 * nvec_ * np2_loc_[iproc];
int nvec_iproc = basis_.nrod_loc(iproc);
rcounts[iproc] = 2 * nvec_iproc * np2_loc_[myproc_];
}
}
sdispl[0] = 0;
rdispl[0] = 0;
for ( int iproc = 1; iproc < nprocs_; iproc++ )
{
sdispl[iproc] = sdispl[iproc-1] + scounts[iproc-1];
rdispl[iproc] = rdispl[iproc-1] + rcounts[iproc-1];
}
if ( basis_.real() )
{
// compute index arrays ip_ and im_ for mapping vector->zvec
ip_.resize(basis_.localsize());
im_.resize(basis_.localsize());
if ( myproc_ == 0 )
{
// this process holds rod(0,0)
// nvec_ == 2 * nrod_loc - 1
// map rod(0,0)
// the positive segment of rod(0,0) maps onto the first half of
// the first column of zvec_, and the negative segment maps onto
// the second half
int ig = 0;
ip_[0] = 0;
im_[0] = 0;
ig++;
for ( int l = 1; l < basis_.rod_size(0); l++ )
{
ip_[ig] = l;
im_[ig] = np2_ - l;
ig++;
}
// map other rods(h,k) on pe 0, h!=0, k!=0
// rod(h,k) maps onto column (2*irod-1)*np2_ of zvec_, irod=1,..,nrods-1
// rod(-h,-k) maps onto column (2*irod)*np2_ of zvec_, irod=1,..,nrods-1
for ( int irod = 1; irod < basis_.nrod_loc(); irod++ )
{
const int rodsize = basis_.rod_size(irod);
for ( int i = 0; i < rodsize; i++ )
{
const int l = i + basis_.rod_lmin(irod);
int izp = l;
int izm = -l;
if ( izp < 0 ) izp += np2_;
if ( izm < 0 ) izm += np2_;
ip_[ig] = ( 2 * irod - 1 ) * np2_ + izp;
im_[ig] = ( 2 * irod ) * np2_ + izm;
ig++;
}
}
assert(ig == basis_.localsize());
}
else
{
// this process does not hold rod(0,0)
// map rods(h,k)
// rod(h,k) maps onto column (2*irod)*np2_ of zvec_, irod=0,..,nrods-1
// rod(-h,-k) maps onto column (2*irod+1)*np2_ of zvec_,irod=0,..,nrods-1
int ig = 0;
for ( int irod = 0; irod < basis_.nrod_loc(); irod++ )
{
const int rodsize = basis_.rod_size(irod);
for ( int i = 0; i < rodsize; i++ )
{
const int l = i + basis_.rod_lmin(irod);
int izp = l;
int izm = -l;
if ( izp < 0 ) izp += np2_;
if ( izm < 0 ) izm += np2_;
ip_[ig] = ( 2 * irod ) * np2_ + izp;
im_[ig] = ( 2 * irod + 1 ) * np2_ + izm;
ig++;
}
}
assert(ig == basis_.localsize());
}
// compute ipack index array
// used in packing zvec_ into sbuf
// sbuf[ipack_[i]] = zvec_[i]
ipack_.resize(nvec_*np2_);
int idest = 0;
for ( int iproc = 0; iproc < nprocs_; iproc++ )
{
int isource = np2_first_[iproc];
int sz = np2_loc_[iproc];
for ( int ivec = 0; ivec < nvec_; ivec++ )
{
for ( int i = 0; i < sz; i++ )
{
ipack_[isource+i] = idest + i;
}
idest += sz;
isource += np2_;
}
}
// compute array iunpack
// used in unpacking rbuf into val
// val[iunpack[i]] = rbuf[i]
// rbuf contains 2*_nrods-1 segments of size np2_loc[myproc]
// the position of vector ivec in local rbuf[_nrods*np2_loc_] is
// obtained from rod_h[iproc][irod], rod_k[irod][iproc]
// compute iunpack[i], i = 0, .. , _nrods * np2_loc_
iunpack_.resize((2*basis_.nrods()-1)*np2_loc_[myproc_]);
// map rod(0,0)
for ( int l = 0; l < np2_loc_[myproc_]; l++ )
{
iunpack_[l] = l * np0_ * np1_;
}
int isource_p = np2_loc_[myproc_];
int isource_m = 2 * np2_loc_[myproc_];
// all rods of pe 0
for ( int irod = 1; irod < basis_.nrod_loc(0); irod++ )
{
// map rod(h,k) and rod(-h,-k) columns of zvec_
// map rod(h,k)
// find position of rod(h,k) in the slab
int hp = basis_.rod_h(0,irod);
int kp = basis_.rod_k(0,irod);
if ( hp < 0 ) hp += np0_;
if ( kp < 0 ) kp += np1_;
int hm = -hp;
int km = -kp;
if ( hm < 0 ) hm += np0_;
if ( km < 0 ) km += np1_;
for ( int l = 0; l < np2_loc_[myproc_]; l++ )
{
int idest_p = hp + np0_ * ( kp + np1_ * l );
iunpack_[isource_p+l] = idest_p;
int idest_m = hm + np0_ * ( km + np1_ * l );
iunpack_[isource_m+l] = idest_m;
}
isource_p += 2 * np2_loc_[myproc_];
isource_m += 2 * np2_loc_[myproc_];
}
// pe's above pe0
for ( int iproc = 1; iproc < nprocs_; iproc++ )
{
for ( int irod = 0; irod < basis_.nrod_loc(iproc); irod++ )
{
// map rod(h,k) and rod(-h,-k) columns of zvec_
// map rod(h,k)
// find position of rod(h,k) in the slab
int hp = basis_.rod_h(iproc,irod);
int kp = basis_.rod_k(iproc,irod);
if ( hp < 0 ) hp += np0_;
if ( kp < 0 ) kp += np1_;
int hm = -hp;
int km = -kp;
if ( hm < 0 ) hm += np0_;
if ( km < 0 ) km += np1_;
for ( int l = 0; l < np2_loc_[myproc_]; l++ )
{
int idest_p = hp + np0_ * ( kp + np1_ * l );
iunpack_[isource_p+l] = idest_p;
int idest_m = hm + np0_ * ( km + np1_ * l );
iunpack_[isource_m+l] = idest_m;
}
isource_p += 2 * np2_loc_[myproc_];
isource_m += 2 * np2_loc_[myproc_];
}
}
}
else
{
// basis is complex
// compute index array ip_ for mapping vector->zvec
// Note: im_ is not used
ip_.resize(basis_.localsize());
// map rods(h,k)
// rod(h,k) maps onto column irod*np2_ of zvec_, irod=0,..,nrods-1
int ig = 0;
for ( int irod = 0; irod < basis_.nrod_loc(); irod++ )
{
const int rodsize = basis_.rod_size(irod);
for ( int i = 0; i < rodsize; i++ )
{
const int l = i + basis_.rod_lmin(irod);
int iz = l;
if ( iz < 0 ) iz += np2_;
ip_[ig] = irod * np2_ + iz;
ig++;
}
}
assert(ig == basis_.localsize());
// compute ipack index array
// used in packing zvec_ into sbuf
// sbuf[ipack_[i]] = zvec_[i]
ipack_.resize(nvec_*np2_);
int idest = 0;
for ( int iproc = 0; iproc < nprocs_; iproc++ )
{
int isource = np2_first_[iproc];
int sz = np2_loc_[iproc];
for ( int ivec = 0; ivec < nvec_; ivec++ )
{
for ( int i = 0; i < sz; i++ )
{
ipack_[isource+i] = idest + i;
}
idest += sz;
isource += np2_;
}
}
// compute array iunpack
// used in unpacking rbuf into val
// val[iunpack[i]] = rbuf[i]
// rbuf contains _nrods segments of size np2_loc[mype]
// the position of vector ivec in local rbuf[_nrods*np2_loc_] is
// obtained from rod_h[iproc][irod], rod_k[irod][iproc]
// compute iunpack[i], i = 0, .. , _nrods * np2_loc_
iunpack_.resize(basis_.nrods()*np2_loc_[myproc_]);
int isource = 0;
for ( int iproc = 0; iproc < nprocs_; iproc++ )
{
for ( int irod = 0; irod < basis_.nrod_loc(iproc); irod++ )
{
// map rod(h,k)
// find position of rod(h,k) in the slab
int h = basis_.rod_h(iproc,irod);
int k = basis_.rod_k(iproc,irod);
if ( h < 0 ) h += np0_;
if ( k < 0 ) k += np1_;
for ( int l = 0; l < np2_loc_[myproc_]; l++ )
{
int idest = h + np0_ * ( k + np1_ * l );
iunpack_[isource+l] = idest;
}
isource += np2_loc_[myproc_];
}
}
}
#if USE_GATHER_SCATTER
// shift index array by one for fortran ZGTHR and ZSCTR calls
for ( int i = 0; i < iunpack_.size(); i++ )
{
iunpack_[i]++;
}
for ( int i = 0; i < ipack_.size(); i++ )
{
ipack_[i]++;
}
#endif
}
////////////////////////////////////////////////////////////////////////////////
void BasisMapping::transpose_fwd(const complex *zvec,
complex *ct)
{
// Transpose zvec to ct
// scatter zvec to sbuf for transpose
#if USE_GATHER_SCATTER
// zsctr: y(indx(i)) = x(i)
// void zsctr_(int* n, complex* x, int* indx, complex* y);
{
complex* y = &sbuf[0];
complex* x = const_cast*>(zvec);
int n = zvec_.size();
zsctr_(&n,x,&ipack_[0],y);
}
#else
const int len = zvec_size();
double* const ps = (double*) &sbuf[0];
const double* const pz = (const double*) zvec;
for ( int i = 0; i < len; i++ )
{
// sbuf[ipack_[i]] = zvec[i];
const int ip = ipack_[i];
const double a = pz[2*i];
const double b = pz[2*i+1];
ps[2*ip] = a;
ps[2*ip+1] = b;
}
#endif
// segments of z-vectors are now in sbuf
// transpose
#if USE_MPI
int status = MPI_Alltoallv((double*)&sbuf[0],&scounts[0],&sdispl[0],
MPI_DOUBLE,(double*)&rbuf[0],&rcounts[0],&rdispl[0],MPI_DOUBLE,
basis_.comm());
if ( status != 0 )
{
cout << " BasisMapping: status = " << status << endl;
MPI_Abort(basis_.comm(),2);
}
#else
assert(sbuf.size()==rbuf.size());
rbuf = sbuf;
#endif
// copy from rbuf to ct
// scatter index array iunpack
{
const int len = np012loc_;
double* const pv = (double*) ct;
for ( int i = 0; i < len; i++ )
{
pv[2*i] = 0.0;
pv[2*i+1] = 0.0;
}
}
#if USE_GATHER_SCATTER
// zsctr(n,x,indx,y): y(indx(i)) = x(i)
{
complex* y = ct;
complex* x = &rbuf[0];
int n = rbuf.size();
zsctr_(&n,x,&iunpack_[0],y);
}
#else
{
const int rbuf_size = rbuf.size();
const double* const pr = (double*) &rbuf[0];
double* const pv = (double*) ct;
for ( int i = 0; i < rbuf_size; i++ )
{
// val[iunpack_[i]] = rbuf[i];
const int iu = iunpack_[i];
const double a = pr[2*i];
const double b = pr[2*i+1];
pv[2*iu] = a;
pv[2*iu+1] = b;
}
}
#endif
// coefficients are now in ct
}
////////////////////////////////////////////////////////////////////////////////
void BasisMapping::transpose_bwd(const complex *ct,
complex *zvec)
{
// transpose back distributed array ct into zvec
// gather ct into rbuf
#if USE_GATHER_SCATTER
// zgthr: x(i) = y(indx(i))
// void zgthr_(int* n, complex* y, complex* x, int*indx);
{
complex* y = const_cast*>(ct);
complex* x = &rbuf[0];
int n = rbuf.size();
zgthr_(&n,y,x,&iunpack_[0]);
}
#else
const int rbuf_size = rbuf.size();
double* const pr = (double*) &rbuf[0];
const double* const pv = (const double*) ct;
for ( int i = 0; i < rbuf_size; i++ )
{
// rbuf[i] = val[iunpack_[i]];
const int iu = iunpack_[i];
const double a = pv[2*iu];
const double b = pv[2*iu+1];
pr[2*i] = a;
pr[2*i+1] = b;
}
#endif
// transpose
#if USE_MPI
int status = MPI_Alltoallv((double*)&rbuf[0],&rcounts[0],&rdispl[0],
MPI_DOUBLE,(double*)&sbuf[0],&scounts[0],&sdispl[0],MPI_DOUBLE,
basis_.comm());
assert ( status == 0 );
#else
assert(sbuf.size()==rbuf.size());
sbuf = rbuf;
#endif
// segments of z-vectors are now in sbuf
// gather sbuf into zvec_
#if USE_GATHER_SCATTER
// zgthr: x(i) = y(indx(i))
// void zgthr_(int* n, complex* y, complex* x, int*indx);
{
complex* y = &sbuf[0];
complex* x = zvec;
int n = zvec_.size();
zgthr_(&n,y,x,&ipack_[0]);
}
#else
const int len = zvec_size();
const double* const ps = (double*) &sbuf[0];
double* const pz = (double*) zvec;
for ( int i = 0; i < len; i++ )
{
// zvec[i] = sbuf[ipack_[i]];
const int ip = ipack_[i];
const double a = ps[2*ip];
const double b = ps[2*ip+1];
pz[2*i] = a;
pz[2*i+1] = b;
}
#endif
}
////////////////////////////////////////////////////////////////////////////////
void BasisMapping::vector_to_zvec(const complex *c,
complex *zvec)
{
// map coefficients from the basis order to a zvec
const int ng = basis_.localsize();
const int len = zvec_size();
double* const pz = (double*) zvec;
for ( int i = 0; i < len; i++ )
{
pz[2*i] = 0.0;
pz[2*i+1] = 0.0;
}
const double* const pc = (const double*) c;
if ( basis_.real() )
{
for ( int ig = 0; ig < ng; ig++ )
{
// zvec[ip_[ig]] = c[ig];
// zvec[im_[ig]] = conj(c[ig]);
const double a = pc[2*ig];
const double b = pc[2*ig+1];
const int ip = ip_[ig];
const int im = im_[ig];
pz[2*ip] = a;
pz[2*ip+1] = b;
pz[2*im] = a;
pz[2*im+1] = -b;
}
}
else
for ( int ig = 0; ig < ng; ig++ )
{
// zvec[ip_[ig]] = c[ig];
const double a = pc[2*ig];
const double b = pc[2*ig+1];
const int ip = ip_[ig];
pz[2*ip] = a;
pz[2*ip+1] = b;
}
}
////////////////////////////////////////////////////////////////////////////////
void BasisMapping::zvec_to_vector(const complex *zvec,
complex *c)
{
const int ng = basis_.localsize();
const double* const pz = (const double*) zvec;
double* const pc = (double*) c;
for ( int ig = 0; ig < ng; ig++ )
{
// c[ig] = zvec[ip_[ig]];
const int ip = ip_[ig];
const double pz0 = pz[2*ip];
const double pz1 = pz[2*ip+1];
pc[2*ig] = pz0;
pc[2*ig+1] = pz1;
}
}