Commit 0b54b012 by Francois Gygi

Merge branch 'develop'

parents 1a76f0a4 04bf3838
......@@ -22,18 +22,24 @@
#include <iostream>
#include <cassert>
#include <cstring> // memset
using namespace std;
#if USE_GATHER_SCATTER
extern "C" {
// zgthr: x(i) = y(indx(i))
void zgthr_(int* n, complex<double>* y, complex<double>* x, int *indx);
// zsctr: y(indx(i)) = x(i)
void zsctr_(int* n, complex<double>* x, int* indx, complex<double>* y);
}
#endif
////////////////////////////////////////////////////////////////////////////////
BasisMapping::BasisMapping (const Basis &basis) : basis_(basis)
BasisMapping::BasisMapping (const Basis &basis, int np0, int np1, int np2) :
basis_(basis), np0_(np0), np1_(np1), np2_(np2)
{
nprocs_ = basis_.npes();
myproc_ = basis_.mype();
np0_ = basis.np(0);
np1_ = basis.np(1);
np2_ = basis.np(2);
np2_loc_.resize(nprocs_);
np2_first_.resize(nprocs_);
......@@ -127,6 +133,9 @@ BasisMapping::BasisMapping (const Basis &basis) : basis_(basis)
rdispl[iproc] = rdispl[iproc-1] + rcounts[iproc-1];
}
// check if the basis_ fits in the grid np0, np1, np2
assert(basis_.fits_in_grid(np0_,np1_,np2_));
if ( basis_.real() )
{
// compute index arrays ip_ and im_ for mapping vector->zvec
......@@ -388,19 +397,18 @@ BasisMapping::BasisMapping (const Basis &basis) : basis_(basis)
}
////////////////////////////////////////////////////////////////////////////////
void BasisMapping::transpose_fwd(const complex<double> *zvec,
complex<double> *ct)
void BasisMapping::transpose_bwd(const complex<double> *zvec,
complex<double> *ct) const
{
// Transpose zvec to ct
// scatter zvec to sbuf for transpose
#if USE_GATHER_SCATTER
// zsctr: y(indx(i)) = x(i)
// void zsctr_(int* n, complex<double>* x, int* indx, complex<double>* y);
{
complex<double>* y = &sbuf[0];
complex<double>* y = const_cast<complex<double>*>(&sbuf[0]);
complex<double>* x = const_cast<complex<double>*>(zvec);
int n = zvec_.size();
zsctr_(&n,x,&ipack_[0],y);
int n = zvec_size();
zsctr_(&n,x,const_cast<int*>(&ipack_[0]),y);
}
#else
const int len = zvec_size();
......@@ -420,39 +428,36 @@ void BasisMapping::transpose_fwd(const complex<double> *zvec,
// segments of z-vectors are now in sbuf
// transpose
#if USE_MPI
int status = MPI_Alltoallv((double*)&sbuf[0],&scounts[0],&sdispl[0],
MPI_DOUBLE,(double*)&rbuf[0],&rcounts[0],&rdispl[0],MPI_DOUBLE,
basis_.comm());
if ( status != 0 )
if ( nprocs_ == 1 )
{
cout << " BasisMapping: status = " << status << endl;
MPI_Abort(basis_.comm(),2);
assert(sbuf.size()==rbuf.size());
rbuf.swap(sbuf);
}
#else
assert(sbuf.size()==rbuf.size());
rbuf = sbuf;
#endif
// copy from rbuf to ct
// scatter index array iunpack
else
{
const int len = np012loc_;
double* const pv = (double*) ct;
for ( int i = 0; i < len; i++ )
int status =
MPI_Alltoallv((double*)&sbuf[0],(int*)&scounts[0],(int*)&sdispl[0],
MPI_DOUBLE,(double*)&rbuf[0],(int*)&rcounts[0],(int*)&rdispl[0],
MPI_DOUBLE, basis_.comm());
if ( status != 0 )
{
pv[2*i] = 0.0;
pv[2*i+1] = 0.0;
cout << " BasisMapping: status = " << status << endl;
MPI_Abort(basis_.comm(),2);
}
}
// clear ct
memset((void*)ct,0,np012loc_*sizeof(complex<double>));
// copy from rbuf to ct
// using scatter index array iunpack
#if USE_GATHER_SCATTER
// zsctr(n,x,indx,y): y(indx(i)) = x(i)
{
complex<double>* y = ct;
complex<double>* x = &rbuf[0];
complex<double>* x = const_cast<complex<double>*>(&rbuf[0]);
int n = rbuf.size();
zsctr_(&n,x,&iunpack_[0],y);
zsctr_(&n,x,const_cast<int*>(&iunpack_[0]),y);
}
#else
{
......@@ -475,19 +480,18 @@ void BasisMapping::transpose_fwd(const complex<double> *zvec,
}
////////////////////////////////////////////////////////////////////////////////
void BasisMapping::transpose_bwd(const complex<double> *ct,
complex<double> *zvec)
void BasisMapping::transpose_fwd(const complex<double> *ct,
complex<double> *zvec) const
{
// transpose back distributed array ct into zvec
// transpose ct to zvec
// gather ct into rbuf
#if USE_GATHER_SCATTER
// zgthr: x(i) = y(indx(i))
// void zgthr_(int* n, complex<double>* y, complex<double>* x, int*indx);
{
complex<double>* y = const_cast<complex<double>*>(ct);
complex<double>* x = &rbuf[0];
complex<double>* x = const_cast<complex<double>*>(&rbuf[0]);
int n = rbuf.size();
zgthr_(&n,y,x,&iunpack_[0]);
zgthr_(&n,y,x,const_cast<int*>(&iunpack_[0]));
}
#else
const int rbuf_size = rbuf.size();
......@@ -505,27 +509,34 @@ void BasisMapping::transpose_bwd(const complex<double> *ct,
#endif
// transpose
#if USE_MPI
int status = MPI_Alltoallv((double*)&rbuf[0],&rcounts[0],&rdispl[0],
MPI_DOUBLE,(double*)&sbuf[0],&scounts[0],&sdispl[0],MPI_DOUBLE,
basis_.comm());
assert ( status == 0 );
#else
assert(sbuf.size()==rbuf.size());
sbuf = rbuf;
#endif
if ( nprocs_ == 1 )
{
assert(sbuf.size()==rbuf.size());
sbuf.swap(rbuf);
}
else
{
int status =
MPI_Alltoallv((double*)&rbuf[0],(int*)&rcounts[0],(int*)&rdispl[0],
MPI_DOUBLE,(double*)&sbuf[0],(int*)&scounts[0],(int*)&sdispl[0],
MPI_DOUBLE, basis_.comm());
if ( status != 0 )
{
cout << " BasisMapping: status = " << status << endl;
MPI_Abort(basis_.comm(),2);
}
}
// segments of z-vectors are now in sbuf
// gather sbuf into zvec_
#if USE_GATHER_SCATTER
// zgthr: x(i) = y(indx(i))
// void zgthr_(int* n, complex<double>* y, complex<double>* x, int*indx);
{
complex<double>* y = &sbuf[0];
complex<double>* y = const_cast<complex<double>*>(&sbuf[0]);
complex<double>* x = zvec;
int n = zvec_.size();
zgthr_(&n,y,x,&ipack_[0]);
int n = zvec_size();
zgthr_(&n,y,x,const_cast<int*>(&ipack_[0]));
}
#else
const int len = zvec_size();
......@@ -545,17 +556,14 @@ void BasisMapping::transpose_bwd(const complex<double> *ct,
////////////////////////////////////////////////////////////////////////////////
void BasisMapping::vector_to_zvec(const complex<double> *c,
complex<double> *zvec)
complex<double> *zvec) const
{
// clear zvec
memset((void*)&zvec[0],0,zvec_size()*sizeof(complex<double>));
// map coefficients from the basis order to a zvec
const int ng = basis_.localsize();
const int len = zvec_size();
double* const pz = (double*) zvec;
for ( int i = 0; i < len; i++ )
{
pz[2*i] = 0.0;
pz[2*i+1] = 0.0;
}
const int ng = basis_.localsize();
const double* const pc = (const double*) c;
if ( basis_.real() )
{
......@@ -584,9 +592,46 @@ void BasisMapping::vector_to_zvec(const complex<double> *c,
pz[2*ip+1] = b;
}
}
////////////////////////////////////////////////////////////////////////////////
void BasisMapping::doublevector_to_zvec(const complex<double> *c1,
const complex<double> *c2, complex<double> *zvec) const
{
assert(basis_.real());
// clear zvec
memset((void*)&zvec[0],0,zvec_size()*sizeof(complex<double>));
// map two real functions to zvec
double* const pz = (double*) &zvec[0];
const int ng = basis_.localsize();
const double* const pc1 = (double*) &c1[0];
const double* const pc2 = (double*) &c2[0];
#pragma omp parallel for
for ( int ig = 0; ig < ng; ig++ )
{
// const double a = c1[ig].real();
// const double b = c1[ig].imag();
// const double c = c2[ig].real();
// const double d = c2[ig].imag();
// zvec_[ip] = complex<double>(a-d, b+c);
// zvec_[im] = complex<double>(a+d, c-b);
const double a = pc1[2*ig];
const double b = pc1[2*ig+1];
const double c = pc2[2*ig];
const double d = pc2[2*ig+1];
const int ip = ip_[ig];
const int im = im_[ig];
pz[2*ip] = a - d;
pz[2*ip+1] = b + c;
pz[2*im] = a + d;
pz[2*im+1] = c - b;
}
}
////////////////////////////////////////////////////////////////////////////////
void BasisMapping::zvec_to_vector(const complex<double> *zvec,
complex<double> *c)
complex<double> *c) const
{
const int ng = basis_.localsize();
const double* const pz = (const double*) zvec;
......@@ -601,3 +646,35 @@ void BasisMapping::zvec_to_vector(const complex<double> *zvec,
pc[2*ig+1] = pz1;
}
}
////////////////////////////////////////////////////////////////////////////////
void BasisMapping::zvec_to_doublevector(const complex<double> *zvec,
complex<double> *c1, complex<double> *c2 ) const
{
// Mapping of zvec onto two real functions
assert(basis_.real());
const int ng = basis_.localsize();
const double* const pz = (double*) &zvec[0];
double* const pc1 = (double*) &c1[0];
double* const pc2 = (double*) &c2[0];
#pragma omp parallel for
for ( int ig = 0; ig < ng; ig++ )
{
// const double a = 0.5*zvec_[ip].real();
// const double b = 0.5*zvec_[ip].imag();
// const double c = 0.5*zvec_[im].real();
// const double d = 0.5*zvec_[im].imag();
// c1[ig] = complex<double>(a+c, b-d);
// c2[ig] = complex<double>(b+d, c-a);
const int ip = ip_[ig];
const int im = im_[ig];
const double a = pz[2*ip];
const double b = pz[2*ip+1];
const double c = pz[2*im];
const double d = pz[2*im+1];
pc1[2*ig] = 0.5 * ( a + c );
pc1[2*ig+1] = 0.5 * ( b - d );
pc2[2*ig] = 0.5 * ( b + d );
pc2[2*ig+1] = 0.5 * ( c - a );
}
}
......@@ -38,32 +38,41 @@ class BasisMapping
std::vector<int> np2_first_; // np2_first_[iproc], iproc=0, nprocs_-1
std::vector<int> scounts, sdispl, rcounts, rdispl;
std::vector<std::complex<double> > sbuf, rbuf;
mutable std::vector<std::complex<double> > sbuf, rbuf;
std::vector<int> ip_, im_;
std::vector<int> ipack_, iunpack_;
public:
BasisMapping (const Basis &basis);
BasisMapping (const Basis &basis, int np0, int np1, int np2);
int np0(void) const { return np0_; }
int np1(void) const { return np1_; }
int np2(void) const { return np2_; }
int np2loc(void) const { return np2_loc_[myproc_]; }
int np2_loc(void) const { return np2_loc_[myproc_]; }
int np2_loc(int iproc) const { return np2_loc_[iproc]; }
int np2_first(void) const { return np2_first_[myproc_]; }
int np2_first(int iproc) const { return np2_first_[iproc]; }
int np012loc(void) const { return np012loc_; }
int nvec(void) const { return nvec_; }
int zvec_size(void) const { return nvec_ * np2_; }
// map a function c(G) to zvec_
void vector_to_zvec(const std::complex<double> *c,
std::complex<double> *zvec);
std::complex<double> *zvec) const;
// map two real functions c1(G) and c2(G) to zvec_
void doublevector_to_zvec(const std::complex<double> *c1,
const std::complex<double> *c2,std::complex<double> *zvec) const;
// map zvec_ to a function c(G)
void zvec_to_vector(const std::complex<double> *zvec,
std::complex<double> *c);
std::complex<double> *c) const;
// map zvec_ to two real functions c1(G) and c2(G)
void zvec_to_doublevector(const std::complex<double> *zvec,
std::complex<double> *c1, std::complex<double> *c2) const;
void transpose_fwd(const std::complex<double> *zvec,
std::complex<double> *ct);
void transpose_bwd(const std::complex<double> *ct,
std::complex<double> *zvec);
void transpose_bwd(const std::complex<double> *zvec,
std::complex<double> *ct) const;
void transpose_fwd(const std::complex<double> *ct,
std::complex<double> *zvec) const;
};
#endif
......@@ -406,7 +406,7 @@ void Bisection::compute_transform(const SlaterDet& sd, int maxsweep, double tol)
jade(maxsweep,tol,amat_,*u_,adiag_);
#ifdef TIMING
if ( ctxt_.onpe0() )
cout << "Bisection::compute_transform: nsweep=" << nsweep
cout << "Bisection::compute_transform:"
<< " maxsweep=" << maxsweep << " tol=" << tol << endl;
#endif
......
......@@ -82,15 +82,15 @@ CPSampleStepper::~CPSampleStepper(void)
void CPSampleStepper::step(int niter)
{
const bool onpe0 = s_.ctxt_.onpe0();
// CP dynamics is allowed only for all doubly occupied states
// check if states are all doubly occupied
const bool wf_double_occ = (s_.wf.nel() == 2 * s_.wf.nst());
if ( !wf_double_occ )
// check that there are no fractionally occupied states
// next line: (3-nspin) = 2 if nspin==1 and 1 if nspin==2
if ( s_.wf.nel() != (( 3 - s_.wf.nspin() ) * s_.wf.nst()) )
{
if ( s_.ctxt_.onpe0() )
{
cout << " CPSampleStepper::step:"
" not all states doubly occupied: cannot run" << endl;
" fractionally occupied or empty states: cannot run" << endl;
}
return;
}
......
......@@ -1007,6 +1007,7 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf,
// if using bisection, localize the wave functions
if ( use_bisection_ )
{
tmb.reset();
tmb.start();
int maxsweep = 50;
if ( s_.ctrl.debug.find("BISECTION_MAXSWEEP") != string::npos )
......
......@@ -18,13 +18,12 @@
#include "FourierTransform.h"
#include "Basis.h"
#include "BasisMapping.h"
#include "blas.h"
#include <complex>
#include <algorithm>
#include <map>
#include <cassert>
#include <cstring> // memset
#include <cstdlib> // abs
#if _OPENMP
#include <omp.h>
......@@ -35,12 +34,6 @@
#endif
#endif
#if USE_MPI
#include <mpi.h>
#else
typedef int MPI_Comm;
#endif
#if defined(USE_FFTW2) || defined(USE_FFTW3)
#ifdef ADD_
#define zdscal zdscal_
......@@ -101,6 +94,26 @@ extern "C" {
using namespace std;
////////////////////////////////////////////////////////////////////////////////
FourierTransform::FourierTransform (const Basis &basis,
int np0, int np1, int np2) : basis_(basis),
bm_(BasisMapping(basis,np0,np1,np2)), np0_(np0), np1_(np1), np2_(np2),
nvec_(bm_.nvec())
{
// compute number of transforms along the x,y and z directions
// ntrans0_ is the number of transforms along x in one of the two blocks
// of vectors corresponding to positive and negative y indices
ntrans0_ = max(abs(basis_.idxmax(1)),abs(basis_.idxmin(1)))+1;
ntrans1_ = np0_;
ntrans2_ = nvec_;
// resize array zvec holding columns
zvec_.resize(nvec_ * np2_);
// Initialize FT library auxiliary arrays
init_lib();
}
////////////////////////////////////////////////////////////////////////////////
FourierTransform::~FourierTransform()
{
#if USE_FFTW2
......@@ -131,391 +144,15 @@ FourierTransform::~FourierTransform()
}
////////////////////////////////////////////////////////////////////////////////
FourierTransform::FourierTransform (const Basis &basis,
int np0, int np1, int np2) : comm_(basis.comm()), basis_(basis),
np0_(np0), np1_(np1), np2_(np2)
void FourierTransform::forward(complex<double>* f, complex<double>* c)
{
MPI_Comm_size(comm_,&nprocs_);
MPI_Comm_rank(comm_,&myproc_);
np2_loc_.resize(nprocs_);
np2_first_.resize(nprocs_);
// Block-cyclic distribution for np2
// Partition np2 into nprocs_ intervals and
// store local sizes in np2_loc_[iproc]
// Use same block distribution as in ScaLAPACK
// Blocks 0,...,nprocs_-2 have size np2_block_size
// Block nprocs_-1 may have a smaller size
if ( np2_ % nprocs_ == 0 )
{
// all blocks have equal size
const int np2_block_size = np2_ / nprocs_;
for ( int iproc = 0; iproc < nprocs_; iproc++ )
np2_loc_[iproc] = np2_block_size;
}
else
{
// first k-1 blocks have same size, k_th block is smaller, others zero
const int np2_block_size = np2_ / nprocs_ + 1;
const int k = np2_ / np2_block_size;
for ( int iproc = 0; iproc < k; iproc++ )
np2_loc_[iproc] = np2_block_size;
np2_loc_[k] = np2_ - k * np2_block_size;
for ( int iproc = k+1; iproc < nprocs_; iproc++ )
np2_loc_[iproc] = 0;
}
np2_first_[0] = 0;
for ( int iproc = 1; iproc < nprocs_; iproc++ )
{
np2_first_[iproc] = np2_first_[iproc-1] + np2_loc_[iproc-1];
}
// number of local z vectors
if ( basis_.real() )
{
if ( myproc_ == 0 )
// rod(0,0) is mapped to only one z vector
nvec_ = 2 * basis_.nrod_loc() - 1;
else
nvec_ = 2 * basis_.nrod_loc();
}
else
{
nvec_ = basis_.nrod_loc();
}
// compute number of transforms along the x,y and z directions
// ntrans0_ is the number of transforms along x in one of the two blocks
// of vectors corresponding to positive and negative y indices
ntrans0_ = max(abs(basis_.idxmax(1)),abs(basis_.idxmin(1)))+1;
ntrans1_ = np0_;
ntrans2_ = nvec_;
// resize array zvec holding columns
zvec_.resize(nvec_ * np2_);
fwd(f);
#if TIMING
tm_init.start();
tm_map_fwd.start();
#endif
// Initialize FT library auxiliary arrays
init_lib();
bm_.zvec_to_vector(&zvec_[0],c);
#if TIMING
tm_init.stop();
#endif
// allocate send buffer
sbuf.resize(nvec_ * np2_);
// allocate receive buffer
if ( basis_.real() )
rbuf.resize((2 * basis_.nrods() - 1) * np2_loc_[myproc_]);
else
rbuf.resize(basis_.nrods() * np2_loc_[myproc_]);
// compute send/receive counts and displacements in units of sizeof(double)
scounts.resize(nprocs_);
sdispl.resize(nprocs_);
rcounts.resize(nprocs_);
rdispl.resize(nprocs_);
if ( basis_.real() )
{
for ( int iproc = 0; iproc < nprocs_; iproc++ )
{
scounts[iproc] = 2 * nvec_ * np2_loc_[iproc];
int nvec_iproc = iproc == 0 ? 2*basis_.nrod_loc(iproc)-1 :
2 * basis_.nrod_loc(iproc);
rcounts[iproc] = 2 * nvec_iproc * np2_loc_[myproc_];
}
}
else
{
for ( int iproc = 0; iproc < nprocs_; iproc++ )
{
scounts[iproc] = 2 * nvec_ * np2_loc_[iproc];
int nvec_iproc = basis_.nrod_loc(iproc);
rcounts[iproc] = 2 * nvec_iproc * np2_loc_[myproc_];
}
}
sdispl[0] = 0;
rdispl[0] = 0;
for ( int iproc = 1; iproc < nprocs_; iproc++ )
{
sdispl[iproc] = sdispl[iproc-1] + scounts[iproc-1];
rdispl[iproc] = rdispl[iproc-1] + rcounts[iproc-1];
}
// check if the basis_ fits in the grid np0, np1, np2
basis_fits_in_grid_ = basis_.fits_in_grid(np0,np1,np2);
assert(basis_fits_in_grid_);
if ( basis_.real() )
{
// compute index arrays ifftp_ and ifftm_ for mapping vector->zvec
ifftp_.resize(basis_.localsize());
ifftm_.resize(basis_.localsize());
if ( myproc_ == 0 )
{
// this process holds rod(0,0)
// nvec_ == 2 * nrod_loc - 1
// map rod(0,0)
// the positive segment of rod(0,0) maps onto the first half of
// the first column of zvec_, and the negative segment maps onto
// the second half
int ig = 0;
ifftp_[0] = 0;
ifftm_[0] = 0;
ig++;
for ( int l = 1; l < basis_.rod_size(0); l++ )
{
ifftp_[ig] = l;
ifftm_[ig] = np2_ - l;
ig++;
}
// map other rods(h,k) on pe 0, h!=0, k!=0
// rod(h,k) maps onto column (2*irod-1)*np2_ of zvec_, irod=1,..,nrods-1
// rod(-h,-k) maps onto column (2*irod)*np2_ of zvec_, irod=1,..,nrods-1
for ( int irod = 1; irod < basis_.nrod_loc(); irod++ )
{
const int rodsize = basis_.rod_size(irod);
for ( int i = 0; i < rodsize; i++ )
{
const int l = i + basis_.rod_lmin(irod);
int izp = l;
int izm = -l;
if ( izp < 0 ) izp += np2_;
if ( izm < 0 ) izm += np2_;
ifftp_[ig] = ( 2 * irod - 1 ) * np2_ + izp;
ifftm_[ig] = ( 2 * irod ) * np2_ + izm;
ig++;
}
}
assert(ig == basis_.localsize());
}
else
{
// this process does not hold rod(0,0)
// map rods(h,k)
// rod(h,k) maps onto column (2*irod)*np2_ of zvec_, irod=0,..,nrods-1
// rod(-h,-k) maps onto column (2*irod+1)*np2_ of zvec_, irod=0,..,nrods-1
int ig = 0;
for ( int irod = 0; irod < basis_.nrod_loc(); irod++ )
{
const int rodsize = basis_.rod_size(irod);
for ( int i = 0; i < rodsize; i++ )
{
const int l = i + basis_.rod_lmin(irod);
int izp = l;
int izm = -l;
if ( izp < 0 ) izp += np2_;
if ( izm < 0 ) izm += np2_;
ifftp_[ig] = ( 2 * irod ) * np2_ + izp;
ifftm_[ig] = ( 2 * irod + 1 ) * np2_ + izm;
ig++;
}
}
assert(ig == basis_.localsize());
}
// compute ipack index array
// used in packing zvec_ into sbuf
// sbuf[ipack_[i]] = zvec_[i]
ipack_.resize(nvec_*np2_);
int idest = 0;
for ( int iproc = 0; iproc < nprocs_; iproc++ )
{
int isource = np2_first_[iproc];
int sz = np2_loc_[iproc];
for ( int ivec = 0; ivec < nvec_; ivec++ )
{
for ( int i = 0; i < sz; i++ )
{
ipack_[isource+i] = idest + i;
}
idest += sz;
isource += np2_;
}
}
// compute array iunpack
// used in unpacking rbuf into val
// val[iunpack[i]] = rbuf[i]
// rbuf contains 2*_nrods-1 segments of size np2_loc[myproc]
// the position of vector ivec in local rbuf[_nrods*np2_loc_] is
// obtained from rod_h[iproc][irod], rod_k[irod][iproc]
// compute iunpack[i], i = 0, .. , _nrods * np2_loc_
iunpack_.resize((2*basis_.nrods()-1)*np2_loc_[myproc_]);
// map rod(0,0)
for ( int l = 0; l < np2_loc_[myproc_]; l++ )
{
iunpack_[l] = l * np0_ * np1_;
}
int isource_p = np2_loc_[myproc_];
int isource_m = 2 * np2_loc_[myproc_];
// all rods of pe 0
for ( int irod = 1; irod < basis_.nrod_loc(0); irod++ )
{
// map rod(h,k) and rod(-h,-k) columns of zvec_
// map rod(h,k)
// find position of rod(h,k) in the slab
int hp = basis_.rod_h(0,irod);
int kp = basis_.rod_k(0,irod);
if ( hp < 0 ) hp += np0_;
if ( kp < 0 ) kp += np1_;
int hm = -hp;
int km = -kp;
if ( hm < 0 ) hm += np0_;
if ( km < 0 ) km += np1_;
for ( int l = 0; l < np2_loc_[myproc_]; l++ )
{
int idest_p = hp + np0_ * ( kp + np1_ * l );
iunpack_[isource_p+l] = idest_p;
int idest_m = hm + np0_ * ( km + np1_ * l );
iunpack_[isource_m+l] = idest_m;
}
isource_p += 2 * np2_loc_[myproc_];