Basis.C 21.6 KB
Newer Older
Francois Gygi committed
1 2
////////////////////////////////////////////////////////////////////////////////
//
Francois Gygi committed
3 4 5 6
// Copyright (c) 2008 The Regents of the University of California
//
// This file is part of Qbox
//
Francois Gygi committed
7 8
// Qbox is distributed under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 2 of
Francois Gygi committed
9 10 11 12 13 14
// the License, or (at your option) any later version.
// See the file COPYING in the root directory of this distribution
// or <http://www.gnu.org/licenses/>.
//
////////////////////////////////////////////////////////////////////////////////
//
Francois Gygi committed
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
// Basis.C
//
////////////////////////////////////////////////////////////////////////////////

#include "Basis.h"
#include <cmath>
#include <cassert>
#include <cstdlib>
#include <vector>
#include <set>
#include <algorithm>
#include <functional>
#include <iostream>
#include <iomanip>
using namespace std;

////////////////////////////////////////////////////////////////////////////////
double Basis::localmemsize(void) const
33 34
{
  return
35 36
  5.0 * (npes_*nrods_*sizeof(int)) // x[ipe][irod]
  + localsize_[mype_] * (3.0*sizeof(int) + 10 * sizeof(double));
Francois Gygi committed
37
}
38
double Basis::memsize(void) const { return npes_*localmemsize(); }
Francois Gygi committed
39

40
MPI_Comm Basis::comm(void) const { return comm_; }
Francois Gygi committed
41

Francois Gygi committed
42 43 44 45 46
const UnitCell& Basis::cell() const { return cell_; }
const UnitCell& Basis::refcell() const { return refcell_; }
int Basis::idxmin(int i) const { return idxmin_[i]; }
int Basis::idxmax(int i) const { return idxmax_[i]; }
double Basis::ecut() const { return ecut_; }
Francois Gygi committed
47

Francois Gygi committed
48
int Basis::size() const { return size_; }
49
int Basis::localsize() const { return localsize_[mype_]; }
Francois Gygi committed
50 51 52
int Basis::localsize(int ipe) const { return localsize_[ipe]; }
int Basis::maxlocalsize() const { return maxlocalsize_; }
int Basis::minlocalsize() const { return minlocalsize_; }
Francois Gygi committed
53

Francois Gygi committed
54
int Basis::nrods() const { return nrods_; }
55
int Basis::nrod_loc() const { return nrod_loc_[mype_]; }
Francois Gygi committed
56
int Basis::nrod_loc(int ipe) const { return nrod_loc_[ipe]; }
Francois Gygi committed
57 58

int Basis::rod_h(int irod) const
59
{ return rod_h_[mype_][irod]; }
Francois Gygi committed
60
int Basis::rod_h(int ipe, int irod) const
Francois Gygi committed
61
{ return rod_h_[ipe][irod]; }
Francois Gygi committed
62

63
int Basis::rod_k(int irod) const { return rod_k_[mype_][irod]; }
Francois Gygi committed
64
int Basis::rod_k(int ipe, int irod) const { return rod_k_[ipe][irod]; }
Francois Gygi committed
65

66
int Basis::rod_lmin(int irod) const { return rod_lmin_[mype_][irod]; }
Francois Gygi committed
67
int Basis::rod_lmin(int ipe, int irod) const { return rod_lmin_[ipe][irod]; }
Francois Gygi committed
68 69

// size of rod irod on current process
70
int Basis::rod_size(int irod) const { return rod_size_[mype_][irod]; }
Francois Gygi committed
71
int Basis::rod_size(int ipe, int irod) const { return rod_size_[ipe][irod]; }
Francois Gygi committed
72 73

// position of first elem. of rod irod in the local list of g vectors
74
int Basis::rod_first(int irod) const { return rod_first_[mype_][irod]; }
Francois Gygi committed
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
int Basis::rod_first(int ipe, int irod) const { return rod_first_[ipe][irod]; }

int    Basis::idx(int i) const   { return idx_[i]; }
double Basis::g(int i) const     { return g_[i]; }
double Basis::kpg(int i) const   { return kpg_[i]; }
double Basis::gi(int i) const    { return gi_[i]; }
double Basis::kpgi(int i) const  { return kpgi_[i]; }
double Basis::g2(int i) const    { return g2_[i]; }
double Basis::kpg2(int i) const  { return kpg2_[i]; }
double Basis::g2i(int i) const   { return g2i_[i]; }
double Basis::kpg2i(int i) const { return kpg2i_[i]; }
double Basis::gx(int i) const    { return gx_[i]; }
double Basis::kpgx(int i) const  { return kpgx_[i]; }
int    Basis::isort(int i) const { return isort_loc[i]; }

const int*    Basis::idx_ptr(void) const   { return &(idx_[0]); }
const double* Basis::g_ptr(void)  const    { return &(g_[0]); }
const double* Basis::kpg_ptr(void)  const  { return &(kpg_[0]); }
const double* Basis::gi_ptr(void) const    { return &(gi_[0]); }
const double* Basis::kpgi_ptr(void) const  { return &(kpgi_[0]); }
const double* Basis::g2_ptr(void) const    { return &(g2_[0]); }
const double* Basis::kpg2_ptr(void) const  { return &(kpg2_[0]); }
const double* Basis::g2i_ptr(void) const   { return &(g2i_[0]); }
const double* Basis::kpg2i_ptr(void) const { return &(kpg2i_[0]); }
99
const double* Basis::gx_ptr(int j) const
100
{ return &(gx_[j*localsize_[mype_]]); }
Francois Gygi committed
101
const double* Basis::kpgx_ptr(int j) const
102
{ return &(kpgx_[j*localsize_[mype_]]); }
Francois Gygi committed
103 104

////////////////////////////////////////////////////////////////////////////////
Francois Gygi committed
105
bool Basis::factorizable(int n) const
Francois Gygi committed
106 107
{
  // next lines: use AIX criterion for all platforms (AIX and fftw)
108

Francois Gygi committed
109 110 111
//#if AIX

  // Acceptable lengths for FFTs in the ESSL library:
112 113
  // n = (2^h) (3^i) (5^j) (7^k) (11^m) for n <= 37748736
  // where:
Francois Gygi committed
114 115 116 117 118 119 120 121 122 123 124 125
  //  h = 1, 2, ..., 25
  //  i = 0, 1, 2
  //  j, k, m = 0, 1
  if ( n % 11 == 0 ) n /= 11;
  if ( n % 7 == 0 ) n /= 7;
  if ( n % 5 == 0 ) n /= 5;
  if ( n % 3 == 0 ) n /= 3;
  if ( n % 3 == 0 ) n /= 3;
  // the bound h <= 25 is not tested since 2^25 would cause
  // memory allocation problems
  while ( ( n % 2 == 0 ) ) n /= 2;
  return ( n == 1 );
126

Francois Gygi committed
127 128 129 130 131 132 133 134 135
// #else
//   while ( n % 5 == 0 ) n /= 5;
//   while ( n % 3 == 0 ) n /= 3;
//   while ( n % 2 == 0 ) n /= 2;
//   return ( n == 1 );
// #endif
}

////////////////////////////////////////////////////////////////////////////////
Francois Gygi committed
136
int Basis::np(int i) const { return np_[i]; }
Francois Gygi committed
137 138

////////////////////////////////////////////////////////////////////////////////
Francois Gygi committed
139
const D3vector Basis::kpoint(void) const { return kpoint_; }
Francois Gygi committed
140 141

////////////////////////////////////////////////////////////////////////////////
Francois Gygi committed
142
bool Basis::real(void) const { return real_; }
Francois Gygi committed
143 144 145 146 147 148 149 150 151

inline double sqr( double x ) { return x*x; }
inline void swap(int &x, int &y) { int tmp = x; x = y; y = tmp; }

////////////////////////////////////////////////////////////////////////////////
class Rod
{
  // z-column of non-zero reciprocal lattice vectors
  int h_, k_, lmin_, size_;
152

Francois Gygi committed
153
  public:
154
  Rod(int h, int k, int lmin, int size) : h_(h), k_(k),
Francois Gygi committed
155
  lmin_(lmin), size_(size) {}
156

Francois Gygi committed
157 158 159 160 161 162 163 164 165 166 167 168 169 170
  int h(void) const { return h_; }
  int k(void) const { return k_; }
  int lmin(void) const { return lmin_; }
  int size(void) const { return size_; }
  bool operator< (const Rod& x) const
  {
    return size_ < x.size();
  }
};

////////////////////////////////////////////////////////////////////////////////
class Node
{
  int id_, nrods_, size_;
171

Francois Gygi committed
172 173 174
  public:
  Node() : id_(0), nrods_(0), size_(0) {}
  Node(int id) : id_(id), nrods_(0), size_(0) {}
175

Francois Gygi committed
176 177 178
  int id(void) const { return id_; }
  int nrods(void) const { return nrods_; }
  int size(void) const { return size_; }
179

Francois Gygi committed
180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
  void addrod(const Rod& r)
  {
    nrods_++;
    size_ += r.size();
  }
  bool operator< (const Node& x) const
  {
    return size_ < x.size();
  }
};

////////////////////////////////////////////////////////////////////////////////
template <class T>
struct ptr_less
{
  public:
  bool operator() ( T* x, T* y ) { return *x < *y; }
};
template <class T>
struct ptr_greater
{
  public:
  bool operator() ( T* x, T* y ) { return *y < *x; }
};

////////////////////////////////////////////////////////////////////////////////
template <class T>
struct VectorLess
{
  // function object for indirect comparison of vector elements
  public:
  vector<T>& a_;
  VectorLess<T>(vector<T>& a) : a_(a) {};
  bool operator() (int i, int j) const
  {
    return a_[i] < a_[j];
  }
};
218

Francois Gygi committed
219
////////////////////////////////////////////////////////////////////////////////
220
Basis::Basis(MPI_Comm comm, D3vector kpoint) : comm_(comm)
Francois Gygi committed
221 222 223
{
  // Construct the default empty basis
  // cell and refcell are (0,0,0)
224 225
  MPI_Comm_rank(comm_,&mype_);
  MPI_Comm_size(comm_,&npes_);
Francois Gygi committed
226 227 228 229

  ecut_ = 0.0;
  kpoint_ = kpoint;
  real_ = ( kpoint == D3vector(0.0,0.0,0.0) );
230

231 232 233 234 235 236 237
  localsize_.resize(npes_);
  nrod_loc_.resize(npes_);
  rod_h_.resize(npes_);
  rod_k_.resize(npes_);
  rod_lmin_.resize(npes_);
  rod_size_.resize(npes_);
  rod_first_.resize(npes_);
238

Francois Gygi committed
239 240 241 242 243
  // resize with zero cutoff to initialize empty Basis
  resize(cell_,refcell_,0.0);
}

////////////////////////////////////////////////////////////////////////////////
Francois Gygi committed
244
Basis::~Basis(void) {}
Francois Gygi committed
245 246

////////////////////////////////////////////////////////////////////////////////
Francois Gygi committed
247
bool Basis::resize(const UnitCell& cell, const UnitCell& refcell,
Francois Gygi committed
248 249 250 251 252
  double ecut)
{
  assert(ecut>=0.0);
  assert(cell.volume() >= 0.0);
  assert(refcell.volume() >= 0.0);
253

254 255 256 257 258 259 260
  if ( ecut == ecut_ && refcell == refcell_ && refcell_.volume() != 0.0 )
  {
    cell_ = cell;
    // only the cell changes, ecut and the refcell remain unchanged
    update_g();
    return true;
  }
261

Francois Gygi committed
262 263 264
  ecut_ = ecut;
  cell_ = cell;
  refcell_ = refcell;
265

Francois Gygi committed
266 267 268 269 270 271 272 273
  if ( ecut == 0.0 || cell.volume() == 0.0)
  {
    idxmax_[0] = 0;
    idxmax_[1] = 0;
    idxmax_[2] = 0;
    idxmin_[0] = 0;
    idxmin_[1] = 0;
    idxmin_[2] = 0;
274

Francois Gygi committed
275 276
    size_ = 0;
    nrods_ = 0;
277
    for ( int ipe = 0; ipe < npes_; ipe++ )
Francois Gygi committed
278 279 280 281 282 283
    {
      localsize_[ipe] = 0;
      nrod_loc_[ipe] = 0;
    }
    maxlocalsize_ = minlocalsize_ = 0;
    np_[0] = np_[1] = np_[2] = 0;
284 285 286 287 288 289 290 291 292 293 294 295
    idx_.resize(3*localsize_[mype_]);
    g_.resize(localsize_[mype_]);
    kpg_.resize(localsize_[mype_]);
    gi_.resize(localsize_[mype_]);
    kpgi_.resize(localsize_[mype_]);
    g2_.resize(localsize_[mype_]);
    kpg2_.resize(localsize_[mype_]);
    g2i_.resize(localsize_[mype_]);
    kpg2i_.resize(localsize_[mype_]);
    gx_.resize(3*localsize_[mype_]);
    kpgx_.resize(3*localsize_[mype_]);
    isort_loc.resize(localsize_[mype_]);
Francois Gygi committed
296 297
    return true;
  }
298

Francois Gygi committed
299 300 301 302 303 304
  const double two_ecut = 2.0 * ecut;
  const double twopi = 2.0 * M_PI;

  const double kpx = kpoint_.x;
  const double kpy = kpoint_.y;
  const double kpz = kpoint_.z;
305

Francois Gygi committed
306 307 308 309
  UnitCell defcell;
  // defcell: cell used to define which vectors are contained in the Basis
  // if refcell is defined, defcell = refcell
  // otherwise, defcell = cell
310
  if ( norm2(refcell.b(0)) + norm2(refcell.b(1)) + norm2(refcell.b(2)) == 0.0 )
Francois Gygi committed
311 312 313 314 315 316 317
  {
    defcell = cell;
  }
  else
  {
    defcell = refcell;
  }
318

Francois Gygi committed
319 320 321
  const D3vector b0 = defcell.b(0);
  const D3vector b1 = defcell.b(1);
  const D3vector b2 = defcell.b(2);
322

323
  const double normb2 = norm2(b2);
Francois Gygi committed
324
  const double b2inv2 = 1.0 / normb2;
325

Francois Gygi committed
326
  const D3vector kp = kpx*b0 + kpy*b1 + kpz*b2;
327

Francois Gygi committed
328
  if ( !cell.in_bz(kp) )
329
  {
330
    if ( mype_ == 0 )
331 332 333
      cout << " Basis::resize: warning: " << kpoint_
           << " out of the BZ: " << kp << endl;
  }
Francois Gygi committed
334 335 336

  const double fac = sqrt(two_ecut) / twopi;

337
  // define safe enclosing domain for any k-point value in the BZ
338
  const int hmax = (int) ( 1.5 + fac * ( length(defcell.a(0) ) ) );
339
  const int hmin = - hmax;
340

341
  const int kmax = (int) ( 1.5 + fac * ( length(defcell.a(1) ) ) );
342
  const int kmin = - kmax;
343

344
  const int lmax = (int) ( 1.5 + fac * ( length(defcell.a(2) ) ) );
345
  const int lmin = - lmax;
346

Francois Gygi committed
347
  multiset<Rod> rodset;
348

Francois Gygi committed
349
  // build rod set
350

351 352 353 354 355 356
  int hmax_used = hmin;
  int hmin_used = hmax;
  int kmax_used = kmin;
  int kmin_used = kmax;
  int lmax_used = lmin;
  int lmin_used = lmax;
Francois Gygi committed
357 358 359 360 361 362

  if ( real_ )
  {
    // build basis at kpoint (0,0,0)

    // rod(0,0,0)
363 364 365 366
    // length of rod(0,0,0) is lend+1
    int lend = (int) ( sqrt(two_ecut * b2inv2) );
    size_ = lend + 1;
    rodset.insert(Rod(0,0,0,lend+1));
Francois Gygi committed
367 368
    nrods_ = 1;

369
    hmax_used = 0;
Francois Gygi committed
370
    hmin_used = 0;
371 372 373 374
    kmin_used = 0;
    kmax_used = 0;
    lmin_used = 0;
    lmax_used = lend;
375

Francois Gygi committed
376
    // rods (0,k,l)
377
    for ( int k = 1; k <= kmax; k++ )
Francois Gygi committed
378
    {
379 380
      int lstart=lmax,lend=lmin;
      bool found = false;
381
      for ( int l = lmin; l <= lmax; l++ )
Francois Gygi committed
382
      {
383
        const double two_e = norm2(k*b1+l*b2);
384
        if ( two_e < two_ecut )
Francois Gygi committed
385
        {
386 387 388
          lstart = min(l,lstart);
          lend = max(l,lend);
          found = true;
Francois Gygi committed
389 390
        }
      }
391 392 393 394 395 396 397 398 399 400 401 402
      if ( found )
      {
        // non-zero intersection was found
        const int rodsize = lend - lstart + 1;
        size_ += rodsize;
        rodset.insert(Rod(0,k,lstart,rodsize));
        nrods_++;
        kmax_used = max(k,kmax_used);
        kmin_used = min(k,kmin_used);
        lmax_used = max(lend,lmax_used);
        lmin_used = min(lstart,lmin_used);
      }
Francois Gygi committed
403 404
    }
    // rods (h,k,l) h>0
405
    for ( int h = 1; h <= hmax; h++ )
Francois Gygi committed
406
    {
407
      for ( int k = kmin; k <= kmax; k++ )
Francois Gygi committed
408
      {
409 410
        int lstart=lmax,lend=lmin;
        bool found = false;
411
        for ( int l = lmin; l <= lmax; l++ )
Francois Gygi committed
412
        {
413
          const double two_e = norm2(h*b0+k*b1+l*b2);
414
          if ( two_e < two_ecut )
Francois Gygi committed
415
          {
416 417 418
            lstart = min(l,lstart);
            lend = max(l,lend);
            found = true;
Francois Gygi committed
419 420
          }
        }
421 422 423 424 425 426 427 428 429 430 431 432 433 434
        if ( found )
        {
          // non-zero intersection was found
          const int rodsize = lend - lstart + 1;
          size_ += rodsize;
          rodset.insert(Rod(h,k,lstart,rodsize));
          nrods_++;
          hmax_used = max(h,hmax_used);
          hmin_used = min(h,hmin_used);
          kmax_used = max(k,kmax_used);
          kmin_used = min(k,kmin_used);
          lmax_used = max(lend,lmax_used);
          lmin_used = min(lstart,lmin_used);
        }
Francois Gygi committed
435 436 437 438 439 440 441 442 443 444
      }
    }
  }
  else
  {
    // build Basis for k != (0,0,0)

    size_ = 0;
    nrods_ = 0;
    // rods (h,k,l)
445
    for ( int h = hmin; h <= hmax; h++ )
Francois Gygi committed
446
    {
447
      for ( int k = kmin; k <= kmax; k++ )
Francois Gygi committed
448
      {
449 450
        int lstart=lmax,lend=lmin;
        bool found = false;
451
        for ( int l = lmin; l <= lmax; l++ )
Francois Gygi committed
452
        {
453
          const double two_e = norm2((kpx+h)*b0 + (kpy+k)*b1 + (kpz+l)*b2);
454
          if ( two_e < two_ecut )
Francois Gygi committed
455
          {
456 457 458
            lstart = min(l,lstart);
            lend = max(l,lend);
            found = true;
Francois Gygi committed
459 460
          }
        }
461 462 463 464 465 466 467 468 469 470 471 472 473 474
        if ( found )
        {
          // non-zero intersection was found
          const int rodsize = lend - lstart + 1;
          size_ += rodsize;
          rodset.insert(Rod(h,k,lstart,rodsize));
          nrods_++;
          hmax_used = max(h,hmax_used);
          hmin_used = min(h,hmin_used);
          kmax_used = max(k,kmax_used);
          kmin_used = min(k,kmin_used);
          lmax_used = max(lend,lmax_used);
          lmin_used = min(lstart,lmin_used);
        }
Francois Gygi committed
475 476 477
      }
    }
  }
478

479 480 481 482 483 484 485 486
#if DEBUG
  cout << " hmin/hmax: " << hmin << " / " << hmax << endl;
  cout << " kmin/kmax: " << kmin << " / " << kmax << endl;
  cout << " lmin/lmax: " << lmin << " / " << lmax << endl;
  cout << " hmin/hmax used: " << hmin_used << " / " << hmax_used << endl;
  cout << " kmin/kmax used: " << kmin_used << " / " << kmax_used << endl;
  cout << " lmin/lmax used: " << lmin_used << " / " << lmax_used << endl;
#endif
Francois Gygi committed
487 488 489

  idxmax_[0] = hmax_used;
  idxmin_[0] = hmin_used;
490

Francois Gygi committed
491 492
  idxmax_[1] = kmax_used;
  idxmin_[1] = kmin_used;
493

Francois Gygi committed
494 495
  idxmax_[2] = lmax_used;
  idxmin_[2] = lmin_used;
496

497 498 499
  assert(hmax_used - hmin_used + 1 <= 2 * hmax);
  assert(kmax_used - kmin_used + 1 <= 2 * kmax);
  assert(lmax_used - lmin_used + 1 <= 2 * lmax);
500

Francois Gygi committed
501
  // compute good FFT sizes
502 503
  // use values independent of the kpoint
  int n;
504
  n = 2 * hmax;
505 506
  while ( !factorizable(n) ) n+=2;
  np_[0] = n;
507
  n = 2 * kmax;
508 509
  while ( !factorizable(n) ) n+=2;
  np_[1] = n;
510
  n = 2 * lmax;
511 512
  while ( !factorizable(n) ) n+=2;
  np_[2] = n;
Francois Gygi committed
513

514
  // Distribute the basis on npes_ processors
Francois Gygi committed
515 516

  // build a min-heap of Nodes
517

518
  vector<Node*> nodes(npes_);
519

520
  for ( int ipe = 0; ipe < npes_; ipe++ )
Francois Gygi committed
521 522 523 524 525 526 527 528 529 530 531
  {
    nodes[ipe] = new Node(ipe);
    localsize_[ipe] = 0;
    nrod_loc_[ipe] = 0;
    rod_h_[ipe].resize(0);
    rod_k_[ipe].resize(0);
    rod_lmin_[ipe].resize(0);
    rod_size_[ipe].resize(0);
  }

  // nodes contains a valid min-heap of zero-size Nodes
532

Francois Gygi committed
533 534 535 536 537 538 539 540
  // insert rods into the min-heap
  // keep track of where rod(0,0,0) goes
  int pe_rod0 = -1, rank_rod0 = -1;
  multiset<Rod>::iterator p = rodset.begin();
  while ( p != rodset.end() )
  {
    // pop smallest element
    pop_heap(nodes.begin(), nodes.end(), ptr_greater<Node>());
541

Francois Gygi committed
542
    // add rod size to smaller element
543 544
    nodes[npes_-1]->addrod(*p);
    int ipe = nodes[npes_-1]->id();
Francois Gygi committed
545 546 547 548 549 550 551 552 553 554 555

    // update info about rod on process ipe
    nrod_loc_[ipe]++;
    rod_h_[ipe].push_back(p->h());
    rod_k_[ipe].push_back(p->k());
    rod_lmin_[ipe].push_back(p->lmin());
    rod_size_[ipe].push_back(p->size());
    localsize_[ipe] += p->size();
    if ( p->h() == 0 && p->k() == 0 )
    {
      pe_rod0 = ipe;
556
      rank_rod0 = nodes[npes_-1]->nrods()-1;
Francois Gygi committed
557 558 559 560
    }

    // push modified element back in the heap
    push_heap(nodes.begin(), nodes.end(), ptr_greater<Node>());
561

Francois Gygi committed
562 563
    p++;
  }
564 565

  maxlocalsize_ = (*max_element(nodes.begin(), nodes.end(),
Francois Gygi committed
566
    ptr_less<Node>()))->size();
567
  minlocalsize_ = (*min_element(nodes.begin(), nodes.end(),
Francois Gygi committed
568
    ptr_less<Node>()))->size();
569

570
  for ( int ipe = 0; ipe < npes_; ipe++ )
Francois Gygi committed
571 572 573
  {
    delete nodes[ipe];
  }
574

Francois Gygi committed
575 576 577 578 579 580 581
  // swap node pe_rod0 with node 0 in order to have rod(0,0,0) on node 0
  swap(nrod_loc_[0], nrod_loc_[pe_rod0]);
  rod_h_[pe_rod0].swap(rod_h_[0]);
  rod_k_[pe_rod0].swap(rod_k_[0]);
  rod_lmin_[pe_rod0].swap(rod_lmin_[0]);
  rod_size_[pe_rod0].swap(rod_size_[0]);
  swap(localsize_[0], localsize_[pe_rod0]);
582
  //Node *tmpnodeptr = nodes[0]; nodes[0] = nodes[pe_rod0];
Francois Gygi committed
583
  //  nodes[pe_rod0]=tmpnodeptr;
584

Francois Gygi committed
585 586 587 588 589
  // reorder rods on node 0 so that rod(0,0,0) comes first
  swap(rod_h_[0][rank_rod0], rod_h_[0][0]);
  swap(rod_k_[0][rank_rod0], rod_k_[0][0]);
  swap(rod_lmin_[0][rank_rod0], rod_lmin_[0][0]);
  swap(rod_size_[0][rank_rod0], rod_size_[0][0]);
590

Francois Gygi committed
591
  // compute position of first element of rod (ipe,irod)
592
  for ( int ipe = 0; ipe < npes_; ipe++ )
Francois Gygi committed
593 594 595 596 597 598 599 600 601
  {
    rod_first_[ipe].resize(nrod_loc_[ipe]);
    if ( nrod_loc_[ipe] > 0 )
      rod_first_[ipe][0] = 0;
    for ( int irod = 1; irod < nrod_loc_[ipe]; irod++ )
    {
      rod_first_[ipe][irod] = rod_first_[ipe][irod-1] + rod_size_[ipe][irod-1];
    }
  }
602

Francois Gygi committed
603
  // local arrays idx, g, gi, g2i, g2, gx
604
  idx_.resize(3*localsize_[mype_]);
Francois Gygi committed
605
  int i = 0;
606
  for ( int irod = 0; irod < nrod_loc_[mype_]; irod++ )
Francois Gygi committed
607
  {
608
    for ( int l = 0; l < rod_size_[mype_][irod]; l++ )
Francois Gygi committed
609
    {
610 611 612
      idx_[3*i]   = rod_h_[mype_][irod];
      idx_[3*i+1] = rod_k_[mype_][irod];
      idx_[3*i+2] = rod_lmin_[mype_][irod] + l;
613

Francois Gygi committed
614 615 616
      i++;
    }
  }
617

618 619 620 621 622 623 624 625 626 627 628
  g_.resize(localsize_[mype_]);
  kpg_.resize(localsize_[mype_]);
  gi_.resize(localsize_[mype_]);
  kpgi_.resize(localsize_[mype_]);
  g2_.resize(localsize_[mype_]);
  kpg2_.resize(localsize_[mype_]);
  g2i_.resize(localsize_[mype_]);
  kpg2i_.resize(localsize_[mype_]);
  gx_.resize(3*localsize_[mype_]);
  kpgx_.resize(3*localsize_[mype_]);
  isort_loc.resize(localsize_[mype_]);
629

Francois Gygi committed
630
  update_g();
631

Francois Gygi committed
632
  // basis set construction is complete
633

Francois Gygi committed
634 635 636 637
  return true;
}

////////////////////////////////////////////////////////////////////////////////
Francois Gygi committed
638
void Basis::update_g(void)
Francois Gygi committed
639
{
Francois Gygi committed
640
  // compute the values of g, kpg, gi, g2i, g2, kpg2, gx
Francois Gygi committed
641
  // N.B. use the values of cell (not defcell)
642

643
  const int locsize = localsize_[mype_];
Francois Gygi committed
644 645 646 647 648
  for ( int i = 0; i < locsize; i++ )
  {
    D3vector gt = idx_[3*i+0] * cell_.b(0) +
                  idx_[3*i+1] * cell_.b(1) +
                  idx_[3*i+2] * cell_.b(2);
649

Francois Gygi committed
650 651 652
    D3vector kpgt = (kpoint_.x + idx_[3*i+0]) * cell_.b(0) +
                    (kpoint_.y + idx_[3*i+1]) * cell_.b(1) +
                    (kpoint_.z + idx_[3*i+2]) * cell_.b(2);
653

Francois Gygi committed
654 655 656
    gx_[i] = gt.x;
    gx_[locsize+i] = gt.y;
    gx_[locsize+locsize+i] = gt.z;
Francois Gygi committed
657 658 659
    kpgx_[i] = kpgt.x;
    kpgx_[locsize+i] = kpgt.y;
    kpgx_[locsize+locsize+i] = kpgt.z;
Francois Gygi committed
660

661
    g2_[i] = norm2(gt);
Francois Gygi committed
662
    g_[i] = sqrt( g2_[i] );
663

664
    kpg2_[i] = norm2(kpgt);
Francois Gygi committed
665
    kpg_[i] = sqrt( kpg2_[i] );
666

Francois Gygi committed
667
    gi_[i] = g_[i] > 0.0 ? 1.0 / g_[i] : 0.0;
Francois Gygi committed
668
    kpgi_[i] = kpg_[i] > 0.0 ? 1.0 / kpg_[i] : 0.0;
Francois Gygi committed
669
    g2i_[i] = gi_[i] * gi_[i];
Francois Gygi committed
670
    kpg2i_[i] = kpgi_[i] * kpgi_[i];
Francois Gygi committed
671 672
    isort_loc[i] = i;
  }
673

Francois Gygi committed
674 675 676 677 678
  VectorLess<double> g2_less(g2_);
  sort(isort_loc.begin(), isort_loc.end(), g2_less);
#if DEBUG
  for ( int i = 0; i < locsize; i++ )
  {
679
    cout << mype_ << " sorted " << i << " " << g2_[isort_loc[i]] << endl;
Francois Gygi committed
680 681 682 683 684 685 686
  }
#endif
}

////////////////////////////////////////////////////////////////////////////////
void Basis::print(ostream& os)
{
687
  os << mype_ << ": ";
Francois Gygi committed
688
  os << " Basis.kpoint():   " << kpoint() << endl;
689
  os << mype_ << ": ";
Francois Gygi committed
690 691 692
  os << " Basis.kpoint():   " << kpoint().x << " * b0 + "
                              << kpoint().y << " * b1 + "
                              << kpoint().z << " * b2" << endl;
693
  os << mype_ << ": ";
694
  os << " Basis.kpoint():   " << kpoint().x * cell().b(0) +
Francois Gygi committed
695 696
                                 kpoint().y * cell().b(1) +
                                 kpoint().z * cell().b(2) << endl;
697
  os << mype_ << ": ";
Francois Gygi committed
698
  os << " Basis.cell():     " << endl << cell() << endl;
699
  os << mype_ << ": ";
Francois Gygi committed
700
  os << " Basis.ref cell(): " << endl << refcell() << endl;
701
  os << mype_ << ": ";
Francois Gygi committed
702
  os << " Basis.ecut():     " << ecut() << endl;
703
  os << mype_ << ": ";
Francois Gygi committed
704 705
  os << " Basis.np(0,1,2):  " << np(0) << " "
       << np(1) << " " << np(2) << endl;
706
  os << mype_ << ": ";
Francois Gygi committed
707 708
  os << " Basis.idxmin:       " << idxmin(0) << " "
       << idxmin(1) << " " << idxmin(2) << endl;
709
  os << mype_ << ": ";
Francois Gygi committed
710 711
  os << " Basis.idxmax:       " << idxmax(0) << " "
       << idxmax(1) << " " << idxmax(2) << endl;
712
  os << mype_ << ": ";
Francois Gygi committed
713
  os << " Basis.size():     " << size() << endl;
714
  os << mype_ << ": ";
Francois Gygi committed
715
  os << " Basis.localsize(): " << localsize() << endl;
716
  os << mype_ << ": ";
Francois Gygi committed
717
  os << " Basis.nrods():    " << nrods() << endl;
718
  os << mype_ << ": ";
Francois Gygi committed
719
  os << " Basis.real():     " << real() << endl;
720
  os << mype_ << ": ";
Francois Gygi committed
721
  os << " Basis total mem size (MB): " << memsize() / 1048576 << endl;
722
  os << mype_ << ": ";
Francois Gygi committed
723
  os << " Basis local mem size (MB): " << localmemsize() / 1048576 << endl;
724

725
  os << mype_ << ": ";
726
  os << "   ig      i   j   k        gx      gy      gz       |k+g|^2"
Francois Gygi committed
727
     << endl;
728
  os << mype_ << ": ";
729
  os << "   --      -   -   -        --      --      --       -------"
Francois Gygi committed
730 731 732
     << endl;
  for ( int i = 0; i < localsize(); i++ )
  {
733
    os << mype_ << ": ";
734 735 736
    os << setw(5) << i << "   "
       << setw(4) << idx(3*i)
       << setw(4) << idx(3*i+1)
Francois Gygi committed
737 738
       << setw(4) << idx(3*i+2)
       << "    "
Francois Gygi committed
739 740 741
       << setw(8) << setprecision(4) << gx(i)
       << setw(8) << setprecision(4) << gx(i+localsize())
       << setw(8) << setprecision(4) << gx(i+2*localsize())
Francois Gygi committed
742 743 744 745 746 747 748 749 750 751 752
       << setw(12) << setprecision(4) << 0.5 * kpg2(i)
       << endl;
  }
}

////////////////////////////////////////////////////////////////////////////////
ostream& operator<<(ostream& os, Basis& b)
{
  b.print(os);
  return os;
}