testFourierTransform.C 17 KB
Newer Older
Francois Gygi committed
1 2 3 4 5 6
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2008 The Regents of the University of California
//
// This file is part of Qbox
//
Francois Gygi committed
7 8
// Qbox is distributed under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 2 of
Francois Gygi committed
9 10 11 12
// the License, or (at your option) any later version.
// See the file COPYING in the root directory of this distribution
// or <http://www.gnu.org/licenses/>.
//
Francois Gygi committed
13 14 15
//
// testFourierTransform.C
//
16 17 18 19
// test and timing of the Qbox FourierTransform class
//
// The FourierTransform functionality is tested in the following 8 tests
// relevant to the use of the class in different parts of Qbox
Francois Gygi committed
20 21 22

#include <iostream>
#include <iomanip>
23
#include <cstdlib>
Francois Gygi committed
24 25 26 27 28 29
using namespace std;

#include "Basis.h"
#include "FourierTransform.h"
#include "Timer.h"

Francois Gygi committed
30 31 32 33
#if USE_APC
#include "apc.h"
#endif

34
double fft_flops(int n)
35
{
36
  return 5.0 * n * log((double) n) / log(2.0);
37 38
}

Francois Gygi committed
39 40 41 42 43
int main(int argc, char **argv)
{
  Timer tm;
#if USE_MPI
  MPI_Init(&argc,&argv);
Francois Gygi committed
44 45 46
#endif
#if USE_APC
  ApcInit();
Francois Gygi committed
47 48
#endif
  {
49

50
#if USE_MPI
Francois Gygi committed
51 52 53
  char processor_name[MPI_MAX_PROCESSOR_NAME];
  int namelen;
  PMPI_Get_processor_name(processor_name,&namelen);
54 55 56
#else
  const char* processor_name = "serial";
#endif
Francois Gygi committed
57

58 59
  int mype;
  MPI_Comm_rank(MPI_COMM_WORLD,&mype);
60
  cout << " Process " << mype << " on " << processor_name << endl;
Francois Gygi committed
61

62 63 64
  D3vector a,b,c,kpoint;
  double ecut;
  if ( argc == 5 )
Francois Gygi committed
65
  {
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
    a = D3vector(atof(argv[1]),0,0);
    b = D3vector(0,atof(argv[2]),0);
    c = D3vector(0,0,atof(argv[3]));
    ecut = atof(argv[4]);
  }
  else if ( argc == 11 )
  {
    a = D3vector(atof(argv[1]),atof(argv[2]),atof(argv[3]));
    b = D3vector(atof(argv[4]),atof(argv[5]),atof(argv[6]));
    c = D3vector(atof(argv[7]),atof(argv[8]),atof(argv[9]));
    ecut = atof(argv[10]);
  }
  else if ( argc == 14 )
  {
    a = D3vector(atof(argv[1]),atof(argv[2]),atof(argv[3]));
    b = D3vector(atof(argv[4]),atof(argv[5]),atof(argv[6]));
    c = D3vector(atof(argv[7]),atof(argv[8]),atof(argv[9]));
    ecut = atof(argv[10]);
    kpoint = D3vector(atof(argv[11]),atof(argv[12]),atof(argv[13]));
  }
  else
  {
    cout << " use: testFourierTransform a b c ecut(a.u.) [kpoint] " << endl;
89
    return 1;
Francois Gygi committed
90 91 92
  }
  UnitCell cell(a,b,c);
  const double omega = cell.volume();
93

Francois Gygi committed
94 95
  // start scope of wf-v transforms
  {
96 97
  // transform and interpolate as for wavefunctions

98
  Basis basis(MPI_COMM_WORLD,kpoint);
Francois Gygi committed
99
  basis.resize(cell,cell,ecut);
100
  FourierTransform ft(basis,basis.np(0),basis.np(1),basis.np(2));
101
  FourierTransform ft2(basis,2*basis.np(0),2*basis.np(1),2*basis.np(2));
102
  vector<complex<double> > f1(ft.np012loc());
103
  vector<complex<double> > f2(ft2.np012loc());
Francois Gygi committed
104
  vector<complex<double> > x(basis.localsize());
105 106
  vector<complex<double> > x1(basis.localsize());
  vector<complex<double> > x2(basis.localsize());
107 108 109 110

  double flops = 2*basis.nrod_loc() *      fft_flops(ft2.np2()) +
                 ft2.np1()/2 * ft2.np2() * fft_flops(ft2.np0()) +
                 ft2.np0()   * ft2.np2() * fft_flops(ft2.np1());
111
  if ( mype == 0 )
Francois Gygi committed
112
  {
113 114
    cout << " wfbasis.size() = " << basis.size() << endl;
    cout << " wfbasis.np() = " << basis.np(0) << " " << basis.np(1)
Francois Gygi committed
115
         << " " << basis.np(2) << endl;
116
    cout << " flop count: " << flops << endl;
Francois Gygi committed
117
  }
118
  cout << " wfbasis.nrod_loc(): " << basis.nrod_loc() << endl;
119
  cout << " zvec.size: "
120 121 122
       << 2*basis.nrod_loc()*ft2.np2() * sizeof(complex<double>)
       << endl;

Francois Gygi committed
123 124 125
  cout.setf(ios::fixed,ios::floatfield);
  cout.setf(ios::right,ios::adjustfield);
  cout << setprecision(6);
126

Francois Gygi committed
127 128 129 130 131 132 133 134
  const double rc = 1.0;
#if 1
  // Initialize with Fourier coefficients of a normalized gaussian distribution
  for ( int i = 0; i < basis.localsize(); i++ )
  {
    double g2 = basis.g2(i);
    double y = 1.0/omega * exp( -0.25 * g2 * rc*rc );
    x[i] = y;
135 136
    x1[i] = y;
    x2[i] = y;
Francois Gygi committed
137 138 139 140
    // x[i] = complex<double>(y,y);
  }
#endif

141
  // test ft (small grid)
142
  cout << mype << ": ft.np2_loc(): " << ft.np2_loc() << endl;
143
  MPI_Barrier(MPI_COMM_WORLD);
Francois Gygi committed
144
  cout << " test ft: ";
145 146 147 148
  ft.forward(&f1[0],&x[0]);
  cout << " forward done ";
  ft.backward(&x[0],&f1[0]);
  cout << " backward done " << endl;
149
  MPI_Barrier(MPI_COMM_WORLD);
150

151
#if 1
152

153
  MPI_Barrier(MPI_COMM_WORLD);
154 155 156
  tm.reset();
  ft2.reset_timers();
  tm.start();
Francois Gygi committed
157 158 159
#if USE_APC
  ApcStart(1);
#endif
160
  ft2.forward(&f2[0],&x[0]);
Francois Gygi committed
161 162 163
#if USE_APC
  ApcStop(1);
#endif
164
  tm.stop();
165 166
  cout << " fwd1: size: " << ft2.np0() << " "
       << ft2.np1() << " " << ft2.np2() << endl;
Francois Gygi committed
167
  cout << " fwd1: vgrid->wf" << endl;
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
  cout << " fwd1: tm_f_fft:    " << ft2.tm_f_fft.real() << endl;
  cout << " fwd1: tm_f_mpi:    " << ft2.tm_f_mpi.real() << endl;
  cout << " fwd1: tm_f_pack:   " << ft2.tm_f_pack.real() << endl;
  cout << " fwd1: tm_f_unpack: " << ft2.tm_f_unpack.real() << endl;
  cout << " fwd1: tm_f_zero:   " << ft2.tm_f_zero.real() << endl;
  cout << " fwd1: tm_f_map:    " << ft2.tm_f_map.real() << endl;
  cout << " fwd1: tm_f_total:  " << ft2.tm_f_fft.real() +
                                    ft2.tm_f_mpi.real() +
                                    ft2.tm_f_pack.real() +
                                    ft2.tm_f_unpack.real() +
                                    ft2.tm_f_zero.real() +
                                    ft2.tm_f_map.real() << endl;
  cout << " fwd1 time: " << tm.cpu() << " / " << tm.real()
  << "    " << 1.e-6*flops/tm.real() << " MFlops" << endl;

183
  MPI_Barrier(MPI_COMM_WORLD);
184 185 186
  tm.reset();
  ft2.reset_timers();
  tm.start();
Francois Gygi committed
187 188 189
#if USE_APC
  ApcStart(2);
#endif
190
  ft2.backward(&x[0],&f2[0]);
Francois Gygi committed
191 192 193
#if USE_APC
  ApcStop(2);
#endif
194
  tm.stop();
195 196
  cout << " bwd1: size: " << ft2.np0() << " "
       << ft2.np1() << " " << ft2.np2() << endl;
Francois Gygi committed
197
  cout << " bwd1: wf->vgrid" << endl;
198 199 200 201 202 203 204 205 206 207 208 209 210 211
  cout << " bwd1: tm_b_fft:    " << ft2.tm_b_fft.real() << endl;
  cout << " bwd1: tm_b_mpi:    " << ft2.tm_b_mpi.real() << endl;
  cout << " bwd1: tm_b_pack:   " << ft2.tm_b_pack.real() << endl;
  cout << " bwd1: tm_b_unpack: " << ft2.tm_b_unpack.real() << endl;
  cout << " bwd1: tm_b_zero:   " << ft2.tm_b_zero.real() << endl;
  cout << " bwd1: tm_b_map:    " << ft2.tm_b_map.real() << endl;
  cout << " bwd1: tm_b_total:  " << ft2.tm_b_fft.real() +
                                    ft2.tm_b_mpi.real() +
                                    ft2.tm_b_pack.real() +
                                    ft2.tm_b_unpack.real() +
                                    ft2.tm_b_zero.real() +
                                    ft2.tm_b_map.real() << endl;
  cout << " bwd1 time: " << tm.cpu() << " / " << tm.real()
  << "    " << 1.e-6*flops/tm.real() << " MFlops" << endl;
212

213
  MPI_Barrier(MPI_COMM_WORLD);
214 215 216
  tm.reset();
  ft2.reset_timers();
  tm.start();
Francois Gygi committed
217 218 219
#if USE_APC
  ApcStart(3);
#endif
220
  ft2.forward(&f2[0],&x[0]);
Francois Gygi committed
221 222 223
#if USE_APC
  ApcStop(3);
#endif
224
  tm.stop();
225 226
  cout << " fwd2: size: " << ft2.np0() << " "
       << ft2.np1() << " " << ft2.np2() << endl;
Francois Gygi committed
227
  cout << " fwd2: vgrid->wf" << endl;
228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245
  cout << " fwd2: tm_f_fft:    " << ft2.tm_f_fft.real() << endl;
  cout << " fwd2: tm_f_mpi:    " << ft2.tm_f_mpi.real() << endl;
  cout << " fwd2: tm_f_pack:   " << ft2.tm_f_pack.real() << endl;
  cout << " fwd2: tm_f_unpack: " << ft2.tm_f_unpack.real() << endl;
  cout << " fwd2: tm_f_zero:   " << ft2.tm_f_zero.real() << endl;
  cout << " fwd2: tm_f_map:    " << ft2.tm_f_map.real() << endl;
  cout << " fwd2: tm_f_total:  " << ft2.tm_f_fft.real() +
                                    ft2.tm_f_mpi.real() +
                                    ft2.tm_f_pack.real() +
                                    ft2.tm_f_unpack.real() +
                                    ft2.tm_f_zero.real() +
                                    ft2.tm_f_map.real() << endl;

  //cout << " " << 2*basis.np(0) << " " << 2*basis.np(1)
  //     << " " << 2*basis.np(2) << " ";
  cout << " fwd2 time: " << tm.cpu() << " / " << tm.real()
  << "    " << 1.e-6*flops/tm.real() << " MFlops" << endl;

246
  MPI_Barrier(MPI_COMM_WORLD);
247 248 249
  tm.reset();
  ft2.reset_timers();
  tm.start();
Francois Gygi committed
250 251 252
#if USE_APC
  ApcStart(4);
#endif
253
  ft2.backward(&x[0],&f2[0]);
Francois Gygi committed
254 255 256
#if USE_APC
  ApcStop(4);
#endif
257
  tm.stop();
258 259
  cout << " bwd2: size: " << ft2.np0() << " "
       << ft2.np1() << " " << ft2.np2() << endl;
Francois Gygi committed
260
  cout << " bwd2: wf->vgrid" << endl;
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277
  cout << " bwd2: tm_b_fft:    " << ft2.tm_b_fft.real() << endl;
  cout << " bwd2: tm_b_mpi:    " << ft2.tm_b_mpi.real() << endl;
  cout << " bwd2: tm_b_pack:   " << ft2.tm_b_pack.real() << endl;
  cout << " bwd2: tm_b_unpack: " << ft2.tm_b_unpack.real() << endl;
  cout << " bwd2: tm_b_zero:   " << ft2.tm_b_zero.real() << endl;
  cout << " bwd2: tm_b_map:    " << ft2.tm_b_map.real() << endl;
  cout << " bwd2: tm_b_total:  " << ft2.tm_b_fft.real() +
                                    ft2.tm_b_mpi.real() +
                                    ft2.tm_b_pack.real() +
                                    ft2.tm_b_unpack.real() +
                                    ft2.tm_b_zero.real() +
                                    ft2.tm_b_map.real() << endl;

  //cout << " " << 2*basis.np(0) << " " << 2*basis.np(1)
  //     << " " << 2*basis.np(2) << " ";
  cout << " bwd2 time: " << tm.cpu() << " / " << tm.real()
  << "    " << 1.e-6*flops/tm.real() << " MFlops" << endl;
278

279
  // double transform
280
  MPI_Barrier(MPI_COMM_WORLD);
281 282 283
  tm.reset();
  ft2.reset_timers();
  tm.start();
Francois Gygi committed
284 285 286
#if USE_APC
  ApcStart(5);
#endif
287
  ft2.forward(&f2[0],&x1[0],&x2[0]);
Francois Gygi committed
288 289 290
#if USE_APC
  ApcStop(5);
#endif
291
  tm.stop();
292 293
  cout << " fwd3: size: " << ft2.np0() << " "
       << ft2.np1() << " " << ft2.np2() << endl;
Francois Gygi committed
294
  cout << " fwd3: vgrid->wf double transform" << endl;
295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
  cout << " fwd3: tm_f_fft:    " << ft2.tm_f_fft.real() << endl;
  cout << " fwd3: tm_f_mpi:    " << ft2.tm_f_mpi.real() << endl;
  cout << " fwd3: tm_f_pack:   " << ft2.tm_f_pack.real() << endl;
  cout << " fwd3: tm_f_unpack: " << ft2.tm_f_unpack.real() << endl;
  cout << " fwd3: tm_f_zero:   " << ft2.tm_f_zero.real() << endl;
  cout << " fwd3: tm_f_map:    " << ft2.tm_f_map.real() << endl;
  cout << " fwd3: tm_f_total:  " << ft2.tm_f_fft.real() +
                                    ft2.tm_f_mpi.real() +
                                    ft2.tm_f_pack.real() +
                                    ft2.tm_f_unpack.real() +
                                    ft2.tm_f_zero.real() +
                                    ft2.tm_f_map.real() << endl;
  cout << " fwd3 time: " << tm.cpu() << " / " << tm.real()
  << "    " << 1.e-6*flops/tm.real() << " MFlops" << endl;

310
  MPI_Barrier(MPI_COMM_WORLD);
311 312 313
  tm.reset();
  ft2.reset_timers();
  tm.start();
Francois Gygi committed
314 315 316
#if USE_APC
  ApcStart(6);
#endif
317
  ft2.backward(&x1[0],&x2[0],&f2[0]);
Francois Gygi committed
318 319 320
#if USE_APC
  ApcStop(6);
#endif
321
  tm.stop();
322 323
  cout << " bwd3: size: " << ft2.np0() << " "
       << ft2.np1() << " " << ft2.np2() << endl;
Francois Gygi committed
324
  cout << " bwd3: wf->vgrid double transform" << endl;
325 326 327 328 329 330 331 332 333 334 335 336 337 338
  cout << " bwd3: tm_b_fft:    " << ft2.tm_b_fft.real() << endl;
  cout << " bwd3: tm_b_mpi:    " << ft2.tm_b_mpi.real() << endl;
  cout << " bwd3: tm_b_pack:   " << ft2.tm_b_pack.real() << endl;
  cout << " bwd3: tm_b_unpack: " << ft2.tm_b_unpack.real() << endl;
  cout << " bwd3: tm_b_zero:   " << ft2.tm_b_zero.real() << endl;
  cout << " bwd3: tm_b_map:    " << ft2.tm_b_map.real() << endl;
  cout << " bwd3: tm_b_total:  " << ft2.tm_b_fft.real() +
                                    ft2.tm_b_mpi.real() +
                                    ft2.tm_b_pack.real() +
                                    ft2.tm_b_unpack.real() +
                                    ft2.tm_b_zero.real() +
                                    ft2.tm_b_map.real() << endl;
  cout << " bwd3 time: " << tm.cpu() << " / " << tm.real()
  << "    " << 1.e-6*flops/tm.real() << " MFlops" << endl;
339

340
#endif
Francois Gygi committed
341
  } // end of scope for wf-v transforms
342

343
#if 1
Francois Gygi committed
344 345
  ////////////////////////////////////////////////////////////
  // v(g)->vgrid
346
  Basis vbasis(MPI_COMM_WORLD,kpoint);
Francois Gygi committed
347 348 349 350 351 352
  vbasis.resize(cell,cell,4.0*ecut);
  cout << " vbasis.np() = " << vbasis.np(0) << " " << vbasis.np(1)
       << " " << vbasis.np(2) << endl;
  FourierTransform vft(vbasis,vbasis.np(0),vbasis.np(1),vbasis.np(2));
  vector<complex<double> > vf(vft.np012loc());
  vector<complex<double> > vg(vbasis.localsize());
353

Francois Gygi committed
354 355 356
  double vflops = 2*vbasis.nrod_loc() *      fft_flops(vft.np2()) +
                   vft.np1()   * vft.np2() * fft_flops(vft.np0()) +
                   vft.np0()   * vft.np2() * fft_flops(vft.np1());
357
  MPI_Barrier(MPI_COMM_WORLD);
Francois Gygi committed
358 359 360
  tm.reset();
  vft.reset_timers();
  tm.start();
Francois Gygi committed
361 362 363
#if USE_APC
  ApcStart(7);
#endif
Francois Gygi committed
364
  vft.forward(&vf[0],&vg[0]);
Francois Gygi committed
365 366 367
#if USE_APC
  ApcStop(7);
#endif
Francois Gygi committed
368
  tm.stop();
369 370
  cout << " fwd4: size: " << vft.np0() << " "
       << vft.np1() << " " << vft.np2() << endl;
Francois Gygi committed
371
  cout << " fwd4: vgrid->v(g)" << endl;
372 373 374 375 376 377 378 379 380 381 382 383
  cout << " fwd4: tm_f_fft:    " << vft.tm_f_fft.real() << endl;
  cout << " fwd4: tm_f_mpi:    " << vft.tm_f_mpi.real() << endl;
  cout << " fwd4: tm_f_pack:   " << vft.tm_f_pack.real() << endl;
  cout << " fwd4: tm_f_unpack: " << vft.tm_f_unpack.real() << endl;
  cout << " fwd4: tm_f_zero:   " << vft.tm_f_zero.real() << endl;
  cout << " fwd4: tm_f_map:    " << vft.tm_f_map.real() << endl;
  cout << " fwd4: tm_f_total:  " << vft.tm_f_fft.real() +
                                    vft.tm_f_mpi.real() +
                                    vft.tm_f_pack.real() +
                                    vft.tm_f_unpack.real() +
                                    vft.tm_f_zero.real() +
                                    vft.tm_f_map.real() << endl;
Francois Gygi committed
384 385
  cout << " fwd4 time: " << tm.cpu() << " / " << tm.real()
  << "    " << 1.e-6*vflops/tm.real() << " MFlops" << endl;
386

387
  MPI_Barrier(MPI_COMM_WORLD);
Francois Gygi committed
388 389 390
  tm.reset();
  vft.reset_timers();
  tm.start();
Francois Gygi committed
391 392 393
#if USE_APC
  ApcStart(8);
#endif
Francois Gygi committed
394
  vft.backward(&vg[0],&vf[0]);
Francois Gygi committed
395 396 397
#if USE_APC
  ApcStop(8);
#endif
Francois Gygi committed
398
  tm.stop();
399 400
  cout << " bwd4: size: " << vft.np0() << " "
       << vft.np1() << " " << vft.np2() << endl;
Francois Gygi committed
401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416
  cout << " bwd4: v(g)->vgrid" << endl;
  cout << " bwd4: tm_b_fft:    " << vft.tm_b_fft.real() << endl;
  cout << " bwd4: tm_b_mpi:    " << vft.tm_b_mpi.real() << endl;
  cout << " bwd4: tm_b_pack:   " << vft.tm_b_pack.real() << endl;
  cout << " bwd4: tm_b_unpack: " << vft.tm_b_unpack.real() << endl;
  cout << " bwd4: tm_b_zero:   " << vft.tm_b_zero.real() << endl;
  cout << " bwd4: tm_b_map:    " << vft.tm_b_map.real() << endl;
  cout << " bwd4: tm_b_total:  " << vft.tm_b_fft.real() +
                                    vft.tm_b_mpi.real() +
                                    vft.tm_b_pack.real() +
                                    vft.tm_b_unpack.real() +
                                    vft.tm_b_zero.real() +
                                    vft.tm_b_map.real() << endl;
  cout << " bwd4 time: " << tm.cpu() << " / " << tm.real()
  << "    " << 1.e-6*vflops/tm.real() << " MFlops" << endl;
#if 0
Francois Gygi committed
417 418 419
  //////////////////////////////////////////////////////////////////////////////
  // Integration of a 2-norm normalized plane wave
  //////////////////////////////////////////////////////////////////////////////
420

Francois Gygi committed
421 422 423 424 425
  for ( int i = 0; i < basis.localsize(); i++ )
  {
    x[i] = 0.0;
  }
  if ( ctxt.myproc() == 0 ) x[1] = 1.0/sqrt(2.0);
426

427
  ft2.backward(&x[0],&f2[0]);
Francois Gygi committed
428 429 430 431 432 433 434 435 436 437 438 439

#if 0
  for ( int i = 0; i < basis.localsize(); i++ )
    cout << basis.kv(3*i) << " " << basis.kv(3*i+1) << " " << basis.kv(3*i+2)
         << "     " << x[i] << endl;
  for ( int i = 0; i < ft.np0(); i++ )
    for ( int j = 0; j < ft.np1(); j++ )
      for ( int k = 0; k < ft.np2_loc(); k++ )
        cout << mype << ": "
             << i << " " << j << " " << k+ft.np2_first() << " "
             << f[ft.index(i,j,k)] << endl;
#endif
440

Francois Gygi committed
441
#if 0
Francois Gygi committed
442
  // integral of f^2 in r space must be 1.0
443
  double sum=0.0, tsum = 0.0;
444
  for ( int i = 0; i < f2.size(); i++ )
445
    tsum += norm(f2[i]);
Francois Gygi committed
446
  MPI_Allreduce(&tsum,&sum,1,MPI_DOUBLE,MPI_SUM,ctxt.comm());
447

448
  cout << " sum pw^2: " << sum / ft2.np012() << endl;
449

Francois Gygi committed
450 451 452 453 454 455
  //////////////////////////////////////////////////////////////////////////////
  // Integration of a 2-norm normalized gaussian
  //////////////////////////////////////////////////////////////////////////////
  for ( int i = 0; i < basis.localsize(); i++ )
  {
    double g2 = basis.g2(i);
456
    x[i] = 1.0 / sqrt(omega) * pow(2.0*M_PI*rc*rc,0.75) *
Francois Gygi committed
457 458
           exp( -0.25 * g2 * rc*rc );
  }
Francois Gygi committed
459
#endif
460

Francois Gygi committed
461
#if 0
462 463 464 465 466 467 468 469
  // Compute norm in g space
  double gnorm = 0.0;
  for ( int i = 0; i < basis.localsize(); i++ )
    gnorm += 2.0 * norm(x[i]);
  if ( ctxt.onpe0() )
    gnorm -= norm(x[0]);
  ctxt.dsum(1,1,&gnorm,1);
  cout << " gaussian gnorm: " << gnorm << endl;
470

Francois Gygi committed
471
  ft2.backward(&x[0],&f2[0]);
Francois Gygi committed
472
#endif
473

Francois Gygi committed
474 475 476 477 478 479 480 481 482
//   for ( int i = 0; i < basis.localsize(); i++ )
//     cout << basis.kv(3*i) << " " << basis.kv(3*i+1) << " " << basis.kv(3*i+2)
//          << "     " << x[i] << endl;
//   for ( int i = 0; i < ft2.np0(); i++ )
//     for ( int j = 0; j < ft2.np1(); j++ )
//       for ( int k = 0; k < ft2.np2_loc(); k++ )
//         cout << mype << ": "
//              << i << " " << j << " " << k+ft2.np2_first() << " "
//              << f2[ft2.index(i,j,k)] << endl;
483

Francois Gygi committed
484 485
  // integral of gaussian^2 in r space must be 1.0
  tsum = 0.0;
486
  for ( int i = 0; i < f2.size(); i++ )
487
    tsum += norm(f2[i]);
Francois Gygi committed
488
  MPI_Allreduce(&tsum,&sum,1,MPI_DOUBLE,MPI_SUM,ctxt.comm());
489

490
  cout << " gaussian rnorm: " << sum / ft2.np012() << endl;
Francois Gygi committed
491
#endif
492
#endif
493

494
  } // Context scope
Francois Gygi committed
495 496 497
#if USE_APC
  ApcFinalize();
#endif
498 499
#if USE_MPI
  MPI_Finalize();
Francois Gygi committed
500
#endif
501 502
  return 0;
}