testFourierTransform.C 16.4 KB
Newer Older
Francois Gygi committed
1 2 3 4 5 6
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2008 The Regents of the University of California
//
// This file is part of Qbox
//
Francois Gygi committed
7 8
// Qbox is distributed under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 2 of
Francois Gygi committed
9 10 11 12
// the License, or (at your option) any later version.
// See the file COPYING in the root directory of this distribution
// or <http://www.gnu.org/licenses/>.
//
Francois Gygi committed
13 14 15
//
// testFourierTransform.C
//
16 17 18 19
// test and timing of the Qbox FourierTransform class
//
// The FourierTransform functionality is tested in the following 8 tests
// relevant to the use of the class in different parts of Qbox
Francois Gygi committed
20 21 22

#include <iostream>
#include <iomanip>
23
#include <cstdlib>
Francois Gygi committed
24 25 26 27 28 29
using namespace std;

#include "Basis.h"
#include "FourierTransform.h"
#include "Timer.h"

30
double fft_flops(int n)
31
{
32
  return 5.0 * n * log((double) n) / log(2.0);
33 34
}

Francois Gygi committed
35 36 37 38 39 40 41
int main(int argc, char **argv)
{
  Timer tm;
#if USE_MPI
  MPI_Init(&argc,&argv);
#endif
  {
42

43
#if USE_MPI
Francois Gygi committed
44 45 46
  char processor_name[MPI_MAX_PROCESSOR_NAME];
  int namelen;
  PMPI_Get_processor_name(processor_name,&namelen);
47 48 49
#else
  const char* processor_name = "serial";
#endif
Francois Gygi committed
50

51 52
  int mype;
  MPI_Comm_rank(MPI_COMM_WORLD,&mype);
53
  cout << " Process " << mype << " on " << processor_name << endl;
Francois Gygi committed
54

55 56 57
  D3vector a,b,c,kpoint;
  double ecut;
  if ( argc == 5 )
Francois Gygi committed
58
  {
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
    a = D3vector(atof(argv[1]),0,0);
    b = D3vector(0,atof(argv[2]),0);
    c = D3vector(0,0,atof(argv[3]));
    ecut = atof(argv[4]);
  }
  else if ( argc == 11 )
  {
    a = D3vector(atof(argv[1]),atof(argv[2]),atof(argv[3]));
    b = D3vector(atof(argv[4]),atof(argv[5]),atof(argv[6]));
    c = D3vector(atof(argv[7]),atof(argv[8]),atof(argv[9]));
    ecut = atof(argv[10]);
  }
  else if ( argc == 14 )
  {
    a = D3vector(atof(argv[1]),atof(argv[2]),atof(argv[3]));
    b = D3vector(atof(argv[4]),atof(argv[5]),atof(argv[6]));
    c = D3vector(atof(argv[7]),atof(argv[8]),atof(argv[9]));
    ecut = atof(argv[10]);
    kpoint = D3vector(atof(argv[11]),atof(argv[12]),atof(argv[13]));
  }
  else
  {
    cout << " use: testFourierTransform a b c ecut(a.u.) [kpoint] " << endl;
82
    return 1;
Francois Gygi committed
83 84 85
  }
  UnitCell cell(a,b,c);
  const double omega = cell.volume();
86

Francois Gygi committed
87 88
  // start scope of wf-v transforms
  {
89 90
  // transform and interpolate as for wavefunctions

91
  Basis basis(MPI_COMM_WORLD,kpoint);
Francois Gygi committed
92
  basis.resize(cell,cell,ecut);
93
  FourierTransform ft(basis,basis.np(0),basis.np(1),basis.np(2));
94
  FourierTransform ft2(basis,2*basis.np(0),2*basis.np(1),2*basis.np(2));
95
  vector<complex<double> > f1(ft.np012loc());
96
  vector<complex<double> > f2(ft2.np012loc());
Francois Gygi committed
97
  vector<complex<double> > x(basis.localsize());
98 99
  vector<complex<double> > x1(basis.localsize());
  vector<complex<double> > x2(basis.localsize());
100 101 102 103

  double flops = 2*basis.nrod_loc() *      fft_flops(ft2.np2()) +
                 ft2.np1()/2 * ft2.np2() * fft_flops(ft2.np0()) +
                 ft2.np0()   * ft2.np2() * fft_flops(ft2.np1());
104
  if ( mype == 0 )
Francois Gygi committed
105
  {
106 107
    cout << " wfbasis.size() = " << basis.size() << endl;
    cout << " wfbasis.np() = " << basis.np(0) << " " << basis.np(1)
Francois Gygi committed
108
         << " " << basis.np(2) << endl;
109
    cout << " flop count: " << flops << endl;
Francois Gygi committed
110
  }
111
  cout << " wfbasis.nrod_loc(): " << basis.nrod_loc() << endl;
112
  cout << " zvec.size: "
113 114 115
       << 2*basis.nrod_loc()*ft2.np2() * sizeof(complex<double>)
       << endl;

Francois Gygi committed
116 117 118
  cout.setf(ios::fixed,ios::floatfield);
  cout.setf(ios::right,ios::adjustfield);
  cout << setprecision(6);
119

Francois Gygi committed
120 121 122 123 124 125 126 127
  const double rc = 1.0;
#if 1
  // Initialize with Fourier coefficients of a normalized gaussian distribution
  for ( int i = 0; i < basis.localsize(); i++ )
  {
    double g2 = basis.g2(i);
    double y = 1.0/omega * exp( -0.25 * g2 * rc*rc );
    x[i] = y;
128 129
    x1[i] = y;
    x2[i] = y;
Francois Gygi committed
130 131 132 133
    // x[i] = complex<double>(y,y);
  }
#endif

134
  // test ft (small grid)
135
  cout << mype << ": ft.np2_loc(): " << ft.np2_loc() << endl;
136
  MPI_Barrier(MPI_COMM_WORLD);
Francois Gygi committed
137
  cout << " test ft: ";
138 139 140 141
  ft.forward(&f1[0],&x[0]);
  cout << " forward done ";
  ft.backward(&x[0],&f1[0]);
  cout << " backward done " << endl;
142
  MPI_Barrier(MPI_COMM_WORLD);
143

144
#if 1
145

146
  MPI_Barrier(MPI_COMM_WORLD);
147 148 149 150 151
  tm.reset();
  ft2.reset_timers();
  tm.start();
  ft2.forward(&f2[0],&x[0]);
  tm.stop();
152 153
  cout << " fwd1: size: " << ft2.np0() << " "
       << ft2.np1() << " " << ft2.np2() << endl;
Francois Gygi committed
154
  cout << " fwd1: vgrid->wf" << endl;
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
  cout << " fwd1: tm_f_fft:    " << ft2.tm_f_fft.real() << endl;
  cout << " fwd1: tm_f_mpi:    " << ft2.tm_f_mpi.real() << endl;
  cout << " fwd1: tm_f_pack:   " << ft2.tm_f_pack.real() << endl;
  cout << " fwd1: tm_f_unpack: " << ft2.tm_f_unpack.real() << endl;
  cout << " fwd1: tm_f_zero:   " << ft2.tm_f_zero.real() << endl;
  cout << " fwd1: tm_f_map:    " << ft2.tm_f_map.real() << endl;
  cout << " fwd1: tm_f_total:  " << ft2.tm_f_fft.real() +
                                    ft2.tm_f_mpi.real() +
                                    ft2.tm_f_pack.real() +
                                    ft2.tm_f_unpack.real() +
                                    ft2.tm_f_zero.real() +
                                    ft2.tm_f_map.real() << endl;
  cout << " fwd1 time: " << tm.cpu() << " / " << tm.real()
  << "    " << 1.e-6*flops/tm.real() << " MFlops" << endl;

170
  MPI_Barrier(MPI_COMM_WORLD);
171 172 173 174 175
  tm.reset();
  ft2.reset_timers();
  tm.start();
  ft2.backward(&x[0],&f2[0]);
  tm.stop();
176 177
  cout << " bwd1: size: " << ft2.np0() << " "
       << ft2.np1() << " " << ft2.np2() << endl;
Francois Gygi committed
178
  cout << " bwd1: wf->vgrid" << endl;
179 180 181 182 183 184 185 186 187 188 189 190 191 192
  cout << " bwd1: tm_b_fft:    " << ft2.tm_b_fft.real() << endl;
  cout << " bwd1: tm_b_mpi:    " << ft2.tm_b_mpi.real() << endl;
  cout << " bwd1: tm_b_pack:   " << ft2.tm_b_pack.real() << endl;
  cout << " bwd1: tm_b_unpack: " << ft2.tm_b_unpack.real() << endl;
  cout << " bwd1: tm_b_zero:   " << ft2.tm_b_zero.real() << endl;
  cout << " bwd1: tm_b_map:    " << ft2.tm_b_map.real() << endl;
  cout << " bwd1: tm_b_total:  " << ft2.tm_b_fft.real() +
                                    ft2.tm_b_mpi.real() +
                                    ft2.tm_b_pack.real() +
                                    ft2.tm_b_unpack.real() +
                                    ft2.tm_b_zero.real() +
                                    ft2.tm_b_map.real() << endl;
  cout << " bwd1 time: " << tm.cpu() << " / " << tm.real()
  << "    " << 1.e-6*flops/tm.real() << " MFlops" << endl;
193

194
  MPI_Barrier(MPI_COMM_WORLD);
195 196 197 198 199
  tm.reset();
  ft2.reset_timers();
  tm.start();
  ft2.forward(&f2[0],&x[0]);
  tm.stop();
200 201
  cout << " fwd2: size: " << ft2.np0() << " "
       << ft2.np1() << " " << ft2.np2() << endl;
Francois Gygi committed
202
  cout << " fwd2: vgrid->wf" << endl;
203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
  cout << " fwd2: tm_f_fft:    " << ft2.tm_f_fft.real() << endl;
  cout << " fwd2: tm_f_mpi:    " << ft2.tm_f_mpi.real() << endl;
  cout << " fwd2: tm_f_pack:   " << ft2.tm_f_pack.real() << endl;
  cout << " fwd2: tm_f_unpack: " << ft2.tm_f_unpack.real() << endl;
  cout << " fwd2: tm_f_zero:   " << ft2.tm_f_zero.real() << endl;
  cout << " fwd2: tm_f_map:    " << ft2.tm_f_map.real() << endl;
  cout << " fwd2: tm_f_total:  " << ft2.tm_f_fft.real() +
                                    ft2.tm_f_mpi.real() +
                                    ft2.tm_f_pack.real() +
                                    ft2.tm_f_unpack.real() +
                                    ft2.tm_f_zero.real() +
                                    ft2.tm_f_map.real() << endl;

  //cout << " " << 2*basis.np(0) << " " << 2*basis.np(1)
  //     << " " << 2*basis.np(2) << " ";
  cout << " fwd2 time: " << tm.cpu() << " / " << tm.real()
  << "    " << 1.e-6*flops/tm.real() << " MFlops" << endl;

221
  MPI_Barrier(MPI_COMM_WORLD);
222 223 224 225 226
  tm.reset();
  ft2.reset_timers();
  tm.start();
  ft2.backward(&x[0],&f2[0]);
  tm.stop();
227 228
  cout << " bwd2: size: " << ft2.np0() << " "
       << ft2.np1() << " " << ft2.np2() << endl;
Francois Gygi committed
229
  cout << " bwd2: wf->vgrid" << endl;
230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
  cout << " bwd2: tm_b_fft:    " << ft2.tm_b_fft.real() << endl;
  cout << " bwd2: tm_b_mpi:    " << ft2.tm_b_mpi.real() << endl;
  cout << " bwd2: tm_b_pack:   " << ft2.tm_b_pack.real() << endl;
  cout << " bwd2: tm_b_unpack: " << ft2.tm_b_unpack.real() << endl;
  cout << " bwd2: tm_b_zero:   " << ft2.tm_b_zero.real() << endl;
  cout << " bwd2: tm_b_map:    " << ft2.tm_b_map.real() << endl;
  cout << " bwd2: tm_b_total:  " << ft2.tm_b_fft.real() +
                                    ft2.tm_b_mpi.real() +
                                    ft2.tm_b_pack.real() +
                                    ft2.tm_b_unpack.real() +
                                    ft2.tm_b_zero.real() +
                                    ft2.tm_b_map.real() << endl;

  //cout << " " << 2*basis.np(0) << " " << 2*basis.np(1)
  //     << " " << 2*basis.np(2) << " ";
  cout << " bwd2 time: " << tm.cpu() << " / " << tm.real()
  << "    " << 1.e-6*flops/tm.real() << " MFlops" << endl;
247

248
  // double transform
249
  MPI_Barrier(MPI_COMM_WORLD);
250 251 252 253 254
  tm.reset();
  ft2.reset_timers();
  tm.start();
  ft2.forward(&f2[0],&x1[0],&x2[0]);
  tm.stop();
255 256
  cout << " fwd3: size: " << ft2.np0() << " "
       << ft2.np1() << " " << ft2.np2() << endl;
Francois Gygi committed
257
  cout << " fwd3: vgrid->wf double transform" << endl;
258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
  cout << " fwd3: tm_f_fft:    " << ft2.tm_f_fft.real() << endl;
  cout << " fwd3: tm_f_mpi:    " << ft2.tm_f_mpi.real() << endl;
  cout << " fwd3: tm_f_pack:   " << ft2.tm_f_pack.real() << endl;
  cout << " fwd3: tm_f_unpack: " << ft2.tm_f_unpack.real() << endl;
  cout << " fwd3: tm_f_zero:   " << ft2.tm_f_zero.real() << endl;
  cout << " fwd3: tm_f_map:    " << ft2.tm_f_map.real() << endl;
  cout << " fwd3: tm_f_total:  " << ft2.tm_f_fft.real() +
                                    ft2.tm_f_mpi.real() +
                                    ft2.tm_f_pack.real() +
                                    ft2.tm_f_unpack.real() +
                                    ft2.tm_f_zero.real() +
                                    ft2.tm_f_map.real() << endl;
  cout << " fwd3 time: " << tm.cpu() << " / " << tm.real()
  << "    " << 1.e-6*flops/tm.real() << " MFlops" << endl;

273
  MPI_Barrier(MPI_COMM_WORLD);
274 275 276 277 278
  tm.reset();
  ft2.reset_timers();
  tm.start();
  ft2.backward(&x1[0],&x2[0],&f2[0]);
  tm.stop();
279 280
  cout << " bwd3: size: " << ft2.np0() << " "
       << ft2.np1() << " " << ft2.np2() << endl;
Francois Gygi committed
281
  cout << " bwd3: wf->vgrid double transform" << endl;
282 283 284 285 286 287 288 289 290 291 292 293 294 295
  cout << " bwd3: tm_b_fft:    " << ft2.tm_b_fft.real() << endl;
  cout << " bwd3: tm_b_mpi:    " << ft2.tm_b_mpi.real() << endl;
  cout << " bwd3: tm_b_pack:   " << ft2.tm_b_pack.real() << endl;
  cout << " bwd3: tm_b_unpack: " << ft2.tm_b_unpack.real() << endl;
  cout << " bwd3: tm_b_zero:   " << ft2.tm_b_zero.real() << endl;
  cout << " bwd3: tm_b_map:    " << ft2.tm_b_map.real() << endl;
  cout << " bwd3: tm_b_total:  " << ft2.tm_b_fft.real() +
                                    ft2.tm_b_mpi.real() +
                                    ft2.tm_b_pack.real() +
                                    ft2.tm_b_unpack.real() +
                                    ft2.tm_b_zero.real() +
                                    ft2.tm_b_map.real() << endl;
  cout << " bwd3 time: " << tm.cpu() << " / " << tm.real()
  << "    " << 1.e-6*flops/tm.real() << " MFlops" << endl;
296

297
#endif
Francois Gygi committed
298
  } // end of scope for wf-v transforms
299

300
#if 1
Francois Gygi committed
301 302
  ////////////////////////////////////////////////////////////
  // v(g)->vgrid
303
  Basis vbasis(MPI_COMM_WORLD,kpoint);
Francois Gygi committed
304 305 306 307 308 309
  vbasis.resize(cell,cell,4.0*ecut);
  cout << " vbasis.np() = " << vbasis.np(0) << " " << vbasis.np(1)
       << " " << vbasis.np(2) << endl;
  FourierTransform vft(vbasis,vbasis.np(0),vbasis.np(1),vbasis.np(2));
  vector<complex<double> > vf(vft.np012loc());
  vector<complex<double> > vg(vbasis.localsize());
310

Francois Gygi committed
311 312 313
  double vflops = 2*vbasis.nrod_loc() *      fft_flops(vft.np2()) +
                   vft.np1()   * vft.np2() * fft_flops(vft.np0()) +
                   vft.np0()   * vft.np2() * fft_flops(vft.np1());
314
  MPI_Barrier(MPI_COMM_WORLD);
Francois Gygi committed
315 316 317 318 319
  tm.reset();
  vft.reset_timers();
  tm.start();
  vft.forward(&vf[0],&vg[0]);
  tm.stop();
320 321
  cout << " fwd4: size: " << vft.np0() << " "
       << vft.np1() << " " << vft.np2() << endl;
Francois Gygi committed
322
  cout << " fwd4: vgrid->v(g)" << endl;
323 324 325 326 327 328 329 330 331 332 333 334
  cout << " fwd4: tm_f_fft:    " << vft.tm_f_fft.real() << endl;
  cout << " fwd4: tm_f_mpi:    " << vft.tm_f_mpi.real() << endl;
  cout << " fwd4: tm_f_pack:   " << vft.tm_f_pack.real() << endl;
  cout << " fwd4: tm_f_unpack: " << vft.tm_f_unpack.real() << endl;
  cout << " fwd4: tm_f_zero:   " << vft.tm_f_zero.real() << endl;
  cout << " fwd4: tm_f_map:    " << vft.tm_f_map.real() << endl;
  cout << " fwd4: tm_f_total:  " << vft.tm_f_fft.real() +
                                    vft.tm_f_mpi.real() +
                                    vft.tm_f_pack.real() +
                                    vft.tm_f_unpack.real() +
                                    vft.tm_f_zero.real() +
                                    vft.tm_f_map.real() << endl;
Francois Gygi committed
335 336
  cout << " fwd4 time: " << tm.cpu() << " / " << tm.real()
  << "    " << 1.e-6*vflops/tm.real() << " MFlops" << endl;
337

338
  MPI_Barrier(MPI_COMM_WORLD);
Francois Gygi committed
339 340 341 342 343
  tm.reset();
  vft.reset_timers();
  tm.start();
  vft.backward(&vg[0],&vf[0]);
  tm.stop();
344 345
  cout << " bwd4: size: " << vft.np0() << " "
       << vft.np1() << " " << vft.np2() << endl;
Francois Gygi committed
346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361
  cout << " bwd4: v(g)->vgrid" << endl;
  cout << " bwd4: tm_b_fft:    " << vft.tm_b_fft.real() << endl;
  cout << " bwd4: tm_b_mpi:    " << vft.tm_b_mpi.real() << endl;
  cout << " bwd4: tm_b_pack:   " << vft.tm_b_pack.real() << endl;
  cout << " bwd4: tm_b_unpack: " << vft.tm_b_unpack.real() << endl;
  cout << " bwd4: tm_b_zero:   " << vft.tm_b_zero.real() << endl;
  cout << " bwd4: tm_b_map:    " << vft.tm_b_map.real() << endl;
  cout << " bwd4: tm_b_total:  " << vft.tm_b_fft.real() +
                                    vft.tm_b_mpi.real() +
                                    vft.tm_b_pack.real() +
                                    vft.tm_b_unpack.real() +
                                    vft.tm_b_zero.real() +
                                    vft.tm_b_map.real() << endl;
  cout << " bwd4 time: " << tm.cpu() << " / " << tm.real()
  << "    " << 1.e-6*vflops/tm.real() << " MFlops" << endl;
#if 0
Francois Gygi committed
362 363 364
  //////////////////////////////////////////////////////////////////////////////
  // Integration of a 2-norm normalized plane wave
  //////////////////////////////////////////////////////////////////////////////
365

Francois Gygi committed
366 367 368 369 370
  for ( int i = 0; i < basis.localsize(); i++ )
  {
    x[i] = 0.0;
  }
  if ( ctxt.myproc() == 0 ) x[1] = 1.0/sqrt(2.0);
371

372
  ft2.backward(&x[0],&f2[0]);
Francois Gygi committed
373 374 375 376 377 378 379 380 381 382 383 384

#if 0
  for ( int i = 0; i < basis.localsize(); i++ )
    cout << basis.kv(3*i) << " " << basis.kv(3*i+1) << " " << basis.kv(3*i+2)
         << "     " << x[i] << endl;
  for ( int i = 0; i < ft.np0(); i++ )
    for ( int j = 0; j < ft.np1(); j++ )
      for ( int k = 0; k < ft.np2_loc(); k++ )
        cout << mype << ": "
             << i << " " << j << " " << k+ft.np2_first() << " "
             << f[ft.index(i,j,k)] << endl;
#endif
385

Francois Gygi committed
386
#if 0
Francois Gygi committed
387
  // integral of f^2 in r space must be 1.0
388
  double sum=0.0, tsum = 0.0;
389
  for ( int i = 0; i < f2.size(); i++ )
390
    tsum += norm(f2[i]);
Francois Gygi committed
391
  MPI_Allreduce(&tsum,&sum,1,MPI_DOUBLE,MPI_SUM,ctxt.comm());
392

393
  cout << " sum pw^2: " << sum / ft2.np012() << endl;
394

Francois Gygi committed
395 396 397 398 399 400
  //////////////////////////////////////////////////////////////////////////////
  // Integration of a 2-norm normalized gaussian
  //////////////////////////////////////////////////////////////////////////////
  for ( int i = 0; i < basis.localsize(); i++ )
  {
    double g2 = basis.g2(i);
401
    x[i] = 1.0 / sqrt(omega) * pow(2.0*M_PI*rc*rc,0.75) *
Francois Gygi committed
402 403
           exp( -0.25 * g2 * rc*rc );
  }
Francois Gygi committed
404
#endif
405

Francois Gygi committed
406
#if 0
407 408 409 410 411 412 413 414
  // Compute norm in g space
  double gnorm = 0.0;
  for ( int i = 0; i < basis.localsize(); i++ )
    gnorm += 2.0 * norm(x[i]);
  if ( ctxt.onpe0() )
    gnorm -= norm(x[0]);
  ctxt.dsum(1,1,&gnorm,1);
  cout << " gaussian gnorm: " << gnorm << endl;
415

Francois Gygi committed
416
  ft2.backward(&x[0],&f2[0]);
Francois Gygi committed
417
#endif
418

Francois Gygi committed
419 420 421 422 423 424 425 426 427
//   for ( int i = 0; i < basis.localsize(); i++ )
//     cout << basis.kv(3*i) << " " << basis.kv(3*i+1) << " " << basis.kv(3*i+2)
//          << "     " << x[i] << endl;
//   for ( int i = 0; i < ft2.np0(); i++ )
//     for ( int j = 0; j < ft2.np1(); j++ )
//       for ( int k = 0; k < ft2.np2_loc(); k++ )
//         cout << mype << ": "
//              << i << " " << j << " " << k+ft2.np2_first() << " "
//              << f2[ft2.index(i,j,k)] << endl;
428

Francois Gygi committed
429 430
  // integral of gaussian^2 in r space must be 1.0
  tsum = 0.0;
431
  for ( int i = 0; i < f2.size(); i++ )
432
    tsum += norm(f2[i]);
Francois Gygi committed
433
  MPI_Allreduce(&tsum,&sum,1,MPI_DOUBLE,MPI_SUM,ctxt.comm());
434

435
  cout << " gaussian rnorm: " << sum / ft2.np012() << endl;
Francois Gygi committed
436
#endif
437
#endif
438

439 440 441
  } // Context scope
#if USE_MPI
  MPI_Finalize();
Francois Gygi committed
442
#endif
443 444
  return 0;
}