JDWavefunctionStepper.C 11.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
////////////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2008 The Regents of the University of California
//
// This file is part of Qbox
//
// Qbox is distributed under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 2 of
// the License, or (at your option) any later version.
// See the file COPYING in the root directory of this distribution
// or <http://www.gnu.org/licenses/>.
//
////////////////////////////////////////////////////////////////////////////////
//
// JDWavefunctionStepper.C
//
////////////////////////////////////////////////////////////////////////////////

#include "JDWavefunctionStepper.h"
#include "Wavefunction.h"
#include "SlaterDet.h"
#include "EnergyFunctional.h"
#include "Preconditioner.h"
#include <iostream>
using namespace std;

////////////////////////////////////////////////////////////////////////////////
JDWavefunctionStepper::JDWavefunctionStepper(Wavefunction& wf,
29 30 31
  Preconditioner& prec, EnergyFunctional& ef, TimerMap& tmap) :
  WavefunctionStepper(wf,tmap), prec_(prec), wft_(wf), dwft_(wf), ef_(ef)
{}
Francois Gygi committed
32 33 34

////////////////////////////////////////////////////////////////////////////////
JDWavefunctionStepper::~JDWavefunctionStepper(void)
35
{}
36 37 38 39 40 41 42

////////////////////////////////////////////////////////////////////////////////
void JDWavefunctionStepper::update(Wavefunction& dwf)
{
  // save copy of wf in wft_ and dwf in dwft_
  wft_ = wf_;
  dwft_ = dwf;
Francois Gygi committed
43
  tmap_["jd_residual"].start();
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
  for ( int ispin = 0; ispin < wf_.nspin(); ispin++ )
  {
    for ( int ikp = 0; ikp < wf_.nkp(); ikp++ )
    {
      if ( wf_.sd(ispin,ikp)->basis().real() )
      {
        // compute A = Y^T H Y  and descent direction HY - YA
        // proxy real matrices c, cp
        DoubleMatrix c_proxy(wf_.sd(ispin,ikp)->c());
        DoubleMatrix cp_proxy(dwf.sd(ispin,ikp)->c());
        DoubleMatrix a(c_proxy.context(),c_proxy.n(),c_proxy.n(),
                       c_proxy.nb(),c_proxy.nb());

        // factor 2.0 in next line: G and -G
        a.gemm('t','n',2.0,c_proxy,cp_proxy,0.0);
        // rank-1 update correction
        a.ger(-1.0,c_proxy,0,cp_proxy,0);

        // cp = cp - c * a
        cp_proxy.gemm('n','n',-1.0,c_proxy,a,1.0);
Francois Gygi committed
64 65 66 67 68 69 70 71 72 73 74 75 76
      }
      else // real
      {
        // compute A = Y^T H Y  and descent direction HY - YA
        ComplexMatrix& c = wf_.sd(ispin,ikp)->c();
        ComplexMatrix& cp = dwf.sd(ispin,ikp)->c();
        ComplexMatrix a(c.context(),c.n(),c.n(),c.nb(),c.nb());

        // (Y,HY)
        a.gemm('c','n',1.0,c,cp,0.0);

        // cp = cp - c * a
        cp.gemm('n','n',-1.0,c,a,1.0);
77
        // dwf.sd->c() now contains the descent direction (HV-VA)
Francois Gygi committed
78 79 80 81
      }
    } // ikp
  } // ispin
  tmap_["jd_residual"].stop();
82

Francois Gygi committed
83
  // dwf.sd->c() now contains the descent direction (HV-VA)
84 85 86

  // update preconditioner
  prec_.update(wf_);
Francois Gygi committed
87 88 89 90 91 92 93 94

  tmap_["jd_compute_z"].start();
  for ( int ispin = 0; ispin < wf_.nspin(); ispin++ )
  {
    for ( int ikp = 0; ikp < wf_.nkp(); ikp++ )
    {
      if ( wf_.sd(ispin,ikp)->basis().real() )
      {
95 96 97 98
        // Apply preconditioner K and store -K(HV-VA) in wf

        double* c = (double*) wf_.sd(ispin,ikp)->c().valptr();
        double* dc = (double*) dwf.sd(ispin,ikp)->c().valptr();
Francois Gygi committed
99 100 101 102
        DoubleMatrix c_proxy(wf_.sd(ispin,ikp)->c());
        DoubleMatrix a(c_proxy.context(),c_proxy.n(),c_proxy.n(),
                       c_proxy.nb(),c_proxy.nb());
        DoubleMatrix c_proxy_t(wft_.sd(ispin,ikp)->c());
103 104 105 106 107 108 109 110 111
        const int mloc = wf_.sd(ispin,ikp)->c().mloc();
        const int ngwl = wf_.sd(ispin,ikp)->basis().localsize();
        const int nloc = wf_.sd(ispin,ikp)->c().nloc();

        for ( int n = 0; n < nloc; n++ )
        {
          // note: double mloc length for complex<double> indices
          double* dcn = &dc[2*mloc*n];
          double* cn  =  &c[2*mloc*n];
Francois Gygi committed
112

113 114
          for ( int i = 0; i < ngwl; i++ )
          {
115
            const double fac = prec_.diag(ispin,ikp,n,i);
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
            const double f0 = -fac * dcn[2*i];
            const double f1 = -fac * dcn[2*i+1];
            cn[2*i] = f0;
            cn[2*i+1] = f1;
          }
        }
        // wf_ now contains the preconditioned descent
        // direction Z = -K(HY-YA)

        // orthogonalize Z to Y
        // Z = Z - YY^T Z
        // A = Y^T * Z
        // factor 2.0 in next line: G and -G
        a.gemm('t','n',2.0,c_proxy_t,c_proxy,0.0);
        // rank-1 update correction
        a.ger(-1.0,c_proxy_t,0,c_proxy,0);

        // Z = Z - Y * A
        c_proxy.gemm('n','n',-1.0,c_proxy_t,a,1.0);

        // orthogonalize Z: gram(Z)
        wf_.sd(ispin,ikp)->gram();

        // wf now contains Z, orthonormal
      } // if real
      else
      {
Francois Gygi committed
143
        // Apply preconditioner K and store -K(HV-VA) in wf
Francois Gygi committed
144
        ComplexMatrix& c = wf_.sd(ispin,ikp)->c();
Francois Gygi committed
145
        complex<double>* cv = c.valptr();
Francois Gygi committed
146
        ComplexMatrix& cp = dwf.sd(ispin,ikp)->c();
Francois Gygi committed
147
        complex<double>* cpv = cp.valptr();
Francois Gygi committed
148 149
        ComplexMatrix& ct = wft_.sd(ispin,ikp)->c();
        ComplexMatrix a(c.context(),c.n(),c.n(),c.nb(),c.nb());
Francois Gygi committed
150 151 152 153 154 155 156 157
        const int ngwl = wf_.sd(ispin,ikp)->basis().localsize();
        const int mloc = c.mloc();
        const int nloc = c.nloc();

        for ( int n = 0; n < nloc; n++ )
        {
          complex<double>* cpn = &cpv[mloc*n];
          complex<double>* cn  =  &cv[mloc*n];
Francois Gygi committed
158

Francois Gygi committed
159 160
          for ( int i = 0; i < ngwl; i++ )
          {
161
            const double fac = prec_.diag(ispin,ikp,n,i);
Francois Gygi committed
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
            cn[i] = -fac * cpn[i];
          }
        }
        // wf_ now contains the preconditioned descent
        // direction Z = -K(HY-YA)

        // orthogonalize Z to Y
        // Z = Z - YY^T Z
        // A = Y^T * Z
        a.gemm('c','n',1.0,ct,c,0.0);

        // Z = Z - Y * A
        c.gemm('n','n',-1.0,ct,a,1.0);

        // orthogonalize Z: gram(Z)
        wf_.sd(ispin,ikp)->gram();

        // wf now contains Z, orthonormal
180 181 182
      }
    } // ikp
  } // ispin
Francois Gygi committed
183
  tmap_["jd_compute_z"].stop();
184

Francois Gygi committed
185
  tmap_["jd_hz"].start();
186 187 188 189 190 191 192 193
  // compute HZ
  const bool compute_hpsi = true;
  const bool compute_forces = false;
  const bool compute_stress = false;
  vector<vector<double> > fion;
  valarray<double> sigma;
  ef_.energy(compute_hpsi,dwf,compute_forces,fion,
             compute_stress,sigma);
Francois Gygi committed
194
  tmap_["jd_hz"].stop();
195

Francois Gygi committed
196
  tmap_["jd_blocks"].start();
197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
  for ( int ispin = 0; ispin < wf_.nspin(); ispin++ )
  {
    for ( int ikp = 0; ikp < wf_.nkp(); ikp++ )
    {
      if ( wf_.sd(ispin,ikp)->basis().real() )
      {
        // wf now contains Z
        // dwf now contains HZ
        // compute blocks (Y,HY), (Y,HZ), (Z,HZ)

        // proxy real matrices c, cp
        DoubleMatrix c_proxy(wf_.sd(ispin,ikp)->c());
        DoubleMatrix c_proxy_t(wft_.sd(ispin,ikp)->c());
        DoubleMatrix cp_proxy(dwf.sd(ispin,ikp)->c());
        DoubleMatrix cp_proxy_t(dwft_.sd(ispin,ikp)->c());

        DoubleMatrix a(c_proxy.context(),c_proxy.n(),c_proxy.n(),
                       c_proxy.nb(),c_proxy.nb());
        DoubleMatrix h(c_proxy.context(),2*c_proxy.n(),2*c_proxy.n(),
                       c_proxy.nb(),c_proxy.nb());

Francois Gygi committed
218
        // (Y,HY)
219
        // factor 2.0 in next line: G and -G
Francois Gygi committed
220
        tmap_["jd_gemm"].start();
221
        a.gemm('t','n',2.0,c_proxy_t,cp_proxy_t,0.0);
Francois Gygi committed
222
        tmap_["jd_gemm"].stop();
223 224 225
        // rank-1 update correction
        a.ger(-1.0,c_proxy_t,0,cp_proxy_t,0);
        // a contains (Y,HY), copy to h11 block
Francois Gygi committed
226
        tmap_["jd_getsub"].start();
227
        h.getsub(a,a.m(),a.n(),0,0,0,0);
Francois Gygi committed
228
        tmap_["jd_getsub"].stop();
229

Francois Gygi committed
230
        // (Z,HY)
231
        // factor 2.0 in next line: G and -G
Francois Gygi committed
232
        tmap_["jd_gemm"].start();
233
        a.gemm('t','n',2.0,c_proxy,cp_proxy_t,0.0);
Francois Gygi committed
234
        tmap_["jd_gemm"].stop();
235 236 237
        // rank-1 update correction
        a.ger(-1.0,c_proxy,0,cp_proxy_t,0);
        // a contains (Z,HY), copy to h21 block
Francois Gygi committed
238
        tmap_["jd_getsub"].start();
239
        h.getsub(a,a.m(),a.n(),0,0,a.m(),0);
Francois Gygi committed
240
        tmap_["jd_getsub"].stop();
Francois Gygi committed
241 242

        // (Z,HZ)
243
        // factor 2.0 in next line: G and -G
Francois Gygi committed
244
        tmap_["jd_gemm"].start();
245
        a.gemm('t','n',2.0,c_proxy,cp_proxy,0.0);
Francois Gygi committed
246
        tmap_["jd_gemm"].stop();
247 248 249
        // rank-1 update correction
        a.ger(-1.0,c_proxy,0,cp_proxy,0);
        // a contains (Z,HZ), copy to h22 block
Francois Gygi committed
250
        tmap_["jd_getsub"].start();
251
        h.getsub(a,a.m(),a.n(),0,0,a.m(),a.n());
Francois Gygi committed
252
        tmap_["jd_getsub"].stop();
Francois Gygi committed
253

254 255 256 257 258
        // diagonalize h
        // Note: we only need the first n eigenvectors of the (2n x 2n) matrix
        valarray<double> w(h.m());
        // q is (2n,2n)
        DoubleMatrix q(h.context(),h.n(),h.n(),h.nb(),h.nb());
Francois Gygi committed
259
        tmap_["jd_syev"].start();
260
        h.syev('l',w,q);
Francois Gygi committed
261
        tmap_["jd_syev"].stop();
262 263 264 265

        // compute the first n eigenvectors and store in wf
        // Y = Z Q21 (store result in dwf)
        // get Q21 in a
Francois Gygi committed
266
        tmap_["jd_getsub"].start();
267
        a.getsub(q,a.n(),a.n(),a.n(),0);
Francois Gygi committed
268 269
        tmap_["jd_getsub"].stop();
        tmap_["jd_gemm"].start();
270
        cp_proxy.gemm('n','n',1.0,c_proxy,a,0.0);
Francois Gygi committed
271
        tmap_["jd_gemm"].stop();
272 273 274

        // Y = Y Q11 (store result in wf)
        // get Q11 in a
Francois Gygi committed
275
        tmap_["jd_getsub"].start();
276
        a.getsub(q,a.n(),a.n(),0,0);
Francois Gygi committed
277 278
        tmap_["jd_getsub"].stop();
        tmap_["jd_gemm"].start();
279
        c_proxy.gemm('n','n',1.0,c_proxy_t,a,0.0);
Francois Gygi committed
280
        tmap_["jd_gemm"].stop();
281 282 283 284 285 286 287 288

        // add two contributions
        c_proxy += cp_proxy;

        // wf now contains the corrected eigenvectors
      } // if real
      else
      {
Francois Gygi committed
289 290 291 292 293 294 295 296 297 298 299 300 301 302
        // wf now contains Z
        // dwf now contains HZ
        // compute blocks (Y,HY), (Y,HZ), (Z,HZ)

        ComplexMatrix& c = wf_.sd(ispin,ikp)->c();
        ComplexMatrix& ct = wft_.sd(ispin,ikp)->c();
        ComplexMatrix& cp = dwf.sd(ispin,ikp)->c();
        ComplexMatrix& cpt = dwft_.sd(ispin,ikp)->c();

        ComplexMatrix a(c.context(),c.n(),c.n(),c.nb(),c.nb());
        ComplexMatrix h(c.context(),2*c.n(),2*c.n(),c.nb(),c.nb());

        // (Y,HY)
        // factor 2.0 in next line: G and -G
Francois Gygi committed
303
        tmap_["jd_gemm"].start();
Francois Gygi committed
304
        a.gemm('c','n',1.0,ct,cpt,0.0);
Francois Gygi committed
305
        tmap_["jd_gemm"].stop();
Francois Gygi committed
306
        // a contains (Y,HY), copy to h11 block
Francois Gygi committed
307
        tmap_["jd_getsub"].start();
Francois Gygi committed
308
        h.getsub(a,a.m(),a.n(),0,0,0,0);
Francois Gygi committed
309
        tmap_["jd_getsub"].stop();
Francois Gygi committed
310 311

        // (Z,HY)
Francois Gygi committed
312
        tmap_["jd_gemm"].start();
Francois Gygi committed
313
        a.gemm('c','n',1.0,c,cpt,0.0);
Francois Gygi committed
314
        tmap_["jd_gemm"].stop();
Francois Gygi committed
315
        // a contains (Z,HY), copy to h21 block
Francois Gygi committed
316
        tmap_["jd_getsub"].start();
Francois Gygi committed
317
        h.getsub(a,a.m(),a.n(),0,0,a.m(),0);
Francois Gygi committed
318
        tmap_["jd_getsub"].stop();
Francois Gygi committed
319 320

        // (Z,HZ)
Francois Gygi committed
321
        tmap_["jd_gemm"].start();
Francois Gygi committed
322
        a.gemm('c','n',1.0,c,cp,0.0);
Francois Gygi committed
323
        tmap_["jd_gemm"].stop();
Francois Gygi committed
324
        // a contains (Z,HZ), copy to h22 block
Francois Gygi committed
325
        tmap_["jd_getsub"].start();
Francois Gygi committed
326
        h.getsub(a,a.m(),a.n(),0,0,a.m(),a.n());
Francois Gygi committed
327
        tmap_["jd_getsub"].stop();
Francois Gygi committed
328 329 330 331 332 333

        // diagonalize h
        // Note: we only need the first n eigenvectors of the (2n x 2n) matrix
        valarray<double> w(h.m());
        // q is (2n,2n)
        ComplexMatrix q(h.context(),h.n(),h.n(),h.nb(),h.nb());
Francois Gygi committed
334
        tmap_["jd_heev"].start();
Francois Gygi committed
335
        h.heev('l',w,q);
Francois Gygi committed
336
        tmap_["jd_heev"].stop();
Francois Gygi committed
337 338 339 340

        // compute the first n eigenvectors and store in wf
        // Y = Z Q21 (store result in dwf)
        // get Q21 in a
Francois Gygi committed
341
        tmap_["jd_getsub"].start();
Francois Gygi committed
342
        a.getsub(q,a.n(),a.n(),a.n(),0);
Francois Gygi committed
343 344
        tmap_["jd_getsub"].stop();
        tmap_["jd_gemm"].start();
Francois Gygi committed
345
        cp.gemm('n','n',1.0,c,a,0.0);
Francois Gygi committed
346
        tmap_["jd_gemm"].stop();
Francois Gygi committed
347 348 349

        // Y = Y Q11 (store result in wf)
        // get Q11 in a
Francois Gygi committed
350
        tmap_["jd_getsub"].start();
Francois Gygi committed
351
        a.getsub(q,a.n(),a.n(),0,0);
Francois Gygi committed
352 353
        tmap_["jd_getsub"].stop();
        tmap_["jd_gemm"].start();
Francois Gygi committed
354
        c.gemm('n','n',1.0,ct,a,0.0);
Francois Gygi committed
355
        tmap_["jd_gemm"].stop();
Francois Gygi committed
356 357 358 359 360 361

        // add two contributions
        c += cp;

        // wf now contains the corrected eigenvectors
      } // if real
362 363
    } // ikp
  } // ispin
Francois Gygi committed
364
  tmap_["jd_blocks"].stop();
365
}