jacobi.C 14.6 KB
Newer Older
Francois Gygi committed
1 2
////////////////////////////////////////////////////////////////////////////////
//
Francois Gygi committed
3 4 5 6
// Copyright (c) 2008 The Regents of the University of California
//
// This file is part of Qbox
//
Francois Gygi committed
7 8
// Qbox is distributed under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 2 of
Francois Gygi committed
9 10 11 12 13 14
// the License, or (at your option) any later version.
// See the file COPYING in the root directory of this distribution
// or <http://www.gnu.org/licenses/>.
//
////////////////////////////////////////////////////////////////////////////////
//
Francois Gygi committed
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
// jacobi.C
//
////////////////////////////////////////////////////////////////////////////////

#include <cmath>
#include <cassert>
#include <vector>
#include <deque>
#include <algorithm>
#include <iostream>
#include <iomanip>
using namespace std;
#ifdef USE_MPI
#include <mpi.h>
#endif

#ifdef SCALAPACK
#include "blacs.h"
#endif

#include "Context.h"
#include "Matrix.h"
#include "blas.h"

int jacobi(int maxsweep, double threshold, DoubleMatrix& a, DoubleMatrix& u,
              vector<double>& e)
{
  assert(threshold>1.e-14);
  const Context& ctxt = a.context();
  // The input matrix is a
  // the orthogonal transformation is returned in u
  // on exit, a is diagonal, u contains eigenvectors, e contains eigenvalues
  assert(a.m()==a.n());
  assert(u.m()==u.n());
  assert(a.m()==u.m());
  assert(a.mb()==u.mb());
  assert(a.nb()==u.nb());
52

Francois Gygi committed
53
  const int mloc = a.mloc();
Francois Gygi committed
54 55
  // const int nloc = a.nloc();
  // cout << ctxt.mype() << ": nloc: " << nloc << endl;
56

57 58 59 60 61 62 63
  // identify the last active process column
  // process columns beyond that column do not have any elements of a[k]
  // compute num_nblocks = total number of column blocks
  // if num_nblocks >= ctxt.npcol(), all process columns are active
  // otherwise, the last active process column has index num_nblocks-1
  const int num_nblocks = u.n() / u.nb() + ( u.n()%u.nb() == 0 ? 0 : 1 );
  const int last_active_process_col = min(ctxt.npcol()-1, num_nblocks-1);
64

Francois Gygi committed
65 66
  // initialize u with the identity
  u.identity();
67

Francois Gygi committed
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
  // eigenvalue array
  e.resize(a.n());

  // check if the local number of rows is odd
  const bool nloc_odd = ( a.nloc()%2 != 0 );

  // if nloc is odd, an auxiliary array is created to host an extra column
  // for both a and u
  vector<double> a_aux, u_aux;
  if ( nloc_odd )
  {
    a_aux.resize(mloc);
    u_aux.resize(mloc);
  }

  // compute local number of pairs nploc
84
  const int nploc = (a.nloc()+1)/2;
Francois Gygi committed
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
  // dimension of top and bot arrays is nploc: local number of pairs
  deque<int> top(nploc), bot(nploc);

  // compute total number of pairs np
  int np = nploc;
  ctxt.isum('r',1,1,&np,1);
  //cout << ctxt.mype() << ": np=" << np << endl;

  // initialize top and bot arrays
  // the pair i is (top[i],bot[i])
  // top[i] is the local index of the top column of pair i
  // bot[i] is the local index of the bottom column of pair i
  for ( int i = 0; i < nploc; i++ )
    top[i] = i;
  for ( int i = 0; i < nploc; i++ )
    bot[nploc-i-1] = nploc+i;
  // if top[i] or bot[i] == nloc,the data resides in the array a_aux or u_aux
102

Francois Gygi committed
103
  // jglobal: global column index
104
  // jglobal[i] is the global column index of the column residing in
Francois Gygi committed
105 106
  // the local vector i. If nloc_odd and i==2*nploc-1, jglobal[i] == -1
  vector<int> jglobal(2*nploc,-1);
107 108
  for ( int jblock = 0; jblock < a.nblocks(); jblock++ )
    for ( int y = 0; y < a.nbs(jblock); y++ )
Francois Gygi committed
109
      jglobal[y + jblock*a.nb()] = a.j(jblock,y);
110

Francois Gygi committed
111 112 113 114 115 116 117 118 119 120 121 122 123
  // store addresses of columns of a and of u in acol and ucol
  vector<double*> acol(2*nploc);
  vector<double*> ucol(2*nploc);
  for ( int i = 0; i < a.nloc(); i++ )
    acol[i] = a.valptr(i*a.mloc());
  // if nloc is odd, store the address of vector 2*nploc-1
  if ( nloc_odd )
    acol[2*nploc-1] = &a_aux[0];
  for ( int i = 0; i < u.nloc(); i++ )
    ucol[i] = u.valptr(i*u.mloc());
  // if nloc is odd, store the address of vector 2*nploc-1
  if ( nloc_odd )
    ucol[2*nploc-1] = &u_aux[0];
124

Francois Gygi committed
125 126 127 128
  //for ( int i = 0; i < acol.size(); i++ )
  //  cout << ctxt.mype() << ": acol[" << i << "]=" << acol[i] << endl;
  //for ( int i = 0; i < ucol.size(); i++ )
  //  cout << ctxt.mype() << ": ucol[" << i << "]=" << ucol[i] << endl;
129

Francois Gygi committed
130 131
  // the vectors of the pair (top[i],bot[i]) are located at
  // addresses acol[top[i]] and acol[bot[i]]
132

Francois Gygi committed
133 134 135 136 137 138 139 140 141
  bool done = false;
  int nsweep = 0;
  while ( !done )
  {
    int npairs_above_threshold = 0;
    // sweep: process local pairs and rotate 2*np-1 times
    nsweep++;
    for ( int irot = 0; irot < 2*np-1; irot++ )
    {
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
      //cout << ctxt.mype() << ": top[i]: ";
      //for ( int i = 0; i < nploc; i++ )
      //  cout << setw(3) << top[i];
      //cout << endl;

      //cout << ctxt.mype() << ": bot[i]: ";
      //for ( int i = 0; i < nploc; i++ )
      //  cout << setw(3) << bot[i];
      //cout << endl;

      //cout << ctxt.mype() << ": jglobal[top[i]]: ";
      //for ( int i = 0; i < nploc; i++ )
      //  cout << setw(3) << jglobal[top[i]];
      //cout << endl;

      //cout << ctxt.mype() << ": jglobal[bot[i]]: ";
      //for ( int i = 0; i < nploc; i++ )
      //  cout << setw(3) << jglobal[bot[i]];
      //cout << endl;

Francois Gygi committed
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
      // perform Jacobi rotation for all local pairs

      // compute off-diagonal matrix elements apq for all pairs
      // skip the pair if one or both of the vectors is a dummy vector
      // i.e. a vector having jglobal==-1
      vector<double> apq(nploc);
      for ( int ipair = 0; ipair < nploc; ipair++ )
      {
        //cout << ctxt.mype() << ": computing apq for global pair "
        //     << jglobal[top[ipair]] << " " << jglobal[bot[ipair]] << endl;
        apq[ipair] = 0.0;
        if ( jglobal[top[ipair]] >= 0 && jglobal[bot[ipair]] >= 0 )
        {
          const double *ap = acol[top[ipair]];
          const double *uq = ucol[bot[ipair]];
          int one = 1;
          int mloc = a.mloc();
          //cout << ctxt.mype() << ": apq ddot: ipair=" << ipair << endl;
          //cout << ctxt.mype() << ": apq ddot: top=" << top[ipair]
          //     << " bot=" << bot[ipair] << endl;
          //cout << ctxt.mype() << ": ap=" << ap << " uq=" << uq << endl;
183

Francois Gygi committed
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
          //for ( int i = 0; i < mloc; i++ )
          //  cout << ap[i] << " " << uq[i] << endl;
          apq[ipair] = ddot(&mloc,ap,&one,uq,&one);
        }
      }
      // apq now contains partial sums of apq
      ctxt.dsum('c',nploc,1,&apq[0],nploc);
      // apq now contains the off-diagonal elements apq

      // compute the diagonal elements and perform the rotation only on
      // pairs having abs(apq)>threshold
      // skip pairs having dummy vectors
      vector<double> appqq(2*nploc);
      for ( int ipair = 0; ipair < nploc; ipair++ )
      {
        appqq[2*ipair]   = 0.0;
        appqq[2*ipair+1] = 0.0;
        if ( jglobal[top[ipair]] >= 0 &&
             jglobal[bot[ipair]] >= 0 &&
             fabs(apq[ipair]) > threshold )
        {
          // compute diagonal matrix elements
          const double *ap = acol[top[ipair]];
          const double *aq = acol[bot[ipair]];
          const double *up = ucol[top[ipair]];
          const double *uq = ucol[bot[ipair]];
          // compute matrix elements app, apq, aqq
          int one = 1;
          int mloc = a.mloc();
          appqq[2*ipair]   = ddot(&mloc,ap,&one,up,&one);
          appqq[2*ipair+1] = ddot(&mloc,aq,&one,uq,&one);
        }
      }
      // appqq now contains partial sums of app and aqq
      ctxt.dsum('c',2*nploc,1,&appqq[0],2*nploc);
      // appqq now contains the off-diagonal elements app and aqq
220

Francois Gygi committed
221 222 223 224 225 226 227 228
      for ( int ipair = 0; ipair < nploc; ipair++ )
      {
        // keep count of the number of pairs above threshold
        if ( jglobal[top[ipair]] >= 0 &&
             jglobal[bot[ipair]] >= 0 &&
             fabs(apq[ipair]) > threshold )
        {
          npairs_above_threshold++;
229

Francois Gygi committed
230 231 232 233 234 235
          // compute rotation sine and cosine
          const double app = appqq[2*ipair];
          const double aqq = appqq[2*ipair+1];

          // fabs(apq[ipair]) is larger than machine epsilon
          // since it is larger than the threshold (which is > macheps)
236 237 238 239 240
          const double tau = ( aqq - app ) / ( 2.0 * apq[ipair]);
          const double sq = sqrt( 1.0 + tau*tau );
          double t = 1.0 / ( fabs(tau) + sq );
          if ( tau < 0.0 ) t = -t;
          double c = 1.0 / sqrt(1.0+t*t);
Francois Gygi committed
241
          double s = t * c;
242

Francois Gygi committed
243 244 245 246 247 248
          // the drot function computes
          // c*x + s*y -> x
          //-s*x + c*y -> y
          // call drot with args c, -s
          // change the sign of s before call to drot
          s = -s;
249

Francois Gygi committed
250
          //cout << " p=" << jglobal[top[ipair]] << " q=" << jglobal[bot[ipair]]
251
          //     << " app=" << app << " aqq=" << aqq
Francois Gygi committed
252 253 254 255
          //     << " apq=" << apq[ipair] << endl;
          //cout << " tau=" << tau << " c=" << c << " s=" << s << endl;

          // update columns of a and u
256 257 258
          double *ap = acol[top[ipair]];
          double *aq = acol[bot[ipair]];
          double *up = ucol[top[ipair]];
Francois Gygi committed
259 260 261 262 263 264 265 266 267
          double *uq = ucol[bot[ipair]];
          int one = 1;
          int mloc = a.mloc();
          drot(&mloc,ap,&one,aq,&one,&c,&s);
          drot(&mloc,up,&one,uq,&one,&c,&s);
          //cout << " apq_check=" << ddot(&mloc,ap,&one,uq,&one);
          //cout << " aqp_check=" << ddot(&mloc,aq,&one,up,&one) << endl;
        }
      } // for ipair
268

Francois Gygi committed
269 270 271
      // all local pairs have been processed

      // rotate top and bot arrays
272
      if ( nploc > 0 )
Francois Gygi committed
273
      {
274 275 276 277 278 279 280 281
        bot.push_back(top.back());
        top.pop_back();
        top.push_front(bot.front());
        bot.pop_front();

        // make rotation skip element 0 on the first process column
        // if my process column is zero, swap top[0] and top[1]
        if ( ctxt.mycol() == 0 )
Francois Gygi committed
282
        {
283 284 285 286 287 288 289 290 291 292 293 294 295
          if ( nploc > 1 )
          {
            int tmp = top[0];
            top[0] = top[1];
            top[1] = tmp;
          }
          else
          {
            // if there is only one local pair, exchange top[0] and bot[0]
            int tmp = top[0];
            top[0] = bot[0];
            bot[0] = tmp;
          }
Francois Gygi committed
296
        }
297

298 299 300 301
        int rbufi_left, rbufi_right, sbufi_left, sbufi_right;
        // send buffers contain a column of a and of u
        vector<double> sbuf_left(2*a.mloc()), sbuf_right(2*a.mloc());
        vector<double> rbuf_left(2*a.mloc()), rbuf_right(2*a.mloc());
Francois Gygi committed
302

303
        // on each task except mycol==npcol-1
304 305
        // send jglobal[bot[nploc-1]] to the right
        // if jglobal != -1 send vector bot[nploc-1] to the right
Francois Gygi committed
306

307
        // on each task except mycol==npcol-1
308 309 310
        // recv jglobal from the right
        // if jglobal != -1 recv a vector from the right into bot[nploc-1]
        // set value of jglobal[bot[nploc-1]]
311 312

        // on each task except mycol==0
313 314
        // send jglobal[top[0]] to the left
        // if jglobal != -1 send vector top[0] to the left
Francois Gygi committed
315

316
        // on each task except mycol==0
317 318 319
        // recv jglobal from the left
        // if jglobal != -1 recv a vector from the left into top[0]
        // set value of jglobal[top[0]]
Francois Gygi committed
320

321
        // exchange jglobal values first
322

323 324 325 326 327 328
        if ( ctxt.mycol() < last_active_process_col )
        {
          sbufi_right = jglobal[bot[nploc-1]];
          ctxt.isend(1,1,&sbufi_right,1,ctxt.myrow(),ctxt.mycol()+1);
          ctxt.irecv(1,1,&rbufi_right,1,ctxt.myrow(),ctxt.mycol()+1);
          jglobal[bot[nploc-1]] = rbufi_right;
329
          //cout << ctxt.mype() << ": received jglobal="
330 331 332 333 334 335 336 337
          //     << jglobal[bot[nploc-1]] << " from right" << endl;
        }
        if ( ctxt.mycol() != 0 )
        {
          sbufi_left = jglobal[top[0]];
          ctxt.isend(1,1,&sbufi_left,1,ctxt.myrow(),ctxt.mycol()-1);
          ctxt.irecv(1,1,&rbufi_left,1,ctxt.myrow(),ctxt.mycol()-1);
          jglobal[top[0]] = rbufi_left;
338
          //cout << ctxt.mype() << ": received jglobal="
339 340
          //     << jglobal[top[0]] << " from left" << endl;
        }
Francois Gygi committed
341

342
        // exchange column vectors
Francois Gygi committed
343

344 345 346 347 348 349 350 351
        if ( ctxt.mycol() < last_active_process_col )
        {
          memcpy(&sbuf_right[0],    acol[bot[nploc-1]], mloc*sizeof(double) );
          memcpy(&sbuf_right[mloc], ucol[bot[nploc-1]], mloc*sizeof(double) );
          ctxt.dsend(2*mloc,1,&sbuf_right[0],2*mloc,ctxt.myrow(),ctxt.mycol()+1);
          ctxt.drecv(2*mloc,1,&rbuf_right[0],2*mloc,ctxt.myrow(),ctxt.mycol()+1);
          memcpy(acol[bot[nploc-1]], &rbuf_right[0],    mloc*sizeof(double) );
          memcpy(ucol[bot[nploc-1]], &rbuf_right[mloc], mloc*sizeof(double) );
352
          //cout << ctxt.mype() << ": received vector jglobal="
353 354 355 356 357 358 359 360 361 362
          //     << jglobal[bot[nploc-1]] << " from right" << endl;
        }
        if ( ctxt.mycol() != 0 )
        {
          memcpy(&sbuf_left[0],       acol[top[0]],     mloc*sizeof(double) );
          memcpy(&sbuf_left[mloc],    ucol[top[0]],     mloc*sizeof(double) );
          ctxt.dsend(2*mloc,1,&sbuf_left[0],2*mloc,ctxt.myrow(),ctxt.mycol()-1);
          ctxt.drecv(2*mloc,1,&rbuf_left[0],2*mloc,ctxt.myrow(),ctxt.mycol()-1);
          memcpy(acol[top[0]],       &rbuf_left[0],     mloc*sizeof(double) );
          memcpy(ucol[top[0]],       &rbuf_left[mloc],  mloc*sizeof(double) );
363
          //cout << ctxt.mype() << ": received vector jglobal="
364 365 366
          //     << jglobal[top[0]] << " from left" << endl;
        }
      } // if nploc > 0
Francois Gygi committed
367 368 369 370
      ctxt.barrier();
      // end of step

    } // for irot
371

Francois Gygi committed
372 373 374
    // sweep is complete
    // accumulate number of pairs above threshold
    ctxt.isum('r',1,1,&npairs_above_threshold,1);
375

Francois Gygi committed
376
    done = ( ( npairs_above_threshold == 0 ) || ( nsweep >= maxsweep ) );
377

Francois Gygi committed
378 379
  } // while !done
  // cout << " nsweep=" << nsweep << endl;
380

Francois Gygi committed
381 382 383
  // if a dummy vector was used, (i.e. if nloc_odd), the dummy vector
  // may end up anywhere in the array after all rotations are completed.
  // The array a_aux may contain a (non-dummy) vector.
384

Francois Gygi committed
385 386
  if ( nloc_odd )
  {
387
    // find position of the dummy vector and copy a_aux onto it
Francois Gygi committed
388 389 390 391 392
    int idum = 0;
    while ( jglobal[idum] != -1 && idum < 2*nploc ) idum++;
    //cout << ctxt.mype() << ": idum=" << idum << endl;
    if ( idum != 2*nploc-1 )
    {
393
      memcpy(acol[idum],&a_aux[0],mloc*sizeof(double));
Francois Gygi committed
394
      memcpy(ucol[idum],&u_aux[0],mloc*sizeof(double));
395
    }
Francois Gygi committed
396
  }
397

Francois Gygi committed
398 399 400
  // compute eigenvalues
  for ( int i = 0; i < a.n(); i++ )
    e[i] = 0.0;
401 402 403 404 405 406 407 408 409 410 411 412
  for ( int jblock = 0; jblock < a.nblocks(); jblock++ )
    for ( int y = 0; y < a.nbs(jblock); y++ )
    {
      // j is the global column index
      int j = a.j(jblock,y);
      int jjj = y + jblock*a.nb();
      const double *ap = a.valptr(jjj*a.mloc());
      const double *up = u.valptr(jjj*u.mloc());
      int mloc = a.mloc();
      int one = 1;
      e[j] = ddot(&mloc,ap,&one,up,&one);
    }
Francois Gygi committed
413 414 415 416
  // e now contains the partial sums of the diagonal elements of a
  ctxt.dsum(a.n(),1,&e[0],a.n());
  // e contains the eigenvalues of a
  // u contains the eigenvectors of a
417

Francois Gygi committed
418 419
  return nsweep;
}