jacobi.C 14.6 KB
 Francois Gygi committed Jul 21, 2006 1 2 //////////////////////////////////////////////////////////////////////////////// //  Francois Gygi committed Aug 13, 2008 3 4 5 6 // Copyright (c) 2008 The Regents of the University of California // // This file is part of Qbox //  Francois Gygi committed Sep 08, 2008 7 8 // Qbox is distributed under the terms of the GNU General Public License // as published by the Free Software Foundation, either version 2 of  Francois Gygi committed Aug 13, 2008 9 10 11 12 13 14 // the License, or (at your option) any later version. // See the file COPYING in the root directory of this distribution // or . // //////////////////////////////////////////////////////////////////////////////// //  Francois Gygi committed Jul 21, 2006 15 16 17 // jacobi.C // ////////////////////////////////////////////////////////////////////////////////  Francois Gygi committed Sep 08, 2008 18 // $Id: jacobi.C,v 1.5 2008-09-08 15:56:19 fgygi Exp$  Francois Gygi committed Jul 21, 2006 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52  #include #include #include #include #include #include #include using namespace std; #ifdef USE_MPI #include #endif #ifdef SCALAPACK #include "blacs.h" #endif #include "Context.h" #include "Matrix.h" #include "blas.h" int jacobi(int maxsweep, double threshold, DoubleMatrix& a, DoubleMatrix& u, vector& e) { assert(threshold>1.e-14); const Context& ctxt = a.context(); // The input matrix is a // the orthogonal transformation is returned in u // on exit, a is diagonal, u contains eigenvectors, e contains eigenvalues assert(a.m()==a.n()); assert(u.m()==u.n()); assert(a.m()==u.m()); assert(a.mb()==u.mb()); assert(a.nb()==u.nb());  Francois Gygi committed Oct 19, 2007 53   Francois Gygi committed Jul 21, 2006 54 55 56  const int mloc = a.mloc(); const int nloc = a.nloc(); //cout << ctxt.mype() << ": nloc: " << nloc << endl;  Francois Gygi committed Oct 19, 2007 57   Francois Gygi committed Nov 04, 2006 58 59 60 61 62 63 64  // identify the last active process column // process columns beyond that column do not have any elements of a[k] // compute num_nblocks = total number of column blocks // if num_nblocks >= ctxt.npcol(), all process columns are active // otherwise, the last active process column has index num_nblocks-1 const int num_nblocks = u.n() / u.nb() + ( u.n()%u.nb() == 0 ? 0 : 1 ); const int last_active_process_col = min(ctxt.npcol()-1, num_nblocks-1);  Francois Gygi committed Oct 19, 2007 65   Francois Gygi committed Jul 21, 2006 66 67  // initialize u with the identity u.identity();  Francois Gygi committed Oct 19, 2007 68   Francois Gygi committed Jul 21, 2006 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84  // eigenvalue array e.resize(a.n()); // check if the local number of rows is odd const bool nloc_odd = ( a.nloc()%2 != 0 ); // if nloc is odd, an auxiliary array is created to host an extra column // for both a and u vector a_aux, u_aux; if ( nloc_odd ) { a_aux.resize(mloc); u_aux.resize(mloc); } // compute local number of pairs nploc  Francois Gygi committed Oct 19, 2007 85  const int nploc = (a.nloc()+1)/2;  Francois Gygi committed Jul 21, 2006 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102  // dimension of top and bot arrays is nploc: local number of pairs deque top(nploc), bot(nploc); // compute total number of pairs np int np = nploc; ctxt.isum('r',1,1,&np,1); //cout << ctxt.mype() << ": np=" << np << endl; // initialize top and bot arrays // the pair i is (top[i],bot[i]) // top[i] is the local index of the top column of pair i // bot[i] is the local index of the bottom column of pair i for ( int i = 0; i < nploc; i++ ) top[i] = i; for ( int i = 0; i < nploc; i++ ) bot[nploc-i-1] = nploc+i; // if top[i] or bot[i] == nloc,the data resides in the array a_aux or u_aux  Francois Gygi committed Oct 19, 2007 103   Francois Gygi committed Jul 21, 2006 104  // jglobal: global column index  Francois Gygi committed Oct 19, 2007 105  // jglobal[i] is the global column index of the column residing in  Francois Gygi committed Jul 21, 2006 106 107  // the local vector i. If nloc_odd and i==2*nploc-1, jglobal[i] == -1 vector jglobal(2*nploc,-1);  Francois Gygi committed Oct 19, 2007 108 109  for ( int jblock = 0; jblock < a.nblocks(); jblock++ ) for ( int y = 0; y < a.nbs(jblock); y++ )  Francois Gygi committed Jul 21, 2006 110  jglobal[y + jblock*a.nb()] = a.j(jblock,y);  Francois Gygi committed Oct 19, 2007 111   Francois Gygi committed Jul 21, 2006 112 113 114 115 116 117 118 119 120 121 122 123 124  // store addresses of columns of a and of u in acol and ucol vector acol(2*nploc); vector ucol(2*nploc); for ( int i = 0; i < a.nloc(); i++ ) acol[i] = a.valptr(i*a.mloc()); // if nloc is odd, store the address of vector 2*nploc-1 if ( nloc_odd ) acol[2*nploc-1] = &a_aux[0]; for ( int i = 0; i < u.nloc(); i++ ) ucol[i] = u.valptr(i*u.mloc()); // if nloc is odd, store the address of vector 2*nploc-1 if ( nloc_odd ) ucol[2*nploc-1] = &u_aux[0];  Francois Gygi committed Oct 19, 2007 125   Francois Gygi committed Jul 21, 2006 126 127 128 129  //for ( int i = 0; i < acol.size(); i++ ) // cout << ctxt.mype() << ": acol[" << i << "]=" << acol[i] << endl; //for ( int i = 0; i < ucol.size(); i++ ) // cout << ctxt.mype() << ": ucol[" << i << "]=" << ucol[i] << endl;  Francois Gygi committed Oct 19, 2007 130   Francois Gygi committed Jul 21, 2006 131 132  // the vectors of the pair (top[i],bot[i]) are located at // addresses acol[top[i]] and acol[bot[i]]  Francois Gygi committed Oct 19, 2007 133   Francois Gygi committed Jul 21, 2006 134 135 136 137 138 139 140 141 142  bool done = false; int nsweep = 0; while ( !done ) { int npairs_above_threshold = 0; // sweep: process local pairs and rotate 2*np-1 times nsweep++; for ( int irot = 0; irot < 2*np-1; irot++ ) {  Francois Gygi committed Oct 19, 2007 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162  //cout << ctxt.mype() << ": top[i]: "; //for ( int i = 0; i < nploc; i++ ) // cout << setw(3) << top[i]; //cout << endl; //cout << ctxt.mype() << ": bot[i]: "; //for ( int i = 0; i < nploc; i++ ) // cout << setw(3) << bot[i]; //cout << endl; //cout << ctxt.mype() << ": jglobal[top[i]]: "; //for ( int i = 0; i < nploc; i++ ) // cout << setw(3) << jglobal[top[i]]; //cout << endl; //cout << ctxt.mype() << ": jglobal[bot[i]]: "; //for ( int i = 0; i < nploc; i++ ) // cout << setw(3) << jglobal[bot[i]]; //cout << endl;  Francois Gygi committed Jul 21, 2006 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183  // perform Jacobi rotation for all local pairs // compute off-diagonal matrix elements apq for all pairs // skip the pair if one or both of the vectors is a dummy vector // i.e. a vector having jglobal==-1 vector apq(nploc); for ( int ipair = 0; ipair < nploc; ipair++ ) { //cout << ctxt.mype() << ": computing apq for global pair " // << jglobal[top[ipair]] << " " << jglobal[bot[ipair]] << endl; apq[ipair] = 0.0; if ( jglobal[top[ipair]] >= 0 && jglobal[bot[ipair]] >= 0 ) { const double *ap = acol[top[ipair]]; const double *uq = ucol[bot[ipair]]; int one = 1; int mloc = a.mloc(); //cout << ctxt.mype() << ": apq ddot: ipair=" << ipair << endl; //cout << ctxt.mype() << ": apq ddot: top=" << top[ipair] // << " bot=" << bot[ipair] << endl; //cout << ctxt.mype() << ": ap=" << ap << " uq=" << uq << endl;  Francois Gygi committed Oct 19, 2007 184   Francois Gygi committed Jul 21, 2006 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220  //for ( int i = 0; i < mloc; i++ ) // cout << ap[i] << " " << uq[i] << endl; apq[ipair] = ddot(&mloc,ap,&one,uq,&one); } } // apq now contains partial sums of apq ctxt.dsum('c',nploc,1,&apq[0],nploc); // apq now contains the off-diagonal elements apq // compute the diagonal elements and perform the rotation only on // pairs having abs(apq)>threshold // skip pairs having dummy vectors vector appqq(2*nploc); for ( int ipair = 0; ipair < nploc; ipair++ ) { appqq[2*ipair] = 0.0; appqq[2*ipair+1] = 0.0; if ( jglobal[top[ipair]] >= 0 && jglobal[bot[ipair]] >= 0 && fabs(apq[ipair]) > threshold ) { // compute diagonal matrix elements const double *ap = acol[top[ipair]]; const double *aq = acol[bot[ipair]]; const double *up = ucol[top[ipair]]; const double *uq = ucol[bot[ipair]]; // compute matrix elements app, apq, aqq int one = 1; int mloc = a.mloc(); appqq[2*ipair] = ddot(&mloc,ap,&one,up,&one); appqq[2*ipair+1] = ddot(&mloc,aq,&one,uq,&one); } } // appqq now contains partial sums of app and aqq ctxt.dsum('c',2*nploc,1,&appqq[0],2*nploc); // appqq now contains the off-diagonal elements app and aqq  Francois Gygi committed Oct 19, 2007 221   Francois Gygi committed Jul 21, 2006 222 223 224 225 226 227 228 229  for ( int ipair = 0; ipair < nploc; ipair++ ) { // keep count of the number of pairs above threshold if ( jglobal[top[ipair]] >= 0 && jglobal[bot[ipair]] >= 0 && fabs(apq[ipair]) > threshold ) { npairs_above_threshold++;  Francois Gygi committed Oct 19, 2007 230   Francois Gygi committed Jul 21, 2006 231 232 233 234 235 236  // compute rotation sine and cosine const double app = appqq[2*ipair]; const double aqq = appqq[2*ipair+1]; // fabs(apq[ipair]) is larger than machine epsilon // since it is larger than the threshold (which is > macheps)  Francois Gygi committed Oct 19, 2007 237 238 239 240 241  const double tau = ( aqq - app ) / ( 2.0 * apq[ipair]); const double sq = sqrt( 1.0 + tau*tau ); double t = 1.0 / ( fabs(tau) + sq ); if ( tau < 0.0 ) t = -t; double c = 1.0 / sqrt(1.0+t*t);  Francois Gygi committed Jul 21, 2006 242  double s = t * c;  Francois Gygi committed Oct 19, 2007 243   Francois Gygi committed Jul 21, 2006 244 245 246 247 248 249  // the drot function computes // c*x + s*y -> x //-s*x + c*y -> y // call drot with args c, -s // change the sign of s before call to drot s = -s;  Francois Gygi committed Oct 19, 2007 250   Francois Gygi committed Jul 21, 2006 251  //cout << " p=" << jglobal[top[ipair]] << " q=" << jglobal[bot[ipair]]  Francois Gygi committed Oct 19, 2007 252  // << " app=" << app << " aqq=" << aqq  Francois Gygi committed Jul 21, 2006 253 254 255 256  // << " apq=" << apq[ipair] << endl; //cout << " tau=" << tau << " c=" << c << " s=" << s << endl; // update columns of a and u  Francois Gygi committed Oct 19, 2007 257 258 259  double *ap = acol[top[ipair]]; double *aq = acol[bot[ipair]]; double *up = ucol[top[ipair]];  Francois Gygi committed Jul 21, 2006 260 261 262 263 264 265 266 267 268  double *uq = ucol[bot[ipair]]; int one = 1; int mloc = a.mloc(); drot(&mloc,ap,&one,aq,&one,&c,&s); drot(&mloc,up,&one,uq,&one,&c,&s); //cout << " apq_check=" << ddot(&mloc,ap,&one,uq,&one); //cout << " aqp_check=" << ddot(&mloc,aq,&one,up,&one) << endl; } } // for ipair  Francois Gygi committed Oct 19, 2007 269   Francois Gygi committed Jul 21, 2006 270 271 272  // all local pairs have been processed // rotate top and bot arrays  Francois Gygi committed Nov 04, 2006 273  if ( nploc > 0 )  Francois Gygi committed Jul 21, 2006 274  {  Francois Gygi committed Nov 04, 2006 275 276 277 278 279 280 281 282  bot.push_back(top.back()); top.pop_back(); top.push_front(bot.front()); bot.pop_front(); // make rotation skip element 0 on the first process column // if my process column is zero, swap top[0] and top[1] if ( ctxt.mycol() == 0 )  Francois Gygi committed Jul 21, 2006 283  {  Francois Gygi committed Nov 04, 2006 284 285 286 287 288 289 290 291 292 293 294 295 296  if ( nploc > 1 ) { int tmp = top[0]; top[0] = top[1]; top[1] = tmp; } else { // if there is only one local pair, exchange top[0] and bot[0] int tmp = top[0]; top[0] = bot[0]; bot[0] = tmp; }  Francois Gygi committed Jul 21, 2006 297  }  Francois Gygi committed Oct 19, 2007 298   Francois Gygi committed Nov 04, 2006 299 300 301 302  int rbufi_left, rbufi_right, sbufi_left, sbufi_right; // send buffers contain a column of a and of u vector sbuf_left(2*a.mloc()), sbuf_right(2*a.mloc()); vector rbuf_left(2*a.mloc()), rbuf_right(2*a.mloc());  Francois Gygi committed Jul 21, 2006 303   Francois Gygi committed Oct 19, 2007 304  // on each task except mycol==npcol-1  Francois Gygi committed Nov 04, 2006 305 306  // send jglobal[bot[nploc-1]] to the right // if jglobal != -1 send vector bot[nploc-1] to the right  Francois Gygi committed Jul 21, 2006 307   Francois Gygi committed Oct 19, 2007 308  // on each task except mycol==npcol-1  Francois Gygi committed Nov 04, 2006 309 310 311  // recv jglobal from the right // if jglobal != -1 recv a vector from the right into bot[nploc-1] // set value of jglobal[bot[nploc-1]]  Francois Gygi committed Oct 19, 2007 312 313  // on each task except mycol==0  Francois Gygi committed Nov 04, 2006 314 315  // send jglobal[top[0]] to the left // if jglobal != -1 send vector top[0] to the left  Francois Gygi committed Jul 21, 2006 316   Francois Gygi committed Oct 19, 2007 317  // on each task except mycol==0  Francois Gygi committed Nov 04, 2006 318 319 320  // recv jglobal from the left // if jglobal != -1 recv a vector from the left into top[0] // set value of jglobal[top[0]]  Francois Gygi committed Jul 21, 2006 321   Francois Gygi committed Nov 04, 2006 322  // exchange jglobal values first  Francois Gygi committed Oct 19, 2007 323   Francois Gygi committed Nov 04, 2006 324 325 326 327 328 329  if ( ctxt.mycol() < last_active_process_col ) { sbufi_right = jglobal[bot[nploc-1]]; ctxt.isend(1,1,&sbufi_right,1,ctxt.myrow(),ctxt.mycol()+1); ctxt.irecv(1,1,&rbufi_right,1,ctxt.myrow(),ctxt.mycol()+1); jglobal[bot[nploc-1]] = rbufi_right;  Francois Gygi committed Oct 19, 2007 330  //cout << ctxt.mype() << ": received jglobal="  Francois Gygi committed Nov 04, 2006 331 332 333 334 335 336 337 338  // << jglobal[bot[nploc-1]] << " from right" << endl; } if ( ctxt.mycol() != 0 ) { sbufi_left = jglobal[top[0]]; ctxt.isend(1,1,&sbufi_left,1,ctxt.myrow(),ctxt.mycol()-1); ctxt.irecv(1,1,&rbufi_left,1,ctxt.myrow(),ctxt.mycol()-1); jglobal[top[0]] = rbufi_left;  Francois Gygi committed Oct 19, 2007 339  //cout << ctxt.mype() << ": received jglobal="  Francois Gygi committed Nov 04, 2006 340 341  // << jglobal[top[0]] << " from left" << endl; }  Francois Gygi committed Jul 21, 2006 342   Francois Gygi committed Nov 04, 2006 343  // exchange column vectors  Francois Gygi committed Jul 21, 2006 344   Francois Gygi committed Nov 04, 2006 345 346 347 348 349 350 351 352  if ( ctxt.mycol() < last_active_process_col ) { memcpy(&sbuf_right[0], acol[bot[nploc-1]], mloc*sizeof(double) ); memcpy(&sbuf_right[mloc], ucol[bot[nploc-1]], mloc*sizeof(double) ); ctxt.dsend(2*mloc,1,&sbuf_right[0],2*mloc,ctxt.myrow(),ctxt.mycol()+1); ctxt.drecv(2*mloc,1,&rbuf_right[0],2*mloc,ctxt.myrow(),ctxt.mycol()+1); memcpy(acol[bot[nploc-1]], &rbuf_right[0], mloc*sizeof(double) ); memcpy(ucol[bot[nploc-1]], &rbuf_right[mloc], mloc*sizeof(double) );  Francois Gygi committed Oct 19, 2007 353  //cout << ctxt.mype() << ": received vector jglobal="  Francois Gygi committed Nov 04, 2006 354 355 356 357 358 359 360 361 362 363  // << jglobal[bot[nploc-1]] << " from right" << endl; } if ( ctxt.mycol() != 0 ) { memcpy(&sbuf_left[0], acol[top[0]], mloc*sizeof(double) ); memcpy(&sbuf_left[mloc], ucol[top[0]], mloc*sizeof(double) ); ctxt.dsend(2*mloc,1,&sbuf_left[0],2*mloc,ctxt.myrow(),ctxt.mycol()-1); ctxt.drecv(2*mloc,1,&rbuf_left[0],2*mloc,ctxt.myrow(),ctxt.mycol()-1); memcpy(acol[top[0]], &rbuf_left[0], mloc*sizeof(double) ); memcpy(ucol[top[0]], &rbuf_left[mloc], mloc*sizeof(double) );  Francois Gygi committed Oct 19, 2007 364  //cout << ctxt.mype() << ": received vector jglobal="  Francois Gygi committed Nov 04, 2006 365 366 367  // << jglobal[top[0]] << " from left" << endl; } } // if nploc > 0  Francois Gygi committed Jul 21, 2006 368 369 370 371  ctxt.barrier(); // end of step } // for irot  Francois Gygi committed Oct 19, 2007 372   Francois Gygi committed Jul 21, 2006 373 374 375  // sweep is complete // accumulate number of pairs above threshold ctxt.isum('r',1,1,&npairs_above_threshold,1);  Francois Gygi committed Oct 19, 2007 376   Francois Gygi committed Jul 21, 2006 377  done = ( ( npairs_above_threshold == 0 ) || ( nsweep >= maxsweep ) );  Francois Gygi committed Oct 19, 2007 378   Francois Gygi committed Jul 21, 2006 379 380  } // while !done // cout << " nsweep=" << nsweep << endl;  Francois Gygi committed Oct 19, 2007 381   Francois Gygi committed Jul 21, 2006 382 383 384  // if a dummy vector was used, (i.e. if nloc_odd), the dummy vector // may end up anywhere in the array after all rotations are completed. // The array a_aux may contain a (non-dummy) vector.  Francois Gygi committed Oct 19, 2007 385   Francois Gygi committed Jul 21, 2006 386 387  if ( nloc_odd ) {  Francois Gygi committed Oct 19, 2007 388  // find position of the dummy vector and copy a_aux onto it  Francois Gygi committed Jul 21, 2006 389 390 391 392 393  int idum = 0; while ( jglobal[idum] != -1 && idum < 2*nploc ) idum++; //cout << ctxt.mype() << ": idum=" << idum << endl; if ( idum != 2*nploc-1 ) {  Francois Gygi committed Oct 19, 2007 394  memcpy(acol[idum],&a_aux[0],mloc*sizeof(double));  Francois Gygi committed Jul 21, 2006 395  memcpy(ucol[idum],&u_aux[0],mloc*sizeof(double));  Francois Gygi committed Oct 19, 2007 396  }  Francois Gygi committed Jul 21, 2006 397  }  Francois Gygi committed Oct 19, 2007 398   Francois Gygi committed Jul 21, 2006 399 400 401  // compute eigenvalues for ( int i = 0; i < a.n(); i++ ) e[i] = 0.0;  Francois Gygi committed Oct 19, 2007 402 403 404 405 406 407 408 409 410 411 412 413  for ( int jblock = 0; jblock < a.nblocks(); jblock++ ) for ( int y = 0; y < a.nbs(jblock); y++ ) { // j is the global column index int j = a.j(jblock,y); int jjj = y + jblock*a.nb(); const double *ap = a.valptr(jjj*a.mloc()); const double *up = u.valptr(jjj*u.mloc()); int mloc = a.mloc(); int one = 1; e[j] = ddot(&mloc,ap,&one,up,&one); }  Francois Gygi committed Jul 21, 2006 414 415 416 417  // e now contains the partial sums of the diagonal elements of a ctxt.dsum(a.n(),1,&e[0],a.n()); // e contains the eigenvalues of a // u contains the eigenvectors of a  Francois Gygi committed Oct 19, 2007 418   Francois Gygi committed Jul 21, 2006 419 420  return nsweep; }