////////////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2008 The Regents of the University of California
//
// This file is part of Qbox
//
// Qbox is distributed under the terms of the GNU General Public License
// as published by the Free Software Foundation, either version 2 of
// the License, or (at your option) any later version.
// See the file COPYING in the root directory of this distribution
// or .
//
////////////////////////////////////////////////////////////////////////////////
//
// Matrix.h
//
////////////////////////////////////////////////////////////////////////////////
#ifndef MATRIX_H
#define MATRIX_H
#define MATRIX_DEF_BLOCK_SIZE 64
#include "Context.h"
#include
#include
#include // memcpy
class ComplexMatrix;
class DoubleMatrix
{
private:
Context ctxt_;
int ictxt_;
int lld_; // leading dimension of local matrix
int m_, n_; // size of global matrix
int mb_, nb_; // size of blocks
int mloc_, nloc_; // size of local array
int size_; // size of local array;
int nprow_, npcol_; // number of process rows and cols in context
int myrow_, mycol_; // position of my process in the process grid
int mblocks_, nblocks_; // number of local blocks
int desc_[9];
bool active_;
bool m_incomplete_, n_incomplete_; // this process has an incomplete block
bool reference_; // object was created using the copy constructor
double* val;
public:
double* valptr(int i=0) { return &val[i]; }
const double* cvalptr(int i=0) const { return &val[i]; }
double& operator[] (int i) { return val[i]; }
const double& operator[] (int i) const { return val[i]; }
const Context& context(void) const { return ctxt_; }
int ictxt(void) const { return ictxt_; }
int lld(void) const { return lld_; }
int m(void) const { return m_; } // size of global matrix
int n(void) const { return n_; } // size of global matrix
int mb(void) const { return mb_; } // size of blocks
int nb(void) const { return nb_; } // size of blocks
int mloc(void) const { return mloc_; } // size of local array
int nloc(void) const { return nloc_; } // size of local array
int size(void) const { return size_; } // size of local array
int localsize(void) const { return mloc_*nloc_; } // local size of val
double memsize(void) const { return (double)m_ * (double)n_ * sizeof(double); }
double localmemsize(void) const
{ return (double) mloc_ * (double) nloc_ * sizeof(double); }
const int* desc(void) const { return &desc_[0]; }
// local block size of block (l,m)
int mbs(int l) const
{
// if l is the last block and this process holds an incomplete
// block, then the size is m_%mb_. Otherwise, it is mb_.
return ( (l==mblocks_-1) && ( m_incomplete_ ) ) ? m_%mb_ : mb_;
}
int nbs(int m) const
{
// if m is the last block and this process holds an incomplete
// block, then the size is n_%nb_. Otherwise, it is nb_.
return ( (m==nblocks_-1) && ( n_incomplete_ ) ) ? n_%nb_ : nb_;
}
// number of local blocks
int mblocks(void) const { return mblocks_; }
int nblocks(void) const { return nblocks_; }
// functions to compute local indices from global indices
// index of blocks: element (i,j) is at position (x,y)
// in local block (l,m) of process (pr,pc)
int l(int i) const { return i/(nprow_*mb_); }
int x(int i) const { return i % mb_; }
int pr(int i) const { return (i/mb_) % nprow_; }
int m(int j) const { return j/(npcol_*nb_); }
int y(int j) const { return j % nb_; }
int pc(int j) const { return (j/nb_) % npcol_; }
// global indices:
// (i,j) is the global index of element (x,y) of block (l,m)
int i(int l, int x) const { return (l * nprow_ + myrow_) * mb_ + x; }
int j(int m, int y) const { return (m * npcol_ + mycol_) * nb_ + y; }
int iglobal(int ilocal) const
{ return mb_*(nprow_*(ilocal/mb_)+myrow_)+ilocal%mb_; }
int jglobal(int jlocal) const
{ return nb_*(npcol_*(jlocal/nb_)+mycol_)+jlocal%nb_; }
int iglobal(int iprow, int ilocal) const
{ return mb_*(nprow_*(ilocal/mb_)+iprow)+ilocal%mb_; }
int jglobal(int jpcol, int jlocal) const
{ return nb_*(npcol_*(jlocal/nb_)+jpcol)+jlocal%nb_; }
// store element a(ii,jj) (where ii,jj are global indices)
// in array val:
// int iii = l(ii) * mb_ + x(ii);
// int jjj = m(jj) * nb_ + y(jj);
// val[iii+mloc_*jjj] = a_ij;
// active flag: the matrix has elements on this process
bool active(void) const { return active_; }
void init_size(int m, int n, int mb = MATRIX_DEF_BLOCK_SIZE,
int nb = MATRIX_DEF_BLOCK_SIZE);
void resize(int m, int n, int mb = MATRIX_DEF_BLOCK_SIZE,
int nb = MATRIX_DEF_BLOCK_SIZE)
{
const int old_size = size_;
init_size(m,n,mb,nb);
if ( size_ == old_size ) return;
delete[] val;
val = new double[size_];
clear();
}
void print(std::ostream& os) const;
explicit DoubleMatrix(const Context& ctxt) : ctxt_(ctxt),
m_(0), n_(0), mb_(0), nb_(0), mloc_(0), nloc_(0),
size_(0), reference_(false), val(0) {}
// Construct a DoubleMatrix of dimensions m,n
explicit DoubleMatrix(const Context& ctxt, int m, int n,
int mb=MATRIX_DEF_BLOCK_SIZE,
int nb=MATRIX_DEF_BLOCK_SIZE) : ctxt_(ctxt),
m_(0), n_(0), mb_(0), nb_(0), size_(0), reference_(false), val(0)
{
resize(m,n,mb,nb);
}
// copy constructor: create a separate copy of rhs
explicit DoubleMatrix(const DoubleMatrix& rhs) : ctxt_(rhs.context()),
reference_(false)
{
init_size(rhs.m(),rhs.n(),rhs.mb(),rhs.nb());
val = new double[size_];
memcpy(val, rhs.val, size_*sizeof(double));
}
// reference constructor create a proxy for a ComplexMatrix rhs
explicit DoubleMatrix(ComplexMatrix& rhs);
explicit DoubleMatrix(const ComplexMatrix& rhs);
~DoubleMatrix(void)
{
if ( !reference_ ) delete[] val;
}
DoubleMatrix& operator=(const DoubleMatrix& a);
DoubleMatrix& operator+=(const DoubleMatrix& a);
DoubleMatrix& operator-=(const DoubleMatrix& a);
DoubleMatrix& operator*=(double a);
void matgather(double *a, int lda) const;
void initdiag(const double* const a); // initialize diagonal from a[]
void init(const double* const a, int lda);
void set(char uplo, double x);
void identity(void);
void clear(void);
double dot(const DoubleMatrix &a) const;
void axpy(double alpha, const DoubleMatrix &a);
void scal(double alpha);
double nrm2(void) const;
double asum(void) const;
double amax(void) const;
double trace(void) const;
void symmetrize(char uplo);
// rank-1 update: *this += alpha * x(kx) * (y(ky))^T
// where x(kx) is row kx of x, and y(ky) is row ky of y
void ger(double alpha, const DoubleMatrix& x, int kx,
const DoubleMatrix& y, int ky);
// symmetric rank-1 update
void syr(char uplo, double alpha, const DoubleMatrix& x,
int ix, char rowcol);
// get submatrix A(ia:ia+m,ja:ja+n) of A
void getsub(const DoubleMatrix& a,int m,int n,int ia,int ja);
// get submatrix A(ia:ia+m,ja:ja+n) of A and store in
// this(idest:idest+m,jdest:jdest+n)
void getsub(const DoubleMatrix& a,int m,int n,int isrc,int jsrc,
int idest, int jdest);
// matrix * matrix
// this = alpha*op(A)*op(B)+beta*this
void gemm(char transa, char transb,
double alpha, const DoubleMatrix& a,
const DoubleMatrix& b, double beta);
// symmetric_matrix * matrix
// *this = alpha * A * B + beta * this
void symm(char side, char uplo,
double alpha, const DoubleMatrix& a,
const DoubleMatrix& b, double beta);
// symmetric rank k update
// this = beta * this + alpha * A * A^T (trans=='n')
// this = beta * this + alpha * A^T * A (trans=='t')
void syrk(char uplo, char trans,
double alpha, const DoubleMatrix& a, double beta);
// matrix transpose
// this = alpha * transpose(A) + beta * this
void transpose(double alpha, const DoubleMatrix& a, double beta);
void transpose(const DoubleMatrix& a);
void trmm(char side, char uplo, char trans, char diag,
double alpha, const DoubleMatrix& a);
// solve triangular system
void trsm(char side, char uplo, char trans, char diag,
double alpha, const DoubleMatrix& a);
void trtrs(char uplo, char trans, char diag, DoubleMatrix& b) const;
// Cholesky decomposition of a symmetric matrix
void potrf(char uplo);
// Inverse of a symmetric matrix from Cholesky factor
void potri(char uplo);
// Polar decomposition, tolerance ||I-X^T*X||& ipiv);
// compute inverse of a square matrix
void inverse(void);
// compute inverse and determinant of a square matrix
double inverse_det(void);
// compute determinant of a square matrix in LU form
double det_from_lu(std::valarray ipiv);
// compute inverse of a square matrix in LU form
void inverse_from_lu(std::valarray& ipiv);
// Inverse of triangular matrix
void trtri(char uplo,char diag);
double pocon(char) const;
void sygst(int, char, const DoubleMatrix&);
// compute eigenvalues and eigenvectors of symmetric matrix *this
void syev(char uplo, std::valarray& w, DoubleMatrix& z);
// compute eigenvalues (only) of symmetric matrix *this
void syev(char uplo, std::valarray& w);
// compute eigenvalues and eigenvectors of symmetric matrix *this
// using the divide and conquer method of Tisseur and Dongarra
void syevd(char uplo, std::valarray& w, DoubleMatrix& z);
// compute eigenvalues (only) of symmetric matrix *this
// using the divide and conquer method of Tisseur and Dongarra
void syevd(char uplo, std::valarray& w);
// compute eigenvalues and eigenvectors of symmetric matrix *this
// using the expert driver
void syevx(char uplo, std::valarray& w, DoubleMatrix& z,
double abstol);
// permute the coeff of the matrix *this
void lapiv(char direc, char rowcol, const int *ipiv);
// signature of a permutation returned by lu
int signature(std::valarray ipiv);
// compute eigenvalues (only) of symmetric matrix *this
// using the divide and conquer method of Tisseur and Dongarra
//void syevx(char uplo, std::valarray& w);
};
std::ostream& operator << ( std::ostream& os, const DoubleMatrix& a );
class ComplexMatrix
{
private:
Context ctxt_;
int ictxt_;
int lld_; // leading dimension of local matrix
int m_, n_; // size of global matrix
int mb_, nb_; // size of blocks
int mloc_, nloc_; // size of local array
int size_; // size of local array;
int nprow_, npcol_; // number of process rows and cols in context
int myrow_, mycol_; // position of my process in the process grid
int mblocks_, nblocks_; // number of local blocks
int desc_[9];
bool active_;
bool m_incomplete_, n_incomplete_; // this process has an incomplete block
bool reference_; // object was created using the copy constructor
std::complex* val;
public:
std::complex* valptr(int i=0) { return &val[i]; }
const std::complex* cvalptr(int i=0) const { return &val[i]; }
std::complex& operator[] (int i) { return val[i]; }
const std::complex& operator[] (int i) const { return val[i]; }
const Context& context(void) const { return ctxt_; }
int ictxt(void) const { return ictxt_; }
int lld(void) const { return lld_; }
int m(void) const { return m_; } // size of global matrix
int n(void) const { return n_; } // size of global matrix
int mb(void) const { return mb_; } // size of blocks
int nb(void) const { return nb_; } // size of blocks
int mloc(void) const { return mloc_; } // size of local array
int nloc(void) const { return nloc_; } // size of local array
int size(void) const { return size_; } // size of local array
int localsize(void) const { return mloc_*nloc_; } // local size of val
double memsize(void) const
{ return (double) m_ * (double) n_ * sizeof(std::complex); }
double localmemsize(void) const
{ return (double) mloc_ * (double) nloc_ * sizeof(std::complex); }
const int* desc(void) const { return &desc_[0]; }
// local block size of block (l,m)
int mbs(int l) const
{
// if l is the last block and this process holds an incomplete
// block, then the size is m_%mb_. Otherwise, it is mb_.
return ( (l==mblocks_-1) && ( m_incomplete_ ) ) ? m_%mb_ : mb_;
}
int nbs(int m) const
{
// if m is the last block and this process holds an incomplete
// block, then the size is n_%nb_. Otherwise, it is nb_.
return ( (m==nblocks_-1) && ( n_incomplete_ ) ) ? n_%nb_ : nb_;
}
// number of local blocks
int mblocks(void) const { return mblocks_; }
int nblocks(void) const { return nblocks_; }
// functions to compute local indices from global indices
// index of blocks: element (i,j) is at position (x,y)
// in local block (l,m) of process (pr,pc)
int l(int i) const { return i/(nprow_*mb_); }
int x(int i) const { return i % mb_; }
int pr(int i) const { return (i/mb_) % nprow_; }
int m(int j) const { return j/(npcol_*nb_); }
int y(int j) const { return j % nb_; }
int pc(int j) const { return (j/nb_) % npcol_; }
// global indices:
// (i,j) is the global index of element (x,y) of block (l,m)
int i(int l, int x) const { return (l * nprow_ + myrow_) * mb_ + x; }
int j(int m, int y) const { return (m * npcol_ + mycol_) * nb_ + y; }
int iglobal(int ilocal) const
{ return mb_*(nprow_*(ilocal/mb_)+myrow_)+ilocal%mb_; }
int jglobal(int jlocal) const
{ return nb_*(npcol_*(jlocal/nb_)+mycol_)+jlocal%nb_; }
int iglobal(int iprow, int ilocal) const
{ return mb_*(nprow_*(ilocal/mb_)+iprow)+ilocal%mb_; }
int jglobal(int jpcol, int jlocal) const
{ return nb_*(npcol_*(jlocal/nb_)+jpcol)+jlocal%nb_; }
// store element a(ii,jj) (where ii,jj are global indices)
// in array val:
// int iii = l(ii) * mb_ + x(ii);
// int jjj = m(jj) * nb_ + y(jj);
// val[iii+mloc_*jjj] = a_ij;
// active flag: the matrix has elements on this process
bool active(void) const { return active_; }
void init_size(int m, int n, int mb = MATRIX_DEF_BLOCK_SIZE,
int nb = MATRIX_DEF_BLOCK_SIZE);
void resize(int m, int n, int mb = MATRIX_DEF_BLOCK_SIZE,
int nb = MATRIX_DEF_BLOCK_SIZE)
{
const int old_size = size_;
init_size(m,n,mb,nb);
if ( size_ == old_size ) return;
delete[] val;
val = new std::complex[size_];
clear();
}
void print(std::ostream& os) const;
explicit ComplexMatrix(const Context& ctxt) : ctxt_(ctxt),
m_(0), n_(0), mb_(0), nb_(0), mloc_(0), nloc_(0),
size_(0), reference_(false), val(0) {}
// Construct a ComplexMatrix of dimensions m,n
explicit ComplexMatrix(const Context& ctxt, int m, int n,
int mb=MATRIX_DEF_BLOCK_SIZE,
int nb=MATRIX_DEF_BLOCK_SIZE) : ctxt_(ctxt),
m_(0), n_(0), mb_(0), nb_(0), size_(0), reference_(false), val(0)
{
resize(m,n,mb,nb);
}
// copy constructor: create a separate copy of rhs
explicit ComplexMatrix(const ComplexMatrix& rhs) : ctxt_(rhs.context()),
reference_(false)
{
init_size(rhs.m(),rhs.n(),rhs.mb(),rhs.nb());
val = new std::complex[size_];
memcpy(val, rhs.val, size_*sizeof(std::complex));
}
// reference constructor: create a proxy for a DoubleMatrix rhs
explicit ComplexMatrix(DoubleMatrix& rhs);
explicit ComplexMatrix(const DoubleMatrix& rhs);
~ComplexMatrix(void)
{
if ( !reference_ ) delete[] val;
}
ComplexMatrix& operator=(const ComplexMatrix& a);
ComplexMatrix& operator+=(const ComplexMatrix& a);
ComplexMatrix& operator-=(const ComplexMatrix& a);
ComplexMatrix& operator*=(double a);
ComplexMatrix& operator*=(std::complex a);
void matgather(std::complex *a, int lda) const;
void initdiag(const std::complex* const a); // initialize diagonal
void init(const std::complex* const a, int lda);
void set(char uplo, std::complex x);
void identity(void);
void clear(void);
std::complex dot(const ComplexMatrix &a) const; // tr A^H * A
std::complex dotu(const ComplexMatrix &a) const; // tr A^T * A
void axpy(std::complex alpha, const ComplexMatrix &a);
void axpy(double alpha, const ComplexMatrix &a);
void scal(std::complex alpha);
void scal(double alpha);
double nrm2(void) const;
double asum(void) const;
double amax(void) const;
std::complex trace(void) const;
void symmetrize(char uplo);
// rank-1 update: *this += alpha * x(kx) * (y(ky))^T
// where x(kx) is row kx of x, and y(ky) is row ky of y
void ger(std::complex alpha, const ComplexMatrix& x,int kx,
const ComplexMatrix& y,int ky);
void geru(std::complex alpha, const ComplexMatrix& x,int kx,
const ComplexMatrix& y,int ky);
void gerc(std::complex alpha, const ComplexMatrix& x,int kx,
const ComplexMatrix& y,int ky);
// symmetric rank-1 update
void her(char uplo, std::complex alpha,
const ComplexMatrix& x, int ix, char rowcol);
// get submatrix A(ia:ia+m,ja:ja+n) of A
void getsub(const ComplexMatrix& a, int m, int n, int ia, int ja);
// get submatrix A(ia:ia+m,ja:ja+n) of A and store in
// this(idest:idest+m,jdest:jdest+n)
void getsub(const ComplexMatrix& a,int m,int n,int isrc,int jsrc,
int idest, int jdest);
// matrix * matrix
// this = alpha*op(A)*op(B)+beta*this
void gemm(char transa, char transb,
std::complex alpha, const ComplexMatrix& a,
const ComplexMatrix& b, std::complex beta);
// hermitian_matrix * matrix
// *this = alpha * A * B + beta * this
void hemm(char side, char uplo,
std::complex alpha, const ComplexMatrix& a,
const ComplexMatrix& b, std::complex beta);
// complex_symmetric_matrix * matrix
// *this = alpha * A * B + beta * this
void symm(char side, char uplo,
std::complex alpha, const ComplexMatrix& a,
const ComplexMatrix& b, std::complex beta);
// hermitian rank k update
// this = beta * this + alpha * A * A^H (trans=='n')
// this = beta * this + alpha * A^H * A (trans=='c')
void herk(char uplo, char trans,
double alpha, const ComplexMatrix& a, double beta);
// matrix transpose
// this = alpha * transpose(A) + beta * this
void transpose(std::complex alpha, const ComplexMatrix& a,
std::complex beta);
void transpose(const ComplexMatrix& a);
void trmm(char side, char uplo, char trans, char diag,
std::complex alpha, const ComplexMatrix& a);
void trsm(char side, char uplo, char trans, char diag,
std::complex alpha, const ComplexMatrix& a);
void trtrs(char uplo, char trans, char diag, ComplexMatrix& b) const;
// Cholesky decomposition of a hermitian matrix
void potrf(char uplo);
// Inverse of a symmetric matrix from Cholesky factor
void potri(char uplo);
// LU decomposition
void lu(std::valarray& ipiv);
// compute inverse of a square matrix
void inverse(void);
// compute inverse and determinant of a square matrix
std::complex inverse_det(void);
// compute determinant of a square matrix in LU form
std::complex det_from_lu(std::valarray ipiv);
// compute inverse of a square matrix in LU form
void inverse_from_lu(std::valarray& ipiv);
// Inverse of triangular matrix
void trtri(char uplo,char diag);
double pocon(char) const;
// compute eigenvalues and eigenvectors of hermitian matrix *this
void heev(char uplo, std::valarray& w, ComplexMatrix& z);
// compute eigenvalues (only) of hermitian matrix *this
void heev(char uplo, std::valarray& w);
// eigenvalues/eigenvectors using the divide and conquer method
// compute eigenvalues and eigenvectors of hermitian matrix *this
void heevd(char uplo, std::valarray& w, ComplexMatrix& z);
// compute eigenvalues (only) of hermitian matrix *this
void heevd(char uplo, std::valarray& w);
// permute the coeff of the matrix *this
void lapiv(char direc, char rowcol, const int *ipiv);
// signature of a permutation returned by lu
int signature(std::valarray ipiv);
};
std::ostream& operator << ( std::ostream& os, const ComplexMatrix& a );
#endif