diff --git a/build/bgq-anl-omp.mk b/build/bgq-anl-omp.mk new file mode 100644 index 0000000..c24fdd4 --- /dev/null +++ b/build/bgq-anl-omp.mk @@ -0,0 +1,90 @@ +#------------------------------------------------------------------------------- +# +# bgq.mk +# +#------------------------------------------------------------------------------- +# + PLT=BGQ +#------------------------------------------------------------------------------- +# use the following .soft environment +####~/.soft### +# @default +# +mpiwrapper-xl + + CXX=mpic++ + LD=$(CXX) + + PLTFLAGS += -DUSE_MPI -DSCALAPACK + PLTFLAGS += -D__linux__ -DPLT_BIG_ENDIAN + PLTFLAGS += -DUSE_XERCES + PLTFLAGS += -D_LARGEFILE_SOURCE + PLTFLAGS += -D_FILE_OFFSET_BITS=64 + PLTFLAGS += -DMPICH_IGNORE_CXX_SEEK + PLTFLAGS += -DUSE_MASSV + +# FFT must be FFTW2, FFTW3, ESSL or NOLIB + FFT=ESSL + +ifeq ($(FFT),FFTW2) + PLTFLAGS += -DUSE_FFTW2 + PLTFLAGS += -DUSE_DFFTW + PLTFLAGS += -DFFTWMEASURE + FFTWDIR=/soft/libraries/alcf/current/xl/FFTW2 + FFTWINCLUDEDIR=$(FFTWDIR)/include + FFTWLIBDIR=$(FFTWDIR)/lib + INCLUDE += -I$(FFTWINCLUDEDIR) + LIBPATH += -L$(FFTWLIBDIR) + LIBS += -ldfftw +endif + +ifeq ($(FFT),FFTW3) + PLTFLAGS += -DUSE_FFTW3 + PLTFLAGS += -DFFTWMEASURE + FFTWDIR=/soft/libraries/alcf/current/xl/FFTW3 + FFTWINCLUDEDIR=$(FFTWDIR)/include + FFTWLIBDIR=$(FFTWDIR)/lib + INCLUDE += -I$(FFTWINCLUDEDIR) + LIBPATH += -L$(FFTWLIBDIR) + LIBS += -lfftw3 +endif + +ifeq ($(FFT),ESSL) + PLTFLAGS += -DUSE_ESSL_FFT + #PLTFLAGS += -DUSE_ESSL_2DFFT +endif + +ifeq ($(FFT),NOLIB) + PLTFLAGS += -DFFT_NOLIB +endif + + XERCESCDIR=$(HOME)/software/xerces/xerces-c-src_2_8_0 + XERCESCLIBDIR=$(XERCESCDIR)/lib + + SCALAPACKLIBDIR=/soft/libraries/alcf/current/xl/SCALAPACK/lib + SCALAPACKLIB=-lscalapack + LAPACKLIBDIR=/soft/libraries/alcf/current/xl/LAPACK/lib + LAPACKLIB=-llapack + BLASLIBDIR=/soft/libraries/essl/current/lib64 + BLASLIB=-lesslsmpbg + + INCLUDE += -I$(XERCESCDIR)/include + + CXXFLAGS= -g -O3 -qsmp=omp -qarch=qp -qtune=qp \ + -D$(PLT) $(INCLUDE) $(PLTFLAGS) $(DFLAGS) + + LIBPATH += -L$(XERCESCLIBDIR) -L$(SCALAPACKLIBDIR) \ + -L$(LAPACKLIBDIR) -L$(BLASLIBDIR) \ + -L${IBM_FCMP}/bglib64 \ + -L${IBM_MAIN_DIR}/xlmass/bg/7.3/bglib64 \ + -L${IBM_MAIN_DIR}/xlf/bg/14.1/bglib64 \ + -L${IBM_MAIN_DIR}/xlsmp/bg/3.1/bglib64 \ + -L$(IBM_MAIN_DIR)/xlf/bg/14.1/bglib64 + + LIBS += $(PLIBS) $(SCALAPACKLIB) $(LAPACKLIB) $(BLASLIB) \ + -lmass -lmassv -lxerces-c \ + -lxlf90_r -lxlfmath -lxl \ + -lxlsmp -lxlomp_ser \ + -lpthread -lgomp + + LDFLAGS = $(LIBPATH) $(LIBS) +#------------------------------------------------------------------------------- diff --git a/build/centos6.mk b/build/centos6.mk new file mode 100644 index 0000000..628c89b --- /dev/null +++ b/build/centos6.mk @@ -0,0 +1,83 @@ +#------------------------------------------------------------------------------- +# +# Copyright (c) 2008-2015 The Regents of the University of California +# +# This file is part of Qbox +# +# Qbox is distributed under the terms of the GNU General Public License +# as published by the Free Software Foundation, either version 2 of +# the License, or (at your option) any later version. +# See the file COPYING in the root directory of this distribution +# or . +# +#------------------------------------------------------------------------------- +# +# centos6.mk +# +#------------------------------------------------------------------------------- +# Prerequisites: +# On a Centos-6.6 system, install the following packages: +# yum install xerces-c xerces-c-devel +# yum install openmpi openmpi-devel +# yum install lapack lapack-devel +# yum install fftw fftw-devel +# yum install scalapack-common scalapack-openmpi \ +# scalapack-openmpi-devel scalapack-openmpi-static +# yum install libuuid libuuid-devel +# +#------------------------------------------------------------------------------- + PLT=Linux_x8664 +#------------------------------------------------------------------------------- + + PLTOBJECTS = readTSC.o + SVN_VER :=$(shell svnversion -n) + DFLAGS += -DSVN_VERSION='"$(SVN_VER)"' + + CXX=mpicxx + LD=$(CXX) + + PLTFLAGS += -DIA32 -D_LARGEFILE_SOURCE \ + -D_FILE_OFFSET_BITS=64 -DUSE_MPI -DSCALAPACK -DADD_ \ + -DAPP_NO_THREADS -DXML_USE_NO_THREADS -DUSE_XERCES \ + -DXERCESC_3 -DMPICH_IGNORE_CXX_SEEK -DUSE_UUID + +# FFT must be FFTW2, FFTW3, ESSL or NOLIB + FFT=FFTW3 + +ifeq ($(FFT),FFTW2) + PLTFLAGS += -DUSE_FFTW2 + PLTFLAGS += -DFFTWMEASURE + FFTWDIR=$(HOME)/software/fftw/Linux_x8664/fftw-2.1.5/fftw + FFTWINCLUDEDIR=$(FFTWDIR) + FFTWLIBDIR=$(FFTWDIR)/.libs + INCLUDE += -I$(FFTWINCLUDEDIR) + LIBPATH += -L$(FFTWLIBDIR) + LIBS += -lfftw +endif + +ifeq ($(FFT),FFTW3) + PLTFLAGS += -DUSE_FFTW3 + PLTFLAGS += -DFFTWMEASURE +#PLTFLAGS += -DFFTW_TRANSPOSE + PLTFLAGS += -DFFTW3_2D +#FFTWDIR=$(HOME)/software/fftw/fftw-3.3.4 +#FFTWINCLUDEDIR=$(FFTWDIR)/api +#FFTWLIBDIR=$(FFTWDIR)/.libs +#INCLUDE += -I$(FFTWINCLUDEDIR) +#LIBPATH += -L$(FFTWLIBDIR) + LIBS += -lfftw3 +endif + +ifeq ($(FFT),ESSL) +$(error ESSL library not available) +endif + +ifeq ($(FFT),NOLIB) + PLTFLAGS += -DFFT_NOLIB +endif + + CXXFLAGS= -g -O3 -Wunused -D$(PLT) $(INCLUDE) $(PLTFLAGS) $(DFLAGS) + LIBS += -lpthread -lxerces-c -lscalapack -llapack -lblas -luuid + LDFLAGS = $(LIBPATH) $(LIBS) + +#------------------------------------------------------------------------------- diff --git a/build/gcc_atlas.mk b/build/gcc_atlas.mk new file mode 100644 index 0000000..6d3c84f --- /dev/null +++ b/build/gcc_atlas.mk @@ -0,0 +1,82 @@ +#------------------------------------------------------------------------------- +# +# Copyright (c) 2008 The Regents of the University of California +# +# This file is part of Qbox +# +# Qbox is distributed under the terms of the GNU General Public License +# as published by the Free Software Foundation, either version 2 of +# the License, or (at your option) any later version. +# See the file COPYING in the root directory of this distribution +# or . +# +#------------------------------------------------------------------------------- +# +# gcc_atlas.mk +# +#------------------------------------------------------------------------------- + PLT=Linux_x8664 +#------------------------------------------------------------------------------- + SCALAPACKDIR = $(HOME)/software/scalapack/scalapack-2.0.2 + XERCESCDIR=$(HOME)/software/xerces/xerces-c-src_2_8_0 + ATLASDIR=/usr/lib64/atlas + + PLTOBJECTS = readTSC.o + SVN_VER :=$(shell svnversion -n) + DFLAGS += -DSVN_VERSION='"$(SVN_VER)"' + + CXX=mpicxx + LD=$(CXX) + + PLTFLAGS += -DIA32 -D_LARGEFILE_SOURCE \ + -D_FILE_OFFSET_BITS=64 -DUSE_MPI -DSCALAPACK -DADD_ \ + -DAPP_NO_THREADS -DXML_USE_NO_THREADS -DUSE_XERCES \ + -DMPICH_IGNORE_CXX_SEEK + +# FFT must be FFTW2, FFTW3, ESSL or NOLIB + FFT=FFTW2 + +ifeq ($(FFT),FFTW2) + PLTFLAGS += -DUSE_FFTW2 + PLTFLAGS += -DFFTWMEASURE + FFTWDIR=$(HOME)/software/fftw/Linux_x8664/fftw-2.1.5/fftw + FFTWINCLUDEDIR=$(FFTWDIR) + FFTWLIBDIR=$(FFTWDIR)/.libs + INCLUDE += -I$(FFTWINCLUDEDIR) + LIBPATH += -L$(FFTWLIBDIR) + LIBS += -lfftw +endif + +ifeq ($(FFT),FFTW3) + PLTFLAGS += -DUSE_FFTW3 + PLTFLAGS += -DFFTWMEASURE +#PLTFLAGS += -DFFTW_TRANSPOSE + PLTFLAGS += -DFFTW3_2D + FFTWDIR=$(HOME)/software/fftw/fftw-3.3.4 + FFTWINCLUDEDIR=$(FFTWDIR)/api + FFTWLIBDIR=$(FFTWDIR)/.libs + INCLUDE += -I$(FFTWINCLUDEDIR) + LIBPATH += -L$(FFTWLIBDIR) + LIBS += -lfftw3 +endif + +ifeq ($(FFT),ESSL) +$(error ESSL library not available) +endif + +ifeq ($(FFT),NOLIB) + PLTFLAGS += -DFFT_NOLIB +endif + + INCLUDE += -I$(XERCESCDIR)/include + + CXXFLAGS= -g -O3 -Wunused -D$(PLT) $(INCLUDE) $(PLTFLAGS) $(DFLAGS) + + LIBPATH += -L$(XERCESCDIR)/lib \ + -L$(ATLASDIR) -L$(SCALAPACKDIR) -L/usr/lib64 + + LIBS += -lpthread -lxerces-c -lscalapack -llapack -lf77blas -latlas + + LDFLAGS = $(LIBPATH) $(LIBS) + +#------------------------------------------------------------------------------- diff --git a/build/icc_mkl.mk b/build/icc_mkl.mk new file mode 100644 index 0000000..f08db60 --- /dev/null +++ b/build/icc_mkl.mk @@ -0,0 +1,75 @@ +#------------------------------------------------------------------------------- +# +# icc_mkl.mk +# +#------------------------------------------------------------------------------- +# + PLT=x86_64 +#------------------------------------------------------------------------------- + MPIDIR=/opt/openmpi + XERCESCDIR=/share/apps/xerces/xerces-c-src_2_8_0 + PLTOBJECTS = readTSC.o + INCLUDE = -I$(MKLROOT)/include + + CXX=icc + LD=mpicxx + + PLTFLAGS += -DIA32 -D_LARGEFILE_SOURCE \ + -D_FILE_OFFSET_BITS=64 -DUSE_MPI -DSCALAPACK -DADD_ \ + -DAPP_NO_THREADS -DXML_USE_NO_THREADS -DUSE_XERCES + +# FFT must be FFTW2, FFTW3, ESSL or NOLIB + FFT=FFTW2 + +ifeq ($(FFT),FFTW2) + PLTFLAGS += -DUSE_FFTW2 + PLTFLAGS += -DFFTWMEASURE + FFTWDIR=/share/apps/fftw/fftw-2.1.5/fftw + FFTWINCLUDEDIR=$(FFTWDIR) + FFTWLIBDIR=$(FFTWDIR)/.libs + INCLUDE += -I$(FFTWINCLUDEDIR) + LIBPATH += -L$(FFTWLIBDIR) + LIBS += -lfftw +endif + +ifeq ($(FFT),FFTW3) + PLTFLAGS += -DUSE_FFTW3 + PLTFLAGS += -DFFTWMEASURE +#PLTFLAGS += -DFFTW_TRANSPOSE + PLTFLAGS += -DFFTW3_2D + FFTWDIR=$(HOME)/software/fftw/fftw-3.3.4 + FFTWINCLUDEDIR=$(FFTWDIR)/api + FFTWLIBDIR=$(FFTWDIR)/.libs + INCLUDE += -I$(FFTWINCLUDEDIR) + LIBPATH += -L$(FFTWLIBDIR) + LIBS += -lfftw3 +endif + +ifeq ($(FFT),ESSL) +$(error ESSL library not available) +endif + +ifeq ($(FFT),NOLIB) + PLTFLAGS += -DFFT_NOLIB +endif + + + INCLUDE += -I$(MPIDIR)/include -I$(XERCESCDIR)/include + + CXXFLAGS= -g -O3 -vec-report1 -D$(PLT) $(INCLUDE) $(PLTFLAGS) $(DFLAGS) + + LIBPATH += -L$(MPIDIR)/lib64 \ + -L$(MKLDIR)/lib/intel64 \ + -L$(XERCESCDIR)/lib + + LIBS += $(PLIBS) \ + -lmkl_intel_lp64 \ + -lmkl_lapack95_lp64 -lmkl_sequential -lmkl_core \ + -lirc -lifcore -lsvml \ + -luuid $(XERCESCDIR)/lib/libxerces-c.a -lpthread + +# Parallel libraries + PLIBS = -lmkl_scalapack_lp64 -lmkl_blacs_openmpi_lp64 + + LDFLAGS = $(LIBPATH) $(LIBS) +#------------------------------------------------------------------------------- diff --git a/build/icc_mkl_omp.mk b/build/icc_mkl_omp.mk new file mode 100644 index 0000000..c650626 --- /dev/null +++ b/build/icc_mkl_omp.mk @@ -0,0 +1,73 @@ +#------------------------------------------------------------------------------- +# +# icc_mkl_omp.mk +# +#------------------------------------------------------------------------------- +# + PLT=x86_64 +#------------------------------------------------------------------------------- + MPIDIR=/opt/openmpi + XERCESCDIR=/share/apps/xerces/xerces-c-src_2_8_0 + PLTOBJECTS = readTSC.o + + CXX=icc + LD=mpicxx + + PLTFLAGS += -DIA32 -D_LARGEFILE_SOURCE \ + -D_FILE_OFFSET_BITS=64 -DUSE_MPI -DSCALAPACK -DADD_ \ + -DAPP_NO_THREADS -DXML_USE_NO_THREADS -DUSE_XERCES + +# FFT must be FFTW2, FFTW3, ESSL or NOLIB + FFT=FFTW2 + +ifeq ($(FFT),FFTW2) + PLTFLAGS += -DUSE_FFTW2 + PLTFLAGS += -DFFTWMEASURE + FFTWDIR=/share/apps/fftw/fftw-2.1.5/fftw + FFTWINCLUDEDIR=$(FFTWDIR) + FFTWLIBDIR=$(FFTWDIR)/.libs + INCLUDE += -I$(FFTWINCLUDEDIR) + LIBPATH += -L$(FFTWLIBDIR) + LIBS += -lfftw +endif + +ifeq ($(FFT),FFTW3) + PLTFLAGS += -DUSE_FFTW3 + PLTFLAGS += -DFFTWMEASURE +#PLTFLAGS += -DFFTW_TRANSPOSE + PLTFLAGS += -DFFTW3_2D + FFTWDIR=$(HOME)/software/fftw/fftw-3.3.4 + FFTWINCLUDEDIR=$(FFTWDIR)/api + FFTWLIBDIR=$(FFTWDIR)/.libs + INCLUDE += -I$(FFTWINCLUDEDIR) + LIBPATH += -L$(FFTWLIBDIR) + LIBS += -lfftw3 +endif + +ifeq ($(FFT),ESSL) +$(error ESSL library not available) +endif + +ifeq ($(FFT),NOLIB) + PLTFLAGS += -DFFT_NOLIB +endif + + INCLUDE += -I$(MPIDIR)/include -I$(XERCESCDIR)/include + + CXXFLAGS= -g -openmp -O3 -vec-report1 -D$(PLT) \ + $(INCLUDE) $(PLTFLAGS) $(DFLAGS) + + LIBPATH += -L$(MPIDIR)/lib64 \ + -L$(MKLDIR)/lib/intel64 \ + -L$(XERCESCDIR)/lib + + LIBS += $(PLIBS) \ + -lmkl_intel_lp64 -lmkl_core -lmkl_intel_thread \ + -lifcoremt -lirc -lsvml \ + -luuid $(XERCESCDIR)/lib/libxerces-c.a -liomp5 -lpthread + +# Parallel libraries + PLIBS = -lmkl_scalapack_lp64 -lmkl_blacs_openmpi_lp64 + + LDFLAGS = $(LIBPATH) $(LIBS) +#------------------------------------------------------------------------------- diff --git a/src/AlphaPBE0.h b/src/AlphaPBE0.h new file mode 100644 index 0000000..2919297 --- /dev/null +++ b/src/AlphaPBE0.h @@ -0,0 +1,74 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014 The Regents of the University of California +// +// This file is part of Qbox +// +// Qbox is distributed under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 2 of +// the License, or (at your option) any later version. +// See the file COPYING in the root directory of this distribution +// or . +// +//////////////////////////////////////////////////////////////////////////////// +// +// AlphaPBE0.h +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ALPHAPBE0_H +#define ALPHAPBE0_H + +#include +#include +#include +#include + +#include "Sample.h" + +class AlphaPBE0 : public Var +{ + Sample *s; + + public: + + const char *name ( void ) const { return "alpha_PBE0"; }; + + int set ( int argc, char **argv ) + { + if ( argc != 2 ) + { + if ( ui->onpe0() ) + cout << " alpha_PBE0 takes only one value" << endl; + return 1; + } + + double v = atof(argv[1]); + if ( v < 0.0 ) + { + if ( ui->onpe0() ) + cout << " alpha_PBE0 must be non-negative" << endl; + return 1; + } + + s->ctrl.alpha_PBE0 = v; + + return 0; + } + + string print (void) const + { + ostringstream st; + st.setf(ios::left,ios::adjustfield); + st << setw(10) << name() << " = "; + st.setf(ios::right,ios::adjustfield); + st << s->ctrl.alpha_PBE0; + return st.str(); + } + + AlphaPBE0(Sample *sample) : s(sample) + { + s->ctrl.alpha_PBE0 = 0.25; + } +}; +#endif diff --git a/src/AngleCmd.h b/src/AngleCmd.h index 296c83c..9ff1447 100644 --- a/src/AngleCmd.h +++ b/src/AngleCmd.h @@ -15,7 +15,6 @@ // AngleCmd.h: // //////////////////////////////////////////////////////////////////////////////// -// $Id: AngleCmd.h,v 1.5 2008-09-08 15:56:17 fgygi Exp $ #ifndef ANGLECMD_H #define ANGLECMD_H @@ -38,24 +37,48 @@ class AngleCmd : public Cmd { return "\n angle\n\n" - " syntax: angle name1 name2 name3\n\n" - " The angle command prints the angle defined by three atoms.\n\n"; + " syntax: angle [-pbc] name1 name2 name3\n\n" + " The angle command prints the angle defined by three atoms.\n" + " If the -pbc option is used, the angle is computed using the\n" + " nearest atoms taking into account periodic boundary conditions.\n\n"; } int action(int argc, char **argv) { - if ( argc != 4 ) + if ( ! ( argc == 4 || argc == 5 ) ) { if ( ui->onpe0() ) { - cout << " use: angle name1 name2 name3" << endl; + cout << " use: angle [-pbc] name1 name2 name3" << endl; } return 1; } - string name1(argv[1]); - string name2(argv[2]); - string name3(argv[3]); + string name1, name2, name3; + bool use_pbc = false; + + if ( argc == 4 ) + { + name1 = argv[1]; + name2 = argv[2]; + name3 = argv[3]; + } + if ( argc == 5 ) + { + if ( strcmp(argv[1],"-pbc") ) + { + if ( ui->onpe0() ) + { + cout << " use: angle [-pbc] name1 name2 name3" << endl; + } + return 1; + } + use_pbc = true; + name1 = argv[2]; + name2 = argv[3]; + name3 = argv[4]; + } + Atom* a1 = s->atoms.findAtom(name1); Atom* a2 = s->atoms.findAtom(name2); Atom* a3 = s->atoms.findAtom(name3); @@ -83,22 +106,25 @@ class AngleCmd : public Cmd return 1; } - D3vector r12(a1->position()-a2->position()); - D3vector r32(a3->position()-a2->position()); - if ( norm(r12) == 0.0 || norm(r32) == 0.0 ) + if ( ui->onpe0() ) { - if ( ui->onpe0() ) + D3vector r12(a1->position()-a2->position()); + D3vector r32(a3->position()-a2->position()); + if ( norm2(r12) == 0.0 || norm2(r32) == 0.0 ) { cout << " AngleCmd: atoms are too close" << endl; + return 1; } - return 1; - } - const double sp = normalized(r12) * normalized(r32); - const double c = max(-1.0,min(1.0,sp)); - const double a = (180.0/M_PI)*acos(c); - if ( ui->onpe0() ) - { + if ( use_pbc ) + { + const UnitCell& cell = s->wf.cell(); + cell.fold_in_ws(r12); + cell.fold_in_ws(r32); + } + const double sp = normalized(r12) * normalized(r32); + const double c = max(-1.0,min(1.0,sp)); + const double a = (180.0/M_PI)*acos(c); cout.setf(ios::fixed,ios::floatfield); cout << " angle " << name1 << "-" << name2 << "-" << name3 << ": " diff --git a/src/AngleConstraint.C b/src/AngleConstraint.C index 325649a..e84d526 100644 --- a/src/AngleConstraint.C +++ b/src/AngleConstraint.C @@ -12,10 +12,9 @@ // //////////////////////////////////////////////////////////////////////////////// // -// AngleConstraint.C +// AngleConstraint.C // //////////////////////////////////////////////////////////////////////////////// -// $Id: AngleConstraint.C,v 1.6 2008-09-08 15:56:17 fgygi Exp $ #include "AngleConstraint.h" #include "AtomSet.h" @@ -323,8 +322,8 @@ void AngleConstraint::grad_sigma(const D3vector &r1, const D3vector &r2, { D3vector r12(r1-r2); D3vector r32(r3-r2); - assert(norm(r12) > 0.0); - assert(norm(r32) > 0.0); + assert(norm2(r12) > 0.0); + assert(norm2(r32) > 0.0); D3vector e12(normalized(r12)); D3vector e32(normalized(r32)); const double ss = length(e12^e32); @@ -344,7 +343,7 @@ void AngleConstraint::grad_sigma(const D3vector &r1, const D3vector &r2, { // choose direction e as e12+e32 D3vector e(e12+e32); - assert(norm(e)>0.0); + assert(norm2(e)>0.0); e = normalized(e); const double r12_inv = 1.0/length(r12); const double r32_inv = 1.0/length(r32); @@ -408,9 +407,10 @@ ostream& AngleConstraint::print( ostream &os ) os << name2_ << " " << name3_ << "\"\n"; os.setf(ios::fixed,ios::floatfield); os.setf(ios::right,ios::adjustfield); - os << " value=\"" << setprecision(6) << angle_; - os << "\" velocity=\"" << setprecision(6) << velocity_ << "\"\n"; - os << " force=\"" << setprecision(6) << force_; - os << "\" weight=\"" << setprecision(6) << weight_ << "\"/>"; + os << " velocity=\"" << setprecision(6) << velocity_ << "\""; + os << " weight=\"" << setprecision(6) << weight_ << "\">\n"; + os << " " << setprecision(6) << angle_ << " "; + os << " " << setprecision(6) << force_ << " \n"; + os << " "; return os; } diff --git a/src/AngleConstraint.h b/src/AngleConstraint.h index e301ef4..ee34522 100644 --- a/src/AngleConstraint.h +++ b/src/AngleConstraint.h @@ -12,10 +12,9 @@ // //////////////////////////////////////////////////////////////////////////////// // -// AngleConstraint.h +// AngleConstraint.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: AngleConstraint.h,v 1.8 2009-05-15 04:38:48 fgygi Exp $ #ifndef ANGLECONSTRAINT_H #define ANGLECONSTRAINT_H diff --git a/src/Atom.C b/src/Atom.C index 24c085f..2500ef8 100644 --- a/src/Atom.C +++ b/src/Atom.C @@ -15,7 +15,6 @@ // Atom.C: // //////////////////////////////////////////////////////////////////////////////// -// $Id: Atom.C,v 1.5 2008-09-08 15:56:17 fgygi Exp $ #include "Atom.h" #include diff --git a/src/Atom.h b/src/Atom.h index 01eec56..12cc7a0 100644 --- a/src/Atom.h +++ b/src/Atom.h @@ -15,7 +15,6 @@ // Atom.h: // //////////////////////////////////////////////////////////////////////////////// -// $Id: Atom.h,v 1.6 2008-09-08 15:56:17 fgygi Exp $ #ifndef ATOM_H #define ATOM_H diff --git a/src/AtomCmd.h b/src/AtomCmd.h index 196c484..5b3949f 100644 --- a/src/AtomCmd.h +++ b/src/AtomCmd.h @@ -15,7 +15,6 @@ // AtomCmd.h: // //////////////////////////////////////////////////////////////////////////////// -// $Id: AtomCmd.h,v 1.11 2009-08-14 17:06:43 fgygi Exp $ #ifndef ATOMCMD_H #define ATOMCMD_H diff --git a/src/AtomSet.C b/src/AtomSet.C index e4b0662..d34304f 100644 --- a/src/AtomSet.C +++ b/src/AtomSet.C @@ -15,7 +15,6 @@ // AtomSet.C // //////////////////////////////////////////////////////////////////////////////// -// $Id: AtomSet.C,v 1.29 2010-04-16 21:40:50 fgygi Exp $ #include "AtomSet.h" #include "Species.h" @@ -358,8 +357,9 @@ void AtomSet::sync_positions(vector >& tau) } //////////////////////////////////////////////////////////////////////////////// -void AtomSet::set_positions(const vector >& tau) +void AtomSet::set_positions(vector >& tau) { + sync_positions(tau); assert(tau.size() == atom_list.size()); for ( int is = 0; is < atom_list.size(); is++ ) { @@ -412,8 +412,9 @@ void AtomSet::sync_velocities(vector >& vel) } //////////////////////////////////////////////////////////////////////////////// -void AtomSet::set_velocities(const vector >& vel) +void AtomSet::set_velocities(vector >& vel) { + sync_velocities(vel); assert(vel.size() == atom_list.size()); for ( int is = 0; is < atom_list.size(); is++ ) { @@ -498,6 +499,7 @@ void AtomSet::randomize_velocities(double temp) } } set_velocities(v); + reset_vcm(); } //////////////////////////////////////////////////////////////////////////////// @@ -575,9 +577,28 @@ D3vector AtomSet::dipole(void) const } //////////////////////////////////////////////////////////////////////////////// +D3tensor AtomSet::quadrupole(void) const +{ + D3tensor sum; + for ( int is = 0; is < atom_list.size(); is++ ) + { + double charge = species_list[is]->zval(); + for ( int ia = 0; ia < atom_list[is].size(); ia++ ) + { + D3vector p = atom_list[is][ia]->position(); + for ( int idir = 0; idir < 3; idir++ ) + for ( int jdir = 0; jdir < 3; jdir++ ) + sum[idir*3+jdir] += charge * p[idir] * p[jdir]; + } + } + return sum; +} + +//////////////////////////////////////////////////////////////////////////////// void AtomSet::set_cell(const D3vector& a, const D3vector& b, const D3vector& c) { cell_.set(a,b,c); + sync_cell(); } //////////////////////////////////////////////////////////////////////////////// diff --git a/src/AtomSet.h b/src/AtomSet.h index 27ddf15..b70f32d 100644 --- a/src/AtomSet.h +++ b/src/AtomSet.h @@ -15,7 +15,6 @@ // AtomSet.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: AtomSet.h,v 1.26 2010-05-10 20:52:54 fgygi Exp $ #ifndef ATOMSET_H #define ATOMSET_H @@ -23,6 +22,7 @@ #include "Context.h" #include "Atom.h" #include "UnitCell.h" +#include "D3tensor.h" #include #include #include @@ -72,10 +72,10 @@ class AtomSet int nsp(void) const { return species_list.size(); } void get_positions(std::vector >& tau) const; void sync_positions(std::vector >& tau); - void set_positions(const std::vector >& tau); + void set_positions(std::vector >& tau); void get_velocities(std::vector >& vel) const; void sync_velocities(std::vector >& vel); - void set_velocities(const std::vector >& vel); + void set_velocities(std::vector >& vel); const UnitCell& cell(void) const { return cell_; } void set_cell(const UnitCell& cell) { cell_ = cell; } void set_cell(const D3vector& a, const D3vector& b, const D3vector& c); @@ -87,6 +87,7 @@ class AtomSet void randomize_positions(double amplitude); D3vector vcm(void) const; D3vector dipole(void) const; + D3tensor quadrupole(void) const; void reset_vcm(void); void fold_in_ws(void); int size(void) const; diff --git a/src/AtomSetHandler.h b/src/AtomSetHandler.h index a98942a..cac9353 100644 --- a/src/AtomSetHandler.h +++ b/src/AtomSetHandler.h @@ -15,7 +15,6 @@ // AtomSetHandler.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: AtomSetHandler.h,v 1.5 2008-09-08 15:56:18 fgygi Exp $ #ifndef ATOMSETHANDLER_H #define ATOMSETHANDLER_H diff --git a/src/AtomicExtForce.C b/src/AtomicExtForce.C index 3ea8c7c..ccd0f28 100644 --- a/src/AtomicExtForce.C +++ b/src/AtomicExtForce.C @@ -12,10 +12,9 @@ // //////////////////////////////////////////////////////////////////////////////// // -// AtomicExtForce.C +// AtomicExtForce.C // //////////////////////////////////////////////////////////////////////////////// -// $Id: AtomicExtForce.C,v 1.1 2010-02-20 23:13:02 fgygi Exp $ #include "AtomicExtForce.h" #include "AtomSet.h" diff --git a/src/AtomicExtForce.h b/src/AtomicExtForce.h index e0a811b..e57665f 100644 --- a/src/AtomicExtForce.h +++ b/src/AtomicExtForce.h @@ -12,10 +12,9 @@ // //////////////////////////////////////////////////////////////////////////////// // -// AtomicExtForce.h +// AtomicExtForce.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: AtomicExtForce.h,v 1.1 2010-02-20 23:13:02 fgygi Exp $ #ifndef ATOMICEXTFORCE_H #define ATOMICEXTFORCE_H diff --git a/src/AtomsDyn.h b/src/AtomsDyn.h index 6bc0de5..4849c7b 100644 --- a/src/AtomsDyn.h +++ b/src/AtomsDyn.h @@ -15,7 +15,6 @@ // AtomsDyn.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: AtomsDyn.h,v 1.7 2009-04-30 22:22:23 fgygi Exp $ #ifndef ATOMSDYN_H #define ATOMSDYN_H diff --git a/src/B3LYPFunctional.C b/src/B3LYPFunctional.C index 5b736ba..2c57940 100644 --- a/src/B3LYPFunctional.C +++ b/src/B3LYPFunctional.C @@ -19,6 +19,8 @@ #include #include #include "B3LYPFunctional.h" +#include "BLYPFunctional.h" +#include "VWNFunctional.h" using namespace std; //////////////////////////////////////////////////////////////////////////////// @@ -46,8 +48,37 @@ B3LYPFunctional::B3LYPFunctional(const vector > &rhoe) } else { - // not implemented - assert(false); + _exc_up.resize(_np); + _exc_dn.resize(_np); + _vxc1_up.resize(_np); + _vxc1_dn.resize(_np); + _vxc2_upup.resize(_np); + _vxc2_updn.resize(_np); + _vxc2_dnup.resize(_np); + _vxc2_dndn.resize(_np); + _grad_rho_up[0].resize(_np); + _grad_rho_up[1].resize(_np); + _grad_rho_up[2].resize(_np); + _grad_rho_dn[0].resize(_np); + _grad_rho_dn[1].resize(_np); + _grad_rho_dn[2].resize(_np); + + rho_up = &rhoe[0][0]; + rho_dn = &rhoe[1][0]; + grad_rho_up[0] = &_grad_rho_up[0][0]; + grad_rho_up[1] = &_grad_rho_up[1][0]; + grad_rho_up[2] = &_grad_rho_up[2][0]; + grad_rho_dn[0] = &_grad_rho_dn[0][0]; + grad_rho_dn[1] = &_grad_rho_dn[1][0]; + grad_rho_dn[2] = &_grad_rho_dn[2][0]; + exc_up = &_exc_up[0]; + exc_dn = &_exc_dn[0]; + vxc1_up = &_vxc1_up[0]; + vxc1_dn = &_vxc1_dn[0]; + vxc2_upup = &_vxc2_upup[0]; + vxc2_updn = &_vxc2_updn[0]; + vxc2_dnup = &_vxc2_dnup[0]; + vxc2_dndn = &_vxc2_dndn[0]; } } @@ -55,6 +86,7 @@ B3LYPFunctional::B3LYPFunctional(const vector > &rhoe) void B3LYPFunctional::setxc(void) { if ( _np == 0 ) return; + if ( _nspin == 1 ) { assert( rho != 0 ); @@ -72,7 +104,6 @@ void B3LYPFunctional::setxc(void) } else { -#if 0 // not implemented assert( rho_up != 0 ); assert( rho_dn != 0 ); assert( grad_rho_up[0] != 0 && grad_rho_up[1] != 0 && grad_rho_up[2] != 0 ); @@ -94,178 +125,47 @@ void B3LYPFunctional::setxc(void) double grx_dn = grad_rho_dn[0][i]; double gry_dn = grad_rho_dn[1][i]; double grz_dn = grad_rho_dn[2][i]; - double grx = grx_up + grx_dn; - double gry = gry_up + gry_dn; - double grz = grz_up + grz_dn; - double grad_up = sqrt(grx_up*grx_up + gry_up*gry_up + grz_up*grz_up); - double grad_dn = sqrt(grx_dn*grx_dn + gry_dn*gry_dn + grz_dn*grz_dn); - double grad = sqrt(grx*grx + gry*gry + grz*grz); - excpbe_sp(rho_up[i],rho_dn[i],grad_up,grad_dn,grad,&exc_up[i],&exc_dn[i], - &vxc1_up[i],&vxc1_dn[i],&vxc2_upup[i],&vxc2_dndn[i], - &vxc2_updn[i], &vxc2_dnup[i]); + double grad_up2 = grx_up*grx_up + gry_up*gry_up + grz_up*grz_up; + double grad_dn2 = grx_dn*grx_dn + gry_dn*gry_dn + grz_dn*grz_dn; + double grad_up_grad_dn = grx_up*grx_dn + gry_up*gry_dn + grz_up*grz_dn; + + excb3lyp_sp(rho_up[i],rho_dn[i],grad_up2,grad_dn2,grad_up_grad_dn, + &exc_up[i],&exc_dn[i],&vxc1_up[i],&vxc1_dn[i], + &vxc2_upup[i],&vxc2_dndn[i], + &vxc2_updn[i],&vxc2_dnup[i]); } -#endif } } + //////////////////////////////////////////////////////////////////////////////// void B3LYPFunctional::excb3lyp(double rho, double grad, double *exc, double *vxc1, double *vxc2) { - - *exc = 0.0; - *vxc1 = 0.0; - *vxc2 = 0.0; - - if ( rho < 1.e-18 ) - { - return; - } - - // LDA correlation - // Perdew-Zunger parametrization of Ceperley-Alder data - // const double third=1.0/3.0; - // c1 is (3.D0/(4.D0*pi))**third - const double c1 = 0.6203504908994001; - // alpha = (4/(9*pi))**third = 0.521061761198 - // const double alpha = 0.521061761198; - // c2 = -(3/(4*pi)) / alpha = -0.458165293283 - // const double c2 = -0.458165293283; - // c3 = (4/3) * c2 = -0.610887057711 - const double c3 = -0.610887057711; - - const double A = 0.0311; - const double B = -0.048; - const double b1 = 1.0529; - const double b2 = 0.3334; - const double G = -0.1423; - - // C from the PZ paper: const double C = 0.0020; - // D from the PZ paper: const double D = -0.0116; - // C and D by matching Ec and Vc at rs=1 - const double D = G / ( 1.0 + b1 + b2 ) - B; - const double C = -A - D - G * ( (b1/2.0 + b2) / ((1.0+b1+b2)*(1.0+b1+b2))); - - double ro13 = cbrt(rho); - double rs = c1 / ro13; - - double ec_lda=0.0,vc_lda=0.0; - - // Next line : exchange in Hartree units - double vx_lda = c3 / rs; - double ex_lda = 0.75 * vx_lda; - - // Next lines : Ceperley & Alder correlation (Zunger & Perdew) - if ( rs < 1.0 ) - { - double logrs = log(rs); - ec_lda = A * logrs + B + C * rs * logrs + D * rs; - vc_lda = A * logrs + ( B - A / 3.0 ) + - (2.0/3.0) * C * rs * logrs + - ( ( 2.0 * D - C ) / 3.0 ) * rs; - } - else - { - double sqrtrs = sqrt(rs); - double den = 1.0 + b1 * sqrtrs + b2 * rs; - ec_lda = G / den; - vc_lda = ec_lda * ( 1.0 + (7.0/6.0) * b1 * sqrtrs + - (4.0/3.0) * b2 * rs ) / den; - } - - // Becke88 exchange: A.D.Becke, Phys.Rev. B38, 3098 (1988) - // Becke88 exchange constants - const double beta=0.0042; - //const double ax = -0.7385587663820224058; /* -0.75*pow(3.0/pi,third) */ - const double axa = -0.9305257363490999; /* -1.5*pow(3.0/(4*pi),third) */ - - const double rha = 0.5 * rho; - const double grada = 0.5 * grad; - - const double rha13 = pow ( rha, 1.0/3.0 ); - const double rha43 = rha * rha13; - const double xa = grada / rha43; - const double xa2 = xa*xa; - const double asinhxa = asinh(xa); - const double frac = 1.0 / ( 1.0 + 6.0 * beta * xa * asinhxa ); - const double ga = axa - beta * xa2 * frac; - - const double ex_b88 = rha13 * ga; - - // Becke88 GGA exchange correction - const double dex_b88 = ex_b88 - ex_lda; - - // Becke88 potential - const double gpa = ( 6.0*beta*beta*xa2 * ( xa/sqrt(xa2+1.0) - asinhxa ) - - 2.0*beta*xa ) * frac*frac; - const double vx1_b88 = rha13 * (4.0/3.0) * ( ga - xa * gpa ); - const double vx2_b88 = - 0.5 * gpa / grada; - - // Becke88 GGA exchange correction potential - const double dvx1_b88 = vx1_b88 - vx_lda; - const double dvx2_b88 = vx2_b88; - - //------------------------------------------------------------ - // LYP correlation - // Phys. Rev. B 37, 785 (1988). - // LYP constants - const double a = 0.04918; - const double b = 0.132; - const double ab36 = a * b / 36.0; - const double c = 0.2533; - const double c_third = c / 3.0; - const double d = 0.349; - const double d_third = d / 3.0; - const double cf = 2.87123400018819; /* (3/10)*pow(3*pi*pi,2/3) */ - const double cfb = cf * b; - - // next lines specialized to the unpolarized case - const double rh13 = pow ( rho, 1.0/3.0 ); - const double rhm13 = 1.0 / rh13; - const double rhm43 = rhm13 / rho; - const double e = exp ( - c * rhm13 ); - const double num = 1.0 + cfb * e; - const double den = 1.0 + d * rhm13; - const double deninv = 1.0 / den; - const double cfrac = num * deninv; - - const double delta = rhm13 * ( c + d * deninv ); - const double rhm53 = rhm43 * rhm13; - const double t1 = e * deninv; - const double t2 = rhm53; - const double t3 = 6.0 + 14.0 * delta; - - const double g = ab36 * t1 * t2 * t3; - - /* next line, ec is the energy density, hence divide the energy by rho */ - const double ec_lyp = - a * cfrac + 0.25 * g * grad * grad / rho; - - /* energy done, now the potential */ - const double de = c_third * rhm43 * e; - const double dnum = cfb * de; - const double dden = - d_third * rhm43; - const double dfrac = ( dnum * den - dden * num ) * deninv * deninv; - - const double ddelta = - (1.0/3.0) * rhm43 * ( c + d * deninv ) - - rhm13 * d * dden * deninv * deninv; - const double dt1 = de * deninv - e * dden * deninv * deninv; - const double dt2 = - (5.0/3.0) * rhm53/rho; - const double dt3 = 14.0 * ddelta; - - const double dg = ab36 * ( dt1 * t2 * t3 + t1 * dt2 * t3 + t1 * t2 * dt3 ); - - const double vc1_lyp = - a * (cfrac + rho * dfrac) + 0.25 * dg * grad * grad; - const double vc2_lyp = -0.5 * g; - + // B3LYP unpolarized // Coefficients of the B3LYP functional // A. Becke, JCP 98, 5648 (1993) // See also X.Xu and W. Goddard, J.Phys.Chem. A108, 2305 (2004) - // EcLSDA is the Perdew-Zunger parametrization of Ceperley-Alder data + // EcLSDA is the Vosko-Wilk-Nusair correlation energy + // dExBecke88 is the difference ExB88 - ExLDA // 0.2 ExHF + 0.80 ExSlater + 0.19 EcLSDA + 0.81 EcLYP + 0.72 dExBecke88 const double xlda_coeff = 0.80; // Slater exchange const double clda_coeff = 0.19; // LSDA correlation const double xb88_coeff = 0.72; // Becke88 exchange gradient correction const double clyp_coeff = 1.0 - clda_coeff; + double ex_lda,vx_lda,ec_lda,vc_lda; + double ex_b88,vx1_b88,vx2_b88; + double ec_lyp,vc1_lyp,vc2_lyp; + + VWNFunctional::exvwn(rho,ex_lda,vx_lda); + VWNFunctional::ecvwn(rho,ec_lda,vc_lda); + BLYPFunctional::exb88(rho,grad,&ex_b88,&vx1_b88,&vx2_b88); + BLYPFunctional::eclyp(rho,grad,&ec_lyp,&vc1_lyp,&vc2_lyp); + + const double dex_b88 = ex_b88 - ex_lda; + const double dvx1_b88 = vx1_b88 - vx_lda; + const double dvx2_b88 = vx2_b88; + *exc = xlda_coeff * ex_lda + clda_coeff * ec_lda + xb88_coeff * dex_b88 + @@ -279,3 +179,72 @@ void B3LYPFunctional::excb3lyp(double rho, double grad, *vxc2 = xb88_coeff * dvx2_b88 + clyp_coeff * vc2_lyp; } + +//////////////////////////////////////////////////////////////////////////////// +void B3LYPFunctional::excb3lyp_sp(double rho_up, double rho_dn, + double grad_up2, double grad_dn2, double grad_up_grad_dn, + double *exc_up, double *exc_dn, double *vxc1_up, double *vxc1_dn, + double *vxc2_upup, double *vxc2_dndn, double *vxc2_updn, double *vxc2_dnup) +{ + // B3LYP spin-polarized + // Coefficients of the B3LYP functional + // A. Becke, JCP 98, 5648 (1993) + // See also X.Xu and W. Goddard, J.Phys.Chem. A108, 2305 (2004) + // EcLSDA is the Vosko-Wilk-Nusair correlation energy + // dExBecke88 is the difference ExB88 - ExLDA + // 0.2 ExHF + 0.80 ExSlater + 0.19 EcLSDA + 0.81 EcLYP + 0.72 dExBecke88 + const double xlda_coeff = 0.80; // Slater exchange + const double clda_coeff = 0.19; // LSDA correlation + const double xb88_coeff = 0.72; // Becke88 exchange gradient correction + const double clyp_coeff = 1.0 - clda_coeff; + + double ex_lda,vx_lda_up,vx_lda_dn; + double ec_lda,vc_lda_up,vc_lda_dn; + double ex_b88_up,ex_b88_dn,vx1_b88_up,vx1_b88_dn, + vx2_b88_upup,vx2_b88_dndn,vx2_b88_updn,vx2_b88_dnup; + double ec_lyp_up,ec_lyp_dn,vc1_lyp_up,vc1_lyp_dn, + vc2_lyp_upup,vc2_lyp_dndn,vc2_lyp_updn,vc2_lyp_dnup; + + VWNFunctional::exvwn_sp(rho_up,rho_dn,ex_lda,vx_lda_up,vx_lda_dn); + VWNFunctional::ecvwn_sp(rho_up,rho_dn,ec_lda,vc_lda_up,vc_lda_dn); + BLYPFunctional::exb88_sp(rho_up,rho_dn,grad_up2,grad_dn2,grad_up_grad_dn, + &ex_b88_up,&ex_b88_dn,&vx1_b88_up,&vx1_b88_dn, + &vx2_b88_upup,&vx2_b88_dndn,&vx2_b88_updn,&vx2_b88_dnup); + BLYPFunctional::eclyp_sp(rho_up,rho_dn,grad_up2,grad_dn2,grad_up_grad_dn, + &ec_lyp_up,&ec_lyp_dn,&vc1_lyp_up,&vc1_lyp_dn, + &vc2_lyp_upup,&vc2_lyp_dndn,&vc2_lyp_updn,&vc2_lyp_dnup); + + const double dex_b88_up = ex_b88_up - ex_lda; + const double dex_b88_dn = ex_b88_dn - ex_lda; + const double dvx1_b88_up = vx1_b88_up - vx_lda_up; + const double dvx1_b88_dn = vx1_b88_dn - vx_lda_dn; + const double dvx2_b88_upup = vx2_b88_upup; + const double dvx2_b88_dndn = vx2_b88_dndn; + const double dvx2_b88_updn = vx2_b88_updn; + const double dvx2_b88_dnup = vx2_b88_dnup; + + *exc_up = xlda_coeff * ex_lda + + clda_coeff * ec_lda + + xb88_coeff * dex_b88_up + + clyp_coeff * ec_lyp_up; + + *exc_dn = xlda_coeff * ex_lda + + clda_coeff * ec_lda + + xb88_coeff * dex_b88_dn + + clyp_coeff * ec_lyp_dn; + + *vxc1_up = xlda_coeff * vx_lda_up + + clda_coeff * vc_lda_up + + xb88_coeff * dvx1_b88_up + + clyp_coeff * vc1_lyp_up; + + *vxc1_dn = xlda_coeff * vx_lda_dn + + clda_coeff * vc_lda_dn + + xb88_coeff * dvx1_b88_dn + + clyp_coeff * vc1_lyp_dn; + + *vxc2_upup = xb88_coeff * dvx2_b88_upup + clyp_coeff * vc2_lyp_upup; + *vxc2_dndn = xb88_coeff * dvx2_b88_dndn + clyp_coeff * vc2_lyp_dndn; + *vxc2_updn = xb88_coeff * dvx2_b88_updn + clyp_coeff * vc2_lyp_updn; + *vxc2_dnup = xb88_coeff * dvx2_b88_dnup + clyp_coeff * vc2_lyp_dnup; +} diff --git a/src/B3LYPFunctional.h b/src/B3LYPFunctional.h index 5efe696..b9ba84e 100644 --- a/src/B3LYPFunctional.h +++ b/src/B3LYPFunctional.h @@ -35,10 +35,9 @@ class B3LYPFunctional : public XCFunctional double *exc, double *vxc1, double *vxc2); void excb3lyp_sp(double rho_up, double rho_dn, - double grad_up, double grad_dn, double grad, - double *exc_up, double *exc_dn, - double *vxc1_up, double *vxc1_dn, double *vxc2_upup, double *vxc2_dndn, - double *vxc2_updn, double *vxc2_dnup); + double grad_up2, double grad_dn2, double grad_up_grad_dn, + double *exc_up, double *exc_dn, double *vxc1_up, double *vxc1_dn, + double *vxc2_upup, double *vxc2_dndn, double *vxc2_updn, double *vxc2_dnup); public: diff --git a/src/BLYPFunctional.C b/src/BLYPFunctional.C index 9a60e17..c04165f 100644 --- a/src/BLYPFunctional.C +++ b/src/BLYPFunctional.C @@ -15,7 +15,6 @@ // BLYPFunctional.C // //////////////////////////////////////////////////////////////////////////////// -// $Id: BLYPFunctional.C,v 1.6 2008-09-08 15:56:18 fgygi Exp $ #include #include @@ -47,8 +46,37 @@ BLYPFunctional::BLYPFunctional(const vector > &rhoe) } else { - // not implemented - assert(false); + _exc_up.resize(_np); + _exc_dn.resize(_np); + _vxc1_up.resize(_np); + _vxc1_dn.resize(_np); + _vxc2_upup.resize(_np); + _vxc2_updn.resize(_np); + _vxc2_dnup.resize(_np); + _vxc2_dndn.resize(_np); + _grad_rho_up[0].resize(_np); + _grad_rho_up[1].resize(_np); + _grad_rho_up[2].resize(_np); + _grad_rho_dn[0].resize(_np); + _grad_rho_dn[1].resize(_np); + _grad_rho_dn[2].resize(_np); + + rho_up = &rhoe[0][0]; + rho_dn = &rhoe[1][0]; + grad_rho_up[0] = &_grad_rho_up[0][0]; + grad_rho_up[1] = &_grad_rho_up[1][0]; + grad_rho_up[2] = &_grad_rho_up[2][0]; + grad_rho_dn[0] = &_grad_rho_dn[0][0]; + grad_rho_dn[1] = &_grad_rho_dn[1][0]; + grad_rho_dn[2] = &_grad_rho_dn[2][0]; + exc_up = &_exc_up[0]; + exc_dn = &_exc_dn[0]; + vxc1_up = &_vxc1_up[0]; + vxc1_dn = &_vxc1_dn[0]; + vxc2_upup = &_vxc2_upup[0]; + vxc2_updn = &_vxc2_updn[0]; + vxc2_dnup = &_vxc2_dnup[0]; + vxc2_dndn = &_vxc2_dndn[0]; } } @@ -63,17 +91,24 @@ void BLYPFunctional::setxc(void) assert( exc != 0 ); assert( vxc1 != 0 ); assert( vxc2 != 0 ); + + double ex,vx1,vx2,ec,vc1,vc2; for ( int i = 0; i < _np; i++ ) { double grad = sqrt(grad_rho[0][i]*grad_rho[0][i] + grad_rho[1][i]*grad_rho[1][i] + grad_rho[2][i]*grad_rho[2][i] ); - excblyp(rho[i],grad,&exc[i],&vxc1[i],&vxc2[i]); + + exb88(rho[i],grad,&ex,&vx1,&vx2); + eclyp(rho[i],grad,&ec,&vc1,&vc2); + + exc[i] = ex + ec; + vxc1[i] = vx1 + vc1; + vxc2[i] = vc1 + vc2; } } else { -#if 0 // not implemented assert( rho_up != 0 ); assert( rho_dn != 0 ); assert( grad_rho_up[0] != 0 && grad_rho_up[1] != 0 && grad_rho_up[2] != 0 ); @@ -87,6 +122,8 @@ void BLYPFunctional::setxc(void) assert( vxc2_dnup != 0 ); assert( vxc2_dndn != 0 ); + double ex_up,ex_dn,vx1_up,vx1_dn,vx2_upup,vx2_dndn,vx2_updn,vx2_dnup; + double ec_up,ec_dn,vc1_up,vc1_dn,vc2_upup,vc2_dndn,vc2_updn,vc2_dnup; for ( int i = 0; i < _np; i++ ) { double grx_up = grad_rho_up[0][i]; @@ -95,80 +132,92 @@ void BLYPFunctional::setxc(void) double grx_dn = grad_rho_dn[0][i]; double gry_dn = grad_rho_dn[1][i]; double grz_dn = grad_rho_dn[2][i]; - double grx = grx_up + grx_dn; - double gry = gry_up + gry_dn; - double grz = grz_up + grz_dn; - double grad_up = sqrt(grx_up*grx_up + gry_up*gry_up + grz_up*grz_up); - double grad_dn = sqrt(grx_dn*grx_dn + gry_dn*gry_dn + grz_dn*grz_dn); - double grad = sqrt(grx*grx + gry*gry + grz*grz); - excpbe_sp(rho_up[i],rho_dn[i],grad_up,grad_dn,grad,&exc_up[i],&exc_dn[i], - &vxc1_up[i],&vxc1_dn[i],&vxc2_upup[i],&vxc2_dndn[i], - &vxc2_updn[i], &vxc2_dnup[i]); + double grad_up2 = grx_up*grx_up + gry_up*gry_up + grz_up*grz_up; + double grad_dn2 = grx_dn*grx_dn + gry_dn*gry_dn + grz_dn*grz_dn; + double grad_up_grad_dn = grx_up*grx_dn + gry_up*gry_dn + grz_up*grz_dn; + + exb88_sp(rho_up[i],rho_dn[i],grad_up2,grad_dn2,grad_up_grad_dn, + &ex_up,&ex_dn,&vx1_up,&vx1_dn, + &vx2_upup,&vx2_dndn,&vx2_updn,&vx2_dnup); + eclyp_sp(rho_up[i],rho_dn[i],grad_up2,grad_dn2,grad_up_grad_dn, + &ec_up,&ec_dn,&vc1_up,&vc1_dn, + &vc2_upup,&vc2_dndn,&vc2_updn,&vc2_dnup); + + exc_up[i] = ex_up + ec_up; + exc_dn[i] = ex_dn + ec_dn; + vxc1_up[i] = vx1_up + vc1_up; + vxc1_dn[i] = vx1_dn + vc1_dn; + vxc2_upup[i] = vx2_upup + vc2_upup; + vxc2_dndn[i] = vx2_dndn + vc2_dndn; + vxc2_updn[i] = vx2_updn + vc2_updn; + vxc2_dnup[i] = vx2_dnup + vc2_dnup; } -#endif } } + //////////////////////////////////////////////////////////////////////////////// -void BLYPFunctional::excblyp(double rho, double grad, - double *exc, double *vxc1, double *vxc2) +void BLYPFunctional::exb88(double rho, double grad, + double *ex, double *vx1, double *vx2) { - /* Becke exchange constants */ - const double third = 1.0 / 3.0; + // Becke exchange constants const double fourthirds = 4.0 / 3.0; - const double fivethirds = 5.0 / 3.0; const double beta=0.0042; - const double ax = -0.7385587663820224058; /* -0.75*pow(3.0/pi,third) */ - const double axa = -0.9305257363490999; /* -1.5*pow(3.0/(4*pi),third) */ + const double axa = -0.9305257363490999; // -1.5*pow(3.0/(4*pi),third) - /* LYP constants */ - const double a = 0.04918; - const double b = 0.132; - const double ab36 = a * b / 36.0; - const double c = 0.2533; - const double c_third = c / 3.0; - const double d = 0.349; - const double d_third = d / 3.0; - const double cf = 2.87123400018819; /* (3/10)*pow(3*pi*pi,2/3) */ - const double cfb = cf * b; - - *exc = 0.0; - *vxc1 = 0.0; - *vxc2 = 0.0; + *ex = 0.0; + *vx1 = 0.0; + *vx2 = 0.0; - if ( rho < 1.e-18 ) - { - return; - } + if ( rho < 1.e-10 ) return; - /* - * Becke's exchange - * A.D.Becke, Phys.Rev. B38, 3098 (1988) - */ + // Becke's exchange + // A.D.Becke, Phys.Rev. B38, 3098 (1988) const double rha = 0.5 * rho; const double grada = 0.5 * grad; - const double rha13 = pow ( rha, third ); + const double rha13 = cbrt(rha); const double rha43 = rha * rha13; const double xa = grada / rha43; const double xa2 = xa*xa; const double asinhxa = asinh(xa); const double frac = 1.0 / ( 1.0 + 6.0 * beta * xa * asinhxa ); const double ga = axa - beta * xa2 * frac; - /* N.B. in next line, ex is the energy density, hence rh13 */ - const double ex = rha13 * ga; + // in next line, ex is the energy density, hence rh13 + *ex = rha13 * ga; - /* energy done, now the potential */ + // potential const double gpa = ( 6.0*beta*beta*xa2 * ( xa/sqrt(xa2+1.0) - asinhxa ) - 2.0*beta*xa ) * frac*frac; - const double vx1 = rha13 * fourthirds * ( ga - xa * gpa ); - const double vx2 = - 0.5 * gpa / grada; - - /*------------------------------------------------------------*/ - /* LYP correlation */ - /* Phys. Rev. B 37, 785 (1988). */ - /* next lines specialized to the unpolarized case */ - const double rh13 = pow ( rho, third ); + *vx1 = rha13 * fourthirds * ( ga - xa * gpa ); + *vx2 = - 0.5 * gpa / grada; +} + +//////////////////////////////////////////////////////////////////////////////// +void BLYPFunctional::eclyp(double rho, double grad, + double *ec, double *vc1, double *vc2) +{ + // LYP constants + const double a = 0.04918; + const double b = 0.132; + const double ab36 = a * b / 36.0; + const double c = 0.2533; + const double c_third = c / 3.0; + const double d = 0.349; + const double d_third = d / 3.0; + const double cf = 2.87123400018819; // (3/10)*pow(3*pi*pi,2/3) + const double cfb = cf * b; + + *ec = 0.0; + *vc1 = 0.0; + *vc2 = 0.0; + + if ( rho < 1.e-10 ) return; + + // LYP correlation + // Phys. Rev. B 37, 785 (1988). + // next lines specialized to the unpolarized case + const double rh13 = cbrt(rho); const double rhm13 = 1.0 / rh13; const double rhm43 = rhm13 / rho; const double e = exp ( - c * rhm13 ); @@ -178,6 +227,8 @@ void BLYPFunctional::excblyp(double rho, double grad, const double cfrac = num * deninv; const double delta = rhm13 * ( c + d * deninv ); + const double ddelta = - (1.0/3.0) * ( c * rhm43 + + d * rhm13 * rhm13 / ((d+rh13)*(d+rh13)) ); const double rhm53 = rhm43 * rhm13; const double t1 = e * deninv; const double t2 = rhm53; @@ -185,27 +236,242 @@ void BLYPFunctional::excblyp(double rho, double grad, const double g = ab36 * t1 * t2 * t3; - /* next line, ec is the energy density, hence divide the energy by rho */ - const double ec = - a * cfrac + 0.25 * g * grad * grad / rho; + // ec is the energy density, hence divide the energy by rho + *ec = - a * cfrac + 0.25 * g * grad * grad / rho; - /* energy done, now the potential */ + // potential const double de = c_third * rhm43 * e; const double dnum = cfb * de; const double dden = - d_third * rhm43; const double dfrac = ( dnum * den - dden * num ) * deninv * deninv; - const double ddelta = - third * rhm43 * ( c + d * deninv ) - - rhm13 * d * dden * deninv * deninv; const double dt1 = de * deninv - e * dden * deninv * deninv; - const double dt2 = - fivethirds * rhm53/rho; + const double dt2 = - (5.0/3.0) * rhm53/rho; const double dt3 = 14.0 * ddelta; const double dg = ab36 * ( dt1 * t2 * t3 + t1 * dt2 * t3 + t1 * t2 * dt3 ); - const double vc1 = - a * ( cfrac + rho * dfrac ) + 0.25 * dg * grad * grad; - const double vc2 = -0.5 * g; + *vc1 = - a * ( cfrac + rho * dfrac ) + 0.25 * dg * grad * grad; + *vc2 = -0.5 * g; +} + +//////////////////////////////////////////////////////////////////////////////// +void BLYPFunctional::exb88_sp(double rho_up, double rho_dn, + double grad_up2, double grad_dn2, double grad_up_grad_dn, + double *ex_up, double *ex_dn, double *vx1_up, double *vx1_dn, + double *vx2_upup, double *vx2_dndn, double *vx2_updn, double *vx2_dnup) +{ + *ex_up = 0.0; + *ex_dn = 0.0; + *vx1_up = 0.0; + *vx1_dn = 0.0; + *vx2_upup = 0.0; + *vx2_updn = 0.0; + *vx2_dnup = 0.0; + *vx2_dndn = 0.0; + + if ( rho_up < 1.e-10 && rho_dn < 1.e-10 ) return; + + // Becke exchange constants + const double fourthirds = 4.0 / 3.0; + const double beta = 0.0042; + const double cx = -0.9305257363490999; // -1.5*pow(3.0/(4*pi),third) + + // Becke's exchange + // A.D.Becke, Phys.Rev. B38, 3098 (1988) + + double ex1a = 0.0; + double ex1b = 0.0; + double vx1a = 0.0; + double vx1b = 0.0; + double vx2a = 0.0; + double vx2b = 0.0; + + if ( rho_up > 1.e-10 ) + { + const double& rha = rho_up; + const double rha13 = cbrt(rha); + const double rha43 = rha * rha13; + const double grada = sqrt(grad_up2); + const double xa = grada / rha43; + const double xa2 = xa*xa; + const double asinhxa = asinh(xa); + const double fraca = 1.0 / ( 1.0 + 6.0 * beta * xa * asinhxa ); + const double ga = cx - beta * xa2 * fraca; + // next line, ex is the energy density, hence rh13 + ex1a = rha13 * ga; + const double gpa = ( 6.0*beta*beta*xa2 * ( xa/sqrt(xa2+1.0) - asinhxa ) + - 2.0*beta*xa ) * fraca * fraca; + vx1a = rha13 * fourthirds * ( ga - xa * gpa ); + vx2a = - gpa / grada; + } + + if ( rho_dn > 1.e-10 ) + { + const double& rhb = rho_dn; + const double rhb13 = cbrt(rhb); + const double rhb43 = rhb * rhb13; + const double gradb = sqrt(grad_dn2); + const double xb = gradb / rhb43; + const double xb2 = xb*xb; + const double asinhxb = asinh(xb); + const double fracb = 1.0 / ( 1.0 + 6.0 * beta * xb * asinhxb ); + const double gb = cx - beta * xb2 * fracb; + // next line, ex is the energy density, hence rh13 + ex1b = rhb13 * gb; + const double gpb = ( 6.0*beta*beta*xb2 * ( xb/sqrt(xb2+1.0) - asinhxb ) + - 2.0*beta*xb ) * fracb * fracb; + vx1b = rhb13 * fourthirds * ( gb - xb * gpb ); + vx2b = - gpb / gradb; + } + + *ex_up = ex1a; + *ex_dn = ex1b; + *vx1_up = vx1a; + *vx1_dn = vx1b; + *vx2_upup = vx2a; + *vx2_updn = 0.0; + *vx2_dnup = 0.0; + *vx2_dndn = vx2b; +} + +//////////////////////////////////////////////////////////////////////////////// +void BLYPFunctional::eclyp_sp(double rho_up, double rho_dn, + double grad_up2, double grad_dn2, double grad_up_grad_dn, + double *ec_up, double *ec_dn, double *vc1_up, double *vc1_dn, + double *vc2_upup, double *vc2_dndn, double *vc2_updn, double *vc2_dnup) +{ + *ec_up = 0.0; + *ec_dn = 0.0; + *vc1_up = 0.0; + *vc1_dn = 0.0; + *vc2_upup = 0.0; + *vc2_updn = 0.0; + *vc2_dnup = 0.0; + *vc2_dndn = 0.0; + + if ( rho_up < 1.e-10 && rho_dn < 1.e-10 ) return; + if ( rho_up + rho_dn < 1.e-10 ) return; + if ( rho_up < 0.0 ) rho_up = 0.0; + if ( rho_dn < 0.0 ) rho_dn = 0.0; + + // LYP constants + const double a = 0.04918; + const double b = 0.132; + const double c = 0.2533; + const double d = 0.349; + const double cf = 2.87123400018819; // (3/10)*pow(3*pi*pi,2/3) + const double ninth = 1.0 / 9.0; + const double two113 = pow(2.0,11.0/3.0); + const double fourthirds = 4.0 / 3.0; + + const double& rha = rho_up; + const double& rhb = rho_dn; + + const double rha13 = cbrt(rha); + const double rhb13 = cbrt(rhb); + const double rha43 = rha * rha13; + const double rhb43 = rhb * rhb13; + const double rha83 = rha43 * rha43; + const double rhb83 = rhb43 * rhb43; + const double rha113 = rha83 * rha; + const double rhb113 = rhb83 * rhb; + + // LYP correlation + // Phys. Rev. B 37, 785 (1988). + + const double rho = rha + rhb; + const double rhoinv = 1.0/rho; + const double rhab = rha * rhb; + const double rhabrhm = rhab * rhoinv; + const double rh13 = cbrt(rho); + const double rhm13 = 1.0 / rh13; + const double rhm43 = rhm13 / rho; + const double rhm113 = rhm43 * rhm43 * rhm43 * rh13; + const double rhm143 = rhm113 * rhoinv; + const double e = exp ( - c * rhm13 ); + const double den = 1.0 + d * rhm13; + const double deninv = 1.0 / den; + + const double w = e * deninv * rhm113; + const double dw = (1.0/3.0)*(c*d+(c-10.0*d)*rh13-11.0*rh13*rh13)*e*rhm143/ + ((d+rh13)*(d+rh13)); + const double abw = a * b * w; + const double f1 = -4.0 * a * deninv * rhabrhm; + const double f2 = - two113 * cf * abw * rha * rhb * ( rha83 + rhb83 ); + + const double delta = rhm13 * ( c + d * deninv ); + const double ddelta = - (1.0/3.0) * ( c * rhm43 + + d * rhm13 * rhm13 / ((d+rh13)*(d+rh13)) ); + const double taa = 1.0 - 3.0 * delta + ( 11.0 - delta ) * rha * rhoinv; + const double tbb = 1.0 - 3.0 * delta + ( 11.0 - delta ) * rhb * rhoinv; + + const double dtaa_drha = -3.0*ddelta - ddelta*rha*rhoinv + + (11.0-delta)*rhoinv*(1.0-rha*rhoinv); + const double dtaa_drhb = -3.0*ddelta - ddelta*rha*rhoinv - + (11.0-delta)*rhoinv*rha*rhoinv; + const double dtbb_drha = -3.0*ddelta - ddelta*rhb*rhoinv - + (11.0-delta)*rhoinv*rhb*rhoinv; + const double dtbb_drhb = -3.0*ddelta - ddelta*rhb*rhoinv + + (11.0-delta)*rhoinv*(1.0-rhb*rhoinv); + const double gaa = - abw * ( rhab * ninth * taa - rhb * rhb ); + const double gbb = - abw * ( rhab * ninth * tbb - rha * rha ); + const double gab = - abw * ( rhab * ninth * ( 47.0 - 7.0 * delta ) + - fourthirds * rho * rho ); + + // next line, ec is the energy density, hence divide the energy by rho + const double ec1a = ( f1 + f2 + gaa * grad_up2 + + gab * grad_up_grad_dn + + gbb * grad_dn2 ) / rho; + const double ec1b = ( f1 + f2 + gaa * grad_up2 + + gab * grad_up_grad_dn + + gbb * grad_dn2 ) / rho; + + const double A = -two113*cf*a*b; + const double df1_drha = -fourthirds*a*d*deninv*deninv*rhm43*rhabrhm + - 4.0*a*deninv*rhoinv*(rhb-rhabrhm); + const double df2_drha = A*dw*(rha113*rhb+rha*rhb113) + + A*w*((11.0/3.0)*rha83*rhb+rhb113); + + const double df1_drhb = -fourthirds*a*d*deninv*deninv*rhm43*rhabrhm + - 4.0*a*deninv*rhoinv*(rha-rhabrhm); + const double df2_drhb = A*dw*(rha113*rhb+rha*rhb113) + + A*w*(rha113+rha*(11.0/3.0)*rhb83); + + const double dgaa_drha = -a*b*dw*(rhab*ninth*taa-rhb*rhb) + - abw*(rhb*ninth*taa+rhab*ninth*dtaa_drha); + const double dgaa_drhb = -a*b*dw*(rhab*ninth*taa-rhb*rhb) + - abw*(rha*ninth*taa+rhab*ninth*dtaa_drhb-2.0*rhb); + + const double dgbb_drha = -a*b*dw*(rhab*ninth*tbb-rha*rha) + - abw*(rhb*ninth*tbb+rhab*ninth*dtbb_drha-2.0*rha); + const double dgbb_drhb = -a*b*dw*(rhab*ninth*tbb-rha*rha) + - abw*(rha*ninth*tbb+rhab*ninth*dtbb_drhb); + + const double dgab_drha = -a*b*dw*(rhab*ninth*(47.0-7.0*delta) + -fourthirds*rho*rho) + -abw*(rhb*ninth*(47.0-7.0*delta) + -rhab*ninth*7.0*ddelta-2.0*fourthirds*rho); + const double dgab_drhb = -a*b*dw*(rhab*ninth*(47.0-7.0*delta) + -fourthirds*rho*rho) + -abw*(rha*ninth*(47.0-7.0*delta) + -rhab*ninth*7.0*ddelta-2.0*fourthirds*rho); + + const double vc1a = df1_drha + df2_drha + + dgaa_drha * grad_up2 + + dgab_drha * grad_up_grad_dn + + dgbb_drha * grad_dn2; + const double vc1b = df1_drhb + df2_drhb + + dgaa_drhb * grad_up2 + + dgab_drhb * grad_up_grad_dn + + dgbb_drhb * grad_dn2; - *exc = ex + ec; - *vxc1 = vx1 + vc1; - *vxc2 = vx2 + vc2; + *ec_up = ec1a; + *ec_dn = ec1b; + *vc1_up = vc1a; + *vc1_dn = vc1b; + *vc2_upup = - 2.0 * gaa; + *vc2_updn = - gab; + *vc2_dnup = - gab; + *vc2_dndn = - 2.0 * gbb; } diff --git a/src/BLYPFunctional.h b/src/BLYPFunctional.h index 99f5bfa..83a9554 100644 --- a/src/BLYPFunctional.h +++ b/src/BLYPFunctional.h @@ -15,8 +15,6 @@ // BLYPFunctional.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: BLYPFunctional.h,v 1.7 2009-06-29 09:57:57 fgygi Exp $ - #ifndef BLYPFUNCTIONAL_H #define BLYPFUNCTIONAL_H @@ -32,19 +30,24 @@ class BLYPFunctional : public XCFunctional _vxc2, _vxc2_upup, _vxc2_updn, _vxc2_dnup, _vxc2_dndn; std::vector _grad_rho[3], _grad_rho_up[3], _grad_rho_dn[3]; - void excblyp(double rho, double grad, - double *exc, double *vxc1, double *vxc2); - - void excblyp_sp(double rho_up, double rho_dn, - double grad_up, double grad_dn, double grad, - double *exc_up, double *exc_dn, - double *vxc1_up, double *vxc1_dn, double *vxc2_upup, double *vxc2_dndn, - double *vxc2_updn, double *vxc2_dnup); - public: BLYPFunctional(const std::vector > &rhoe); + static void exb88(double rho, double grad, + double *ex, double *vx1, double *vx2); + static void eclyp(double rho, double grad, + double *ec, double *vc1, double *vc2); + + static void exb88_sp(double rho_up, double rho_dn, + double grad_up2, double grad_dn2, double grad_up_grad_dn, + double *ex_up, double *ex_dn, double *vx1_up, double *vx1_dn, + double *vx2_upup, double *vx2_dndn, double *vx2_updn, double *vx2_dnup); + static void eclyp_sp(double rho_up, double rho_dn, + double grad_up2, double grad_dn2, double grad_up_grad_dn, + double *ec_up, double *ec_dn, double *vc1_up, double *vc1_dn, + double *vc2_upup, double *vc2_dndn, double *vc2_updn, double *vc2_dnup); + bool isGGA() const { return true; }; std::string name() const { return "BLYP"; }; void setxc(void); diff --git a/src/BMDIonicStepper.C b/src/BMDIonicStepper.C index e302c55..8ff3dba 100644 --- a/src/BMDIonicStepper.C +++ b/src/BMDIonicStepper.C @@ -36,7 +36,6 @@ void BMDIonicStepper::compute_r(double e0, const vector >& f0) fm_ = f0; em_ = e0_; r0_ = rp_; - atoms_.sync_positions(r0_); atoms_.set_positions(r0_); } diff --git a/src/BOSampleStepper.C b/src/BOSampleStepper.C index ff24986..099a1c0 100644 --- a/src/BOSampleStepper.C +++ b/src/BOSampleStepper.C @@ -25,6 +25,7 @@ #include "PSDWavefunctionStepper.h" #include "PSDAWavefunctionStepper.h" #include "JDWavefunctionStepper.h" +#include "UserInterface.h" #include "Preconditioner.h" #include "SDIonicStepper.h" #include "SDAIonicStepper.h" @@ -35,10 +36,7 @@ #include "CGCellStepper.h" #include "AndersonMixer.h" #include "MLWFTransform.h" - -#ifdef USE_APC -#include "apc.h" -#endif +#include "D3tensor.h" #include #include @@ -62,10 +60,10 @@ BOSampleStepper::~BOSampleStepper() s_.ctxt_.dmax(1,1,&tmax,1); if ( s_.ctxt_.myproc()==0 ) { - cout << "" + string s = "name=\"" + (*i).first + "\""; + cout << "" << endl; } } @@ -181,8 +179,6 @@ void BOSampleStepper::step(int niter) Wavefunction& wf = s_.wf; const int nspin = wf.nspin(); - const UnitCell& cell = wf.cell(); - const double dt = s_.ctrl.dt; const string wf_dyn = s_.ctrl.wf_dyn; @@ -207,7 +203,6 @@ void BOSampleStepper::step(int niter) cell_dyn != "LOCKED" ); // GS-only calculation: const bool gs_only = !atoms_move && !cell_moves; - const bool use_confinement = ( s_.ctrl.ecuts > 0.0 ); Timer tm_iter; @@ -351,15 +346,15 @@ void BOSampleStepper::step(int niter) wkerker[i] = 1.0; } + if ( onpe0 ) + cout << " " << atoms.nel()-wf.nel() << " \n"; + // Next line: special case of niter=0: compute GS only for ( int iter = 0; iter < max(niter,1); iter++ ) { // ionic iteration tm_iter.start(); -#ifdef USE_APC - ApcStart(1); -#endif if ( onpe0 ) cout << "\n"; @@ -372,39 +367,22 @@ void BOSampleStepper::step(int niter) cd_.update_density(); tmap["charge"].stop(); + tmap["update_vhxc"].start(); ef_.update_vhxc(compute_stress); + tmap["update_vhxc"].stop(); const bool compute_forces = true; + tmap["energy"].start(); double energy = ef_.energy(false,dwf,compute_forces,fion,compute_stress,sigma_eks); - double enthalpy = energy; + tmap["energy"].stop(); + double enthalpy = ef_.enthalpy(); if ( onpe0 ) { - cout.setf(ios::fixed,ios::floatfield); - cout.setf(ios::right,ios::adjustfield); - cout << " " << setprecision(8) - << setw(15) << ef_.ekin() << " \n"; - if ( use_confinement ) - cout << " " << setw(15) << ef_.econf() << " \n"; - cout << " " << setw(15) << ef_.eps() << " \n" - << " " << setw(15) << ef_.enl() << " \n" - << " " << setw(15) << ef_.ecoul() << " \n" - << " " << setw(15) << ef_.exc() << " \n" - << " " << setw(15) << ef_.esr() << " \n" - << " " << setw(15) << ef_.eself() << " \n" - << " " << setw(15) << ef_.ets() << " \n"; - if ( s_.extforces.size() > 0 ) - cout << " " << setw(15) << ef_.eexf() << " \n"; - cout << " " << setw(15) << ef_.etotal() << " \n"; - if ( compute_stress ) - { - const double pext = (sigma_ext[0]+sigma_ext[1]+sigma_ext[2])/3.0; - enthalpy = ef_.etotal() + pext * cell.volume(); - cout << " " << setw(15) << pext * cell.volume() - << " " << endl; - cout << " " << setw(15) << enthalpy << " \n" - << flush; - } + cout << cd_; + cout << ef_; + if ( ef_.el_enth() ) + cout << *ef_.el_enth(); } if ( iter > 0 && ionic_stepper ) @@ -413,6 +391,30 @@ void BOSampleStepper::step(int niter) } // at this point, positions r0, velocities v0 and forces fion are // consistent + + // execute commands in iter_cmd if defined + if ( !iter_cmd_.empty() ) + { + if ( iter % iter_cmd_period_ == 0 ) + { + // copy positions and velocities from IonicStepper to AtomSet + if ( ionic_stepper ) + { + ionic_stepper->set_positions(); + ionic_stepper->set_velocities(); + } + // command must be terminated with \n + istringstream cmdstream(iter_cmd_ + "\n"); + s_.ui->processCmds(cmdstream,"[iter_cmd]",true); + // copy positions and velocities back from AtomSet + if ( ionic_stepper ) + { + ionic_stepper->get_positions(); + ionic_stepper->get_velocities(); + } + } + } + double ekin_ion = 0.0, temp_ion = 0.0; if ( ionic_stepper ) { @@ -468,7 +470,8 @@ void BOSampleStepper::step(int niter) if ( ionic_stepper != 0 ) ekin_stepper = ionic_stepper->ekin_stepper(); cout << setprecision(8); - cout << " " << energy+ekin_ion+ekin_stepper << " \n"; + cout << " " << enthalpy+ekin_ion+ekin_stepper + << " \n"; cout << " " << ekin_ion << " \n"; cout << " " << temp_ion << " \n"; } @@ -740,8 +743,11 @@ void BOSampleStepper::step(int niter) mixer.restart(); double ehart, ehart_m; + bool scf_converged = false; + int itscf = 0; + double etotal = 0.0, etotal_m = 0.0, etotal_mm = 0.0; - for ( int itscf = 0; itscf < nitscf_; itscf++ ) + while ( !scf_converged && itscf < nitscf_ ) { if ( nite_ > 0 && onpe0 ) cout << " BOSampleStepper: start scf iteration" << endl; @@ -754,6 +760,9 @@ void BOSampleStepper::step(int niter) cd_.update_density(); tmap["charge"].stop(); + if ( onpe0 ) + cout << cd_; + // charge mixing if ( nite_ > 0 ) { @@ -833,15 +842,16 @@ void BOSampleStepper::step(int niter) } } // if nite_ > 0 + tmap["update_vhxc"].start(); ef_.update_vhxc(compute_stress); + tmap["update_vhxc"].stop(); // reset stepper only if multiple non-selfconsistent steps if ( nite_ > 0 ) wf_stepper->preprocess(); // non-self-consistent loop - // repeat until the change in etotal_int or in the - // eigenvalue sum is smaller than a fraction of the change in - // Hartree energy in the last scf iteration + // repeat until the change in eigenvalue_sum is smaller than a + // fraction of the change in Hartree energy in the last scf iteration bool nonscf_converged = false; if ( itscf > 0 ) ehart_m = ehart; @@ -852,19 +862,17 @@ void BOSampleStepper::step(int niter) // if ( onpe0 && nite_ > 0 ) // cout << " delta_ehart = " << delta_ehart << endl; int ite = 0; - double etotal_int, etotal_int_m; - double eigenvalue_sum, eigenvalue_sum_m; + double energy, etotal_int; + + double eigenvalue_sum, eigenvalue_sum_m = 0.0; // if nite == 0: do 1 iteration, no screening in charge mixing // if nite > 0: do nite iterations, use screening in charge mixing + // while ( !nonscf_converged && ite < max(nite_,1) ) { - double energy = ef_.energy(true,dwf,false,fion,false,sigma_eks); - double enthalpy = energy; - - if ( ite > 0 ) - etotal_int_m = etotal_int; - - etotal_int = energy; + tmap["energy"].start(); + energy = ef_.energy(true,dwf,false,fion,false,sigma_eks); + tmap["energy"].stop(); // compute the sum of eigenvalues (with fixed weight) // to measure convergence of the subspace update @@ -878,41 +886,16 @@ void BOSampleStepper::step(int niter) eigenvalue_sum = real(s_.wf.dot(dwf)); if ( onpe0 ) - cout << " " + cout << " " << eigenvalue_sum << " " << endl; + tmap["wf_update"].start(); wf_stepper->update(dwf); + tmap["wf_update"].stop(); - if ( onpe0 ) - { - cout.setf(ios::fixed,ios::floatfield); - cout.setf(ios::right,ios::adjustfield); - cout << " " << setprecision(8) << setw(15) - << energy << " \n"; - } - if ( compute_stress ) - { - const double pext = (sigma_ext[0]+sigma_ext[1]+sigma_ext[2])/3.0; - enthalpy = energy + pext * cell.volume(); - if ( onpe0 ) - cout << " " << setw(15) - << enthalpy << " \n" << flush; - } - - // compare delta_etotal_int only after first iteration + // compare delta_eig_sum only after first iteration if ( ite > 0 ) { -#if 0 - double delta_etotal_int = fabs(etotal_int - etotal_int_m); - nonscf_converged |= (delta_etotal_int < 0.01 * delta_ehart); - if ( onpe0 ) - { - cout << " BOSampleStepper::step: delta_e_int: " - << delta_etotal_int << endl; - cout << " BOSampleStepper::step: delta_ehart: " - << delta_ehart << endl; - } -#else double delta_eig_sum = fabs(eigenvalue_sum - eigenvalue_sum_m); nonscf_converged |= (delta_eig_sum < 0.01 * delta_ehart); #ifdef DEBUG @@ -924,21 +907,19 @@ void BOSampleStepper::step(int niter) << delta_ehart << endl; } #endif -#endif - } ite++; } - // if ( onpe0 && nite_ > 0 && ite >= nite_ ) - // cout << " BOSampleStepper::step: nscf loop not converged after " - // << nite_ << " iterations" << endl; - // subspace diagonalization if ( compute_eigvec || s_.ctrl.wf_diag == "EIGVAL" ) { + tmap["energy"].start(); ef_.energy(true,dwf,false,fion,false,sigma_eks); + tmap["energy"].stop(); + tmap["wfdiag"].start(); s_.wf.diag(dwf,compute_eigvec); + tmap["wfdiag"].stop(); if ( onpe0 ) { cout << "" << endl; @@ -972,27 +953,82 @@ void BOSampleStepper::step(int niter) } // update occupation numbers if fractionally occupied states + // compute weighted sum of eigenvalues + // default value if no fractional occupation + double w_eigenvalue_sum = 2.0 * eigenvalue_sum; + if ( fractional_occ ) { wf.update_occ(s_.ctrl.fermi_temp); - const double wf_entropy = wf.entropy(); +#if 0 if ( onpe0 ) { cout << " Wavefunction entropy: " << wf_entropy << endl; - const double boltz = 1.0 / ( 11605.0 * 2.0 * 13.6058 ); cout << " Entropy contribution to free energy: " << - wf_entropy * s_.ctrl.fermi_temp * boltz << endl; } +#endif + w_eigenvalue_sum = 0.0; + for ( int ispin = 0; ispin < wf.nspin(); ispin++ ) + { + for ( int ikp = 0; ikp < wf.nkp(); ikp++ ) + { + const int nst = wf.sd(ispin,ikp)->nst(); + const double wkp = wf.weight(ikp); + for ( int n = 0; n < nst; n++ ) + { + const double occ = wf.sd(ispin,ikp)->occ(n); + w_eigenvalue_sum += wkp * occ * wf.sd(ispin,ikp)->eig(n); + } + } + } } + // Harris-Foulkes estimate of the total energy + etotal_int = w_eigenvalue_sum - ef_.ehart_e() + ef_.ehart_p() + + ef_.esr() - ef_.eself() + ef_.dxc() + ef_.ets(); +#ifdef DEBUG + if ( onpe0 ) + { + cout << setprecision(8); + cout << "w_eigenvalue_sum = " << setw(15) << w_eigenvalue_sum << endl; + cout << "ef.dxc() = " << setw(15) << ef_.dxc() << endl; + cout << "ef.ehart() = " << setw(15) << ef_.ehart() << endl; + cout << "ef.ehart_e() = " << setw(15) << ef_.ehart_e() << endl; + cout << "ef.ehart_ep() = " << setw(15) << ef_.ehart_ep() << endl; + cout << "ef.esr() = " << setw(15) << ef_.esr() << endl; + } +#endif + + if ( onpe0 ) + { + cout.setf(ios::fixed,ios::floatfield); + cout.setf(ios::right,ios::adjustfield); + cout << " " << setprecision(8) << setw(15) + << etotal_int << " \n"; + } + + etotal_mm = etotal_m; + etotal_m = etotal; + etotal = etotal_int; + if ( nite_ > 0 && onpe0 ) cout << " BOSampleStepper: end scf iteration" << endl; - } // for itscf + + // delta_etotal = interval containing etotal, etotal_m and etotal_mm + double delta_etotal = fabs(etotal - etotal_m); + delta_etotal = max(delta_etotal,fabs(etotal - etotal_mm)); + delta_etotal = max(delta_etotal,fabs(etotal_m - etotal_mm)); + scf_converged |= (delta_etotal < s_.ctrl.scf_tol); + itscf++; + } // while scf if ( compute_mlwf || compute_mlwfc ) { + tmap["mlwf"].start(); for ( int ispin = 0; ispin < nspin; ispin++ ) { + mlwft[ispin]->update(); mlwft[ispin]->compute_transform(); } @@ -1014,20 +1050,36 @@ void BOSampleStepper::step(int niter) SlaterDet& sd = *(wf.sd(ispin,0)); cout << " " << endl; + double total_spread[6]; + for ( int j = 0; j < 6; j++ ) + total_spread[j] = 0.0; for ( int i = 0; i < sd.nst(); i++ ) { D3vector ctr = mlwft[ispin]->center(i); - double sp = mlwft[ispin]->spread(i); + double spi[6]; + for (int j=0; j<3; j++) + { + spi[j] = mlwft[ispin]->spread2(i,j); + total_spread[j] += spi[j]; + } + cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::right, ios::adjustfield); cout << " " + << " \" spread=\" " + << setw(12) << spi[0] + << setw(12) << spi[1] + << setw(12) << spi[2] << " \"/>" << endl; } + cout << " "; + for ( int j = 0; j < 3; j++ ) + cout << setw(10) << total_spread[j]; + cout << " " << endl; D3vector edipole = mlwft[ispin]->dipole(); cout << " " << edipole << " " << endl; @@ -1043,6 +1095,7 @@ void BOSampleStepper::step(int niter) << " " << endl; cout << "" << endl; } + tmap["mlwf"].stop(); } // If GS calculation only, print energy and atomset at end of iterations @@ -1052,13 +1105,19 @@ void BOSampleStepper::step(int niter) cd_.update_density(); tmap["charge"].stop(); + tmap["update_vhxc"].start(); ef_.update_vhxc(compute_stress); + tmap["update_vhxc"].stop(); const bool compute_forces = true; + tmap["energy"].start(); ef_.energy(false,dwf,compute_forces,fion,compute_stress,sigma_eks); + tmap["energy"].stop(); if ( onpe0 ) { cout << ef_; + if ( ef_.el_enth() ) + cout << *ef_.el_enth(); cout << "" << endl; cout << atoms.cell(); for ( int is = 0; is < atoms.atom_list.size(); is++ ) @@ -1113,17 +1172,24 @@ void BOSampleStepper::step(int niter) tmap["charge"].start(); cd_.update_density(); tmap["charge"].stop(); + tmap["update_vhxc"].start(); ef_.update_vhxc(compute_stress); + tmap["update_vhxc"].stop(); + tmap["energy"].start(); ef_.energy(true,dwf,false,fion,false,sigma_eks); + tmap["energy"].stop(); if ( onpe0 ) { + cout << cd_; cout << ef_; + if ( ef_.el_enth() ) + cout << *ef_.el_enth(); } } -#ifdef USE_APC - ApcStop(1); -#endif + if ( atoms_move ) + s_.constraints.update_constraints(dt); + // print iteration time double time = tm_iter.real(); double tmin = time; @@ -1132,14 +1198,13 @@ void BOSampleStepper::step(int niter) s_.ctxt_.dmax(1,1,&tmax,1); if ( onpe0 ) { - cout << " " + string s = "name=\"iteration\""; + cout << "" << endl; cout << "" << endl; } - if ( atoms_move ) - s_.constraints.update_constraints(dt); } // for iter if ( atoms_move ) @@ -1150,10 +1215,14 @@ void BOSampleStepper::step(int niter) cd_.update_density(); tmap["charge"].stop(); + tmap["update_vhxc"].start(); ef_.update_vhxc(compute_stress); + tmap["update_vhxc"].stop(); const bool compute_forces = true; + tmap["energy"].start(); double energy = ef_.energy(false,dwf,compute_forces,fion,compute_stress,sigma_eks); + tmap["energy"].stop(); ionic_stepper->compute_v(energy,fion); // positions r0 and velocities v0 are consistent @@ -1164,7 +1233,9 @@ void BOSampleStepper::step(int niter) // compute wavefunction velocity after last iteration // s_.wfv contains the previous wavefunction + tmap["align"].start(); s_.wfv->align(s_.wf); + tmap["align"].stop(); for ( int ispin = 0; ispin < s_.wf.nspin(); ispin++ ) { @@ -1210,10 +1281,14 @@ void BOSampleStepper::step(int niter) cd_.update_density(); tmap["charge"].stop(); + tmap["update_vhxc"].start(); ef_.update_vhxc(compute_stress); + tmap["update_vhxc"].stop(); const bool compute_forces = true; + tmap["energy"].start(); double energy = ef_.energy(false,dwf,compute_forces,fion,compute_stress,sigma_eks); + tmap["energy"].stop(); ionic_stepper->compute_v(energy,fion); // positions r0 and velocities v0 are consistent @@ -1228,7 +1303,7 @@ void BOSampleStepper::step(int niter) for ( int ispin = 0; ispin < nspin; ispin++ ) { - delete mlwft[ispin]; + delete mlwft[ispin]; } // delete steppers diff --git a/src/BOSampleStepper.h b/src/BOSampleStepper.h index ae32222..73bec66 100644 --- a/src/BOSampleStepper.h +++ b/src/BOSampleStepper.h @@ -15,7 +15,6 @@ // BOSampleStepper.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: BOSampleStepper.h,v 1.9 2009-09-08 05:35:39 fgygi Exp $ #ifndef BOSAMPLESTEPPER_H #define BOSAMPLESTEPPER_H diff --git a/src/Base64Transcoder.h b/src/Base64Transcoder.h index 1eec731..843294e 100644 --- a/src/Base64Transcoder.h +++ b/src/Base64Transcoder.h @@ -15,7 +15,6 @@ // Base64Transcoder.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: Base64Transcoder.h,v 1.7 2008-09-08 15:56:18 fgygi Exp $ #ifndef BASE64TRANSCODER_H #define BASE64TRANSCODER_H diff --git a/src/Basis.C b/src/Basis.C index 223f8e6..2cb3a96 100644 --- a/src/Basis.C +++ b/src/Basis.C @@ -307,7 +307,7 @@ bool Basis::resize(const UnitCell& cell, const UnitCell& refcell, // defcell: cell used to define which vectors are contained in the Basis // if refcell is defined, defcell = refcell // otherwise, defcell = cell - if ( norm(refcell.b(0)) + norm(refcell.b(1)) + norm(refcell.b(2)) == 0.0 ) + if ( norm2(refcell.b(0)) + norm2(refcell.b(1)) + norm2(refcell.b(2)) == 0.0 ) { defcell = cell; } @@ -320,7 +320,7 @@ bool Basis::resize(const UnitCell& cell, const UnitCell& refcell, const D3vector b1 = defcell.b(1); const D3vector b2 = defcell.b(2); - const double normb2 = norm(b2); + const double normb2 = norm2(b2); const double b2inv2 = 1.0 / normb2; const D3vector kp = kpx*b0 + kpy*b1 + kpz*b2; @@ -380,7 +380,7 @@ bool Basis::resize(const UnitCell& cell, const UnitCell& refcell, bool found = false; for ( int l = lmin; l <= lmax; l++ ) { - const double two_e = norm(k*b1+l*b2); + const double two_e = norm2(k*b1+l*b2); if ( two_e < two_ecut ) { lstart = min(l,lstart); @@ -410,7 +410,7 @@ bool Basis::resize(const UnitCell& cell, const UnitCell& refcell, bool found = false; for ( int l = lmin; l <= lmax; l++ ) { - const double two_e = norm(h*b0+k*b1+l*b2); + const double two_e = norm2(h*b0+k*b1+l*b2); if ( two_e < two_ecut ) { lstart = min(l,lstart); @@ -450,7 +450,7 @@ bool Basis::resize(const UnitCell& cell, const UnitCell& refcell, bool found = false; for ( int l = lmin; l <= lmax; l++ ) { - const double two_e = norm((kpx+h)*b0 + (kpy+k)*b1 + (kpz+l)*b2); + const double two_e = norm2((kpx+h)*b0 + (kpy+k)*b1 + (kpz+l)*b2); if ( two_e < two_ecut ) { lstart = min(l,lstart); @@ -658,10 +658,10 @@ void Basis::update_g(void) kpgx_[locsize+i] = kpgt.y; kpgx_[locsize+locsize+i] = kpgt.z; - g2_[i] = norm(gt); + g2_[i] = norm2(gt); g_[i] = sqrt( g2_[i] ); - kpg2_[i] = norm(kpgt); + kpg2_[i] = norm2(kpgt); kpg_[i] = sqrt( kpg2_[i] ); gi_[i] = g_[i] > 0.0 ? 1.0 / g_[i] : 0.0; diff --git a/src/Basis.h b/src/Basis.h index 0595063..ca1ff43 100644 --- a/src/Basis.h +++ b/src/Basis.h @@ -12,7 +12,7 @@ // //////////////////////////////////////////////////////////////////////////////// // -// Basis.h +// Basis.h // //////////////////////////////////////////////////////////////////////////////// diff --git a/src/BasisMapping.C b/src/BasisMapping.C index 6f456f5..5ffbe68 100644 --- a/src/BasisMapping.C +++ b/src/BasisMapping.C @@ -12,7 +12,7 @@ // //////////////////////////////////////////////////////////////////////////////// // -// BasisMapping.C +// BasisMapping.C // //////////////////////////////////////////////////////////////////////////////// @@ -33,7 +33,6 @@ BasisMapping::BasisMapping (const Basis &basis) : basis_(basis) np0_ = basis.np(0); np1_ = basis.np(1); np2_ = basis.np(2); - np012_ = np0_ * np1_ * np2_; np2_loc_.resize(nprocs_); np2_first_.resize(nprocs_); diff --git a/src/BasisMapping.h b/src/BasisMapping.h index 3f34b1e..8d05273 100644 --- a/src/BasisMapping.h +++ b/src/BasisMapping.h @@ -12,7 +12,7 @@ // //////////////////////////////////////////////////////////////////////////////// // -// BasisMapping.h +// BasisMapping.h // //////////////////////////////////////////////////////////////////////////////// @@ -31,7 +31,7 @@ class BasisMapping const Basis& basis_; int nprocs_, myproc_; - int np0_, np1_, np2_, np012_, np012loc_; + int np0_, np1_, np2_, np012loc_; int nvec_; std::vector np2_loc_; // np2_loc_[iproc], iproc=0, nprocs_-1 @@ -50,7 +50,6 @@ class BasisMapping int np1(void) const { return np1_; } int np2(void) const { return np2_; } int np2loc(void) const { return np2_loc_[myproc_]; } - int np012(void) const { return np012_; } int np012loc(void) const { return np012loc_; } int nvec(void) const { return nvec_; } int zvec_size(void) const { return nvec_ * np2_; } diff --git a/src/Bisection.C b/src/Bisection.C index cc548e1..c9d8dc2 100644 --- a/src/Bisection.C +++ b/src/Bisection.C @@ -74,12 +74,11 @@ int walsh(int l, int n, int i) } //////////////////////////////////////////////////////////////////////////////// -Bisection::Bisection(const SlaterDet& sd, int nlevels[3]) +Bisection::Bisection(const SlaterDet& sd, int nlevels[3]) : ctxt_(sd.context()) { // localization vectors are long int assert(sizeof(long int) >= 4); - gcontext_ = sd.context(); nst_ = sd.nst(); nstloc_ = sd.nstloc(); @@ -110,6 +109,9 @@ Bisection::Bisection(const SlaterDet& sd, int nlevels[3]) if ( np_[idim] % base != 0 ) np_[idim] += base/2; } } + while (!sd.basis().factorizable(np_[0])) np_[0] += (1< max_xyproj_rsize( ndiv_[0]*ndiv_[1] , 0 ); MPI_Allreduce((void*)&xyproj_rsize[0] , (void*)&max_xyproj_rsize[0], - (int)xyproj_rsize.size(), MPI_INT ,MPI_MAX , gcontext_.comm()); + (int)xyproj_rsize.size(), MPI_INT ,MPI_MAX , ctxt_.comm()); // allocate matrices rmat_[i] for ( int i = 0; i < rmat_.size(); i++ ) @@ -400,11 +402,11 @@ void Bisection::compute_transform(const SlaterDet& sd, int maxsweep, double tol) // diagonalize projectors int nsweep = jade(maxsweep,tol,amat_,*u_,adiag_); - if ( nsweep >= maxsweep ) - if ( gcontext_.onpe0() ) - cout << "Bisection::compute_transform: nsweep=" << nsweep - << " maxsweep=" << maxsweep << " tol=" << tol << endl; - +#ifdef TIMING + if ( ctxt_.onpe0() ) + cout << "Bisection::compute_transform: nsweep=" << nsweep + << " maxsweep=" << maxsweep << " tol=" << tol << endl; +#endif #ifdef TIMING tmap["jade"].stop(); @@ -419,7 +421,7 @@ void Bisection::compute_transform(const SlaterDet& sd, int maxsweep, double tol) { // broadcast diagonal of all matrices a to all tasks MPI_Bcast( (void *) &adiag_[imat][0], adiag_[imat].size(), - MPI_DOUBLE, 0, gcontext_.comm() ); + MPI_DOUBLE, 0, ctxt_.comm() ); } // print timers #ifdef TIMING @@ -428,14 +430,14 @@ void Bisection::compute_transform(const SlaterDet& sd, int maxsweep, double tol) double time = (*i).second.real(); double tmin = time; double tmax = time; - gcontext_.dmin(1,1,&tmin,1); - gcontext_.dmax(1,1,&tmax,1); - if ( gcontext_.myproc()==0 ) + ctxt_.dmin(1,1,&tmin,1); + ctxt_.dmax(1,1,&tmax,1); + if ( ctxt_.myproc()==0 ) { - cout << "" + string s = "name=\"" + (*i).first + "\""; + cout << "" << endl; } } @@ -463,7 +465,7 @@ void Bisection::compute_localization(double epsilon) #ifdef DEBUG // print localization vector and number of overlaps (including self) // for each state - if ( gcontext_.onpe0() ) + if ( ctxt_.onpe0() ) { int sum = 0; for ( int i = 0; i < nst_; i++ ) @@ -487,7 +489,7 @@ void Bisection::compute_localization(double epsilon) // broadcast localization to all tasks to ensure consistency MPI_Bcast( (void *) &localization_[0], localization_.size(), - MPI_LONG, 0, gcontext_.comm() ); + MPI_LONG, 0, ctxt_.comm() ); } @@ -672,6 +674,7 @@ void Bisection::trim_amat(const vector& occ) // set to zero the matrix elements of the matrices amat_[k] if they couple // states with differing occupation numbers + const double trim_tol = 1.e-6; // check if all occupation numbers are the same double occ_max = 0.0, occ_min = 2.0; for ( int i = 0; i < occ.size(); i++ ) @@ -680,7 +683,7 @@ void Bisection::trim_amat(const vector& occ) occ_min = min(occ_min, occ[i]); } // return if all occupation numbers are equal - if ( fabs(occ_max-occ_min) < 1.e-12 ) + if ( fabs(occ_max-occ_min) < trim_tol ) return; const int mloc = amat_[0]->mloc(); @@ -694,7 +697,7 @@ void Bisection::trim_amat(const vector& occ) const int jglobal = amat_[0]->jglobal(j); const int ival = i + mloc * j; - if ( fabs(occ[iglobal] - occ[jglobal]) > 1.e-12 ) + if ( fabs(occ[iglobal] - occ[jglobal]) > trim_tol ) for ( int k = 0; k < amat_.size(); k++ ) (*amat_[k])[ival] = 0.0; } @@ -702,13 +705,13 @@ void Bisection::trim_amat(const vector& occ) } //////////////////////////////////////////////////////////////////////////////// -bool Bisection::overlap(int i, int j) +bool Bisection::overlap(int i, int j) const { return overlap(localization_,i,j); } //////////////////////////////////////////////////////////////////////////////// -bool Bisection::overlap(vector loc_, int i, int j) +bool Bisection::overlap(const vector& loc_, int i, int j) const { // overlap: return true if the functions i and j overlap according // to the localization vector loc_ @@ -735,25 +738,28 @@ bool Bisection::overlap(vector loc_, int i, int j) } //////////////////////////////////////////////////////////////////////////////// -double Bisection::pair_fraction(void) +double Bisection::pair_fraction(void) const { // pair_fraction: return fraction of pairs having non-zero overlap + // count pairs (i,j) having non-zero overlap for i != j only int sum = 0; - for ( int i = 0; i < nst_; i++ ) + for ( int i = 1; i < nst_; i++ ) { int count = 0; - for ( int j = 0; j < nst_; j++ ) + for ( int j = i+1; j < nst_; j++ ) { if ( overlap(i,j) ) count++; } sum += count; } - return ((double) sum)/(nst_*nst_); + // add overlap with self: (i,i) + sum += nst_; + return ((double) sum)/((nst_*(nst_+1))/2); } //////////////////////////////////////////////////////////////////////////////// -double Bisection::size(int i) +double Bisection::size(int i) const { // size: return fraction of the domain on which state i is non-zero long int loc = localization_[i]; @@ -776,7 +782,7 @@ double Bisection::size(int i) } //////////////////////////////////////////////////////////////////////////////// -double Bisection::total_size(void) +double Bisection::total_size(void) const { // total_size: return normalized sum of sizes double sum = 0.0; diff --git a/src/Bisection.h b/src/Bisection.h index ee00c9c..5d5ff4c 100644 --- a/src/Bisection.h +++ b/src/Bisection.h @@ -33,7 +33,7 @@ class Bisection { private: - Context gcontext_; + Context ctxt_; // bisection levels in each directions int nlevels_[3]; // bisection level @@ -79,13 +79,14 @@ class Bisection int nmat(void) const { return nmat_; } long int localization(int i) const { return localization_[i]; } - std::vector localization(void) const { return localization_; } - bool overlap(int i, int j); - bool overlap(std::vector loc, int i, int j); + const std::vector& localization(void) const + { return localization_; } + bool overlap(int i, int j) const; + bool overlap(const std::vector& loc, int i, int j) const; const DoubleMatrix& u(void) const { return *u_; } - double pair_fraction(void); - double size(int i); - double total_size(void); + double pair_fraction(void) const; + double size(int i) const; + double total_size(void) const; ~Bisection(); }; #endif diff --git a/src/BisectionCmd.h b/src/BisectionCmd.h index efe5fc0..baec69a 100644 --- a/src/BisectionCmd.h +++ b/src/BisectionCmd.h @@ -83,8 +83,8 @@ class BisectionCmd : public Cmd { cout << " BisectionCmd: lx=" << nLevels[0] << " ly=" << nLevels[1] - << " lz=" << nLevels[2] - << " threshold=" << epsilon << endl; + << " lz=" << nLevels[2] + << " threshold=" << epsilon << endl; // print localization vectors and overlaps int sum = 0; diff --git a/src/CGCellStepper.C b/src/CGCellStepper.C index 773f6ee..f202625 100644 --- a/src/CGCellStepper.C +++ b/src/CGCellStepper.C @@ -25,8 +25,8 @@ CGCellStepper::CGCellStepper(Sample& s) : CellStepper(s), cgopt_(CGOptimizer(3*s.atoms.size()+9)), cell0(s_.atoms.cell()) { nat_ = atoms_.size(); - cgopt_.set_alpha_start(0.05); - cgopt_.set_alpha_max(1.0); + cgopt_.set_alpha_start(0.002); + cgopt_.set_alpha_max(0.5); cgopt_.set_beta_max(10.0); #ifdef DEBUG if ( s.ctxt_.onpe0() ) @@ -177,9 +177,7 @@ void CGCellStepper::compute_new_cell(double e, const valarray& sigma, //////////////////////////////////////////////////////////////////////////////// void CGCellStepper::update_cell(void) { - s_.atoms.sync_positions(rp_); s_.atoms.set_positions(rp_); - s_.atoms.sync_cell(cellp); s_.atoms.set_cell(cellp); u_ = up_; diff --git a/src/CGIonicStepper.C b/src/CGIonicStepper.C index 9041606..3326632 100644 --- a/src/CGIonicStepper.C +++ b/src/CGIonicStepper.C @@ -25,7 +25,7 @@ CGIonicStepper::CGIonicStepper(Sample& s) : IonicStepper(s), cgopt_(CGOptimizer(3*natoms_)) { cgopt_.set_alpha_start(1.0); - cgopt_.set_alpha_max(10.0); + cgopt_.set_alpha_max(5.0); cgopt_.set_beta_max(10.0); #ifdef DEBUG if ( s.ctxt_.onpe0() ) @@ -72,8 +72,11 @@ void CGIonicStepper::compute_r(double e0, const vector >& f0) if ( s_.ctxt_.onpe0() ) cout << " CGIonicStepper: displacement exceeds limit, rescaling" << endl; // rescale displacement and reset the CG optimizer - xp = x + (max_disp/largest_disp) * (xp - x); + double fac = max_disp/largest_disp; + xp = x + fac * (xp - x); + // reduce alpha starting value in CG optmizer + cgopt_.set_alpha_start(fac*cgopt_.alpha_start()); cgopt_.reset(); } @@ -89,6 +92,6 @@ void CGIonicStepper::compute_r(double e0, const vector >& f0) constraints_.enforce_r(r0_,rp_); rm_ = r0_; r0_ = rp_; - atoms_.sync_positions(r0_); atoms_.set_positions(r0_); + atoms_.reset_velocities(); } diff --git a/src/CGOptimizer.C b/src/CGOptimizer.C index 057496e..9554450 100644 --- a/src/CGOptimizer.C +++ b/src/CGOptimizer.C @@ -156,6 +156,12 @@ void CGOptimizer::compute_xp(const valarray& x, const double f, fp0_ = fp; } + // set the starting alpha of the minimizer to be the current alpha_ + if ( alpha_ < linmin_.alpha_max() ) + linmin_.set_alpha_start(alpha_); + else + linmin_.set_alpha_start(0.5*linmin_.alpha_max()); + // reset the line minimizer linmin_.reset(); alpha_ = linmin_.next_alpha(alpha_,f,fp); diff --git a/src/CPSampleStepper.C b/src/CPSampleStepper.C index 7e25f31..2fdccda 100644 --- a/src/CPSampleStepper.C +++ b/src/CPSampleStepper.C @@ -69,10 +69,10 @@ CPSampleStepper::~CPSampleStepper(void) s_.ctxt_.dmax(1,1,&tmax,1); if ( s_.ctxt_.myproc()==0 ) { - cout << "" + string s = "name=\"" + (*i).first + "\""; + cout << "" << endl; } } @@ -109,7 +109,6 @@ void CPSampleStepper::step(int niter) const bool compute_hpsi = true; const bool compute_forces = ( atoms_dyn != "LOCKED" ); const bool compute_stress = ( s_.ctrl.stress == "ON" ); - const bool use_confinement = ( s_.ctrl.ecuts > 0.0 ); CellStepper* cell_stepper = 0; if ( cell_dyn == "SD" ) @@ -135,6 +134,7 @@ void CPSampleStepper::step(int niter) ef_.update_vhxc(compute_stress); double energy = ef_.energy(compute_hpsi,dwf,compute_forces,fion,compute_stress,sigma_eks); + double enthalpy = ef_.enthalpy(); mdwf_stepper->compute_wfm(dwf); @@ -151,34 +151,9 @@ void CPSampleStepper::step(int niter) if ( onpe0 ) { - cout.setf(ios::fixed,ios::floatfield); - cout.setf(ios::right,ios::adjustfield); - cout << " " << setprecision(8) - << setw(15) << ef_.ekin() << " \n"; - if ( use_confinement ) - { - cout << " " << setw(15) << ef_.econf() - << " \n"; - } - cout << " " << setw(15) << ef_.eps() << " \n" - << " " << setw(15) << ef_.enl() << " \n" - << " " << setw(15) << ef_.ecoul() << " \n" - << " " << setw(15) << ef_.exc() << " \n" - << " " << setw(15) << ef_.esr() << " \n" - << " " << setw(15) << ef_.eself() << " \n"; - if ( s_.extforces.size() > 0 ) - cout << " " << setw(15) << ef_.eexf() << " \n"; - cout << " " << setw(15) << ef_.etotal() << " \n" - << flush; - if ( compute_stress ) - { - const double pext = (sigma_ext[0]+sigma_ext[1]+sigma_ext[2])/3.0; - const double enthalpy = ef_.etotal() + pext * s_.wf.cell().volume(); - cout << " " << setw(15) << pext * s_.wf.cell().volume() - << " " << endl; - cout << " " << setw(15) << enthalpy << " \n" - << flush; - } + cout << ef_; + if ( ef_.el_enth() ) + cout << *ef_.el_enth(); } if ( compute_forces ) @@ -244,8 +219,11 @@ void CPSampleStepper::step(int niter) cout << " " << mdionic_stepper->temp() << " \n"; cout << " " << mdionic_stepper->eta() << " \n"; } - cout << " " << energy+ekin_ion+ekin_e << " \n"; - cout << " " << energy+ekin_ion+2*ekin_e << " \n"; + double econst = energy + ekin_ion + ekin_e; + if ( mdionic_stepper ) + econst += mdionic_stepper->ekin_stepper(); + cout << " " << econst << " \n"; + cout << " " << econst + ekin_e << " \n"; } if ( compute_stress ) @@ -276,6 +254,7 @@ void CPSampleStepper::step(int niter) ef_.update_vhxc(compute_stress); energy = ef_.energy(compute_hpsi,dwf,compute_forces,fion,compute_stress,sigma_eks); + enthalpy = ef_.enthalpy(); if ( s_.ctxt_.mype() == 0 ) cout << "" << endl; @@ -289,9 +268,10 @@ void CPSampleStepper::step(int niter) s_.ctxt_.dmax(1,1,&tmax,1); if ( s_.ctxt_.myproc()==0 ) { - cout << " " + string s = "name=\"iteration\""; + cout << "" << endl; } if ( compute_forces ) diff --git a/src/CPSampleStepper.h b/src/CPSampleStepper.h index 5eba229..9155961 100644 --- a/src/CPSampleStepper.h +++ b/src/CPSampleStepper.h @@ -15,7 +15,6 @@ // CPSampleStepper.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: CPSampleStepper.h,v 1.7 2008-09-08 15:56:18 fgygi Exp $ #ifndef CPSAMPLESTEPPER_H #define CPSAMPLESTEPPER_H diff --git a/src/Cell.h b/src/Cell.h index a85296e..7d0c738 100644 --- a/src/Cell.h +++ b/src/Cell.h @@ -15,7 +15,6 @@ // Cell.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: Cell.h,v 1.9 2008-09-08 15:56:18 fgygi Exp $ #ifndef CELL_H #define CELL_H diff --git a/src/CellLock.h b/src/CellLock.h index e333215..7e99b27 100644 --- a/src/CellLock.h +++ b/src/CellLock.h @@ -15,7 +15,6 @@ // CellLock.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: CellLock.h,v 1.6 2008-09-08 15:56:18 fgygi Exp $ #ifndef CELLLOCK_H #define CELLLOCK_H diff --git a/src/CellMass.h b/src/CellMass.h index 488e610..74959df 100644 --- a/src/CellMass.h +++ b/src/CellMass.h @@ -15,7 +15,6 @@ // CellMass.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: CellMass.h,v 1.4 2008-09-08 15:56:18 fgygi Exp $ #ifndef CELLMASS_H #define CELLMASS_H diff --git a/src/ChargeDensity.C b/src/ChargeDensity.C index 8151ee3..0841d7e 100644 --- a/src/ChargeDensity.C +++ b/src/ChargeDensity.C @@ -24,12 +24,12 @@ #include #include // fill +#include using namespace std; //////////////////////////////////////////////////////////////////////////////// ChargeDensity::ChargeDensity(const Wavefunction& wf) : ctxt_(wf.context()), wf_(wf), vcomm_(wf.sd(0,0)->basis().comm()) - { vbasis_ = new Basis(vcomm_, D3vector(0,0,0)); vbasis_->resize(wf.cell(),wf.refcell(),4.0*wf.ecut()); @@ -55,6 +55,7 @@ wf_(wf), vcomm_(wf.sd(0,0)->basis().comm()) #endif vft_ = new FourierTransform(*vbasis_,np0v,np1v,np2v); + total_charge_.resize(wf.nspin()); rhor.resize(wf.nspin()); rhog.resize(wf.nspin()); for ( int ispin = 0; ispin < wf.nspin(); ispin++ ) @@ -89,6 +90,8 @@ wf_(wf), vcomm_(wf.sd(0,0)->basis().comm()) ft_[ikp] = new FourierTransform(wb,np0v,np1v,np2v); } } + // initialize core density ptr to null ptr + rhocore_r = 0; } //////////////////////////////////////////////////////////////////////////////// @@ -107,10 +110,10 @@ ChargeDensity::~ChargeDensity(void) ctxt_.dmax(1,1,&tmax,1); if ( ctxt_.myproc()==0 ) { - cout << "" + string s = "name=\"" + (*i).first + "\""; + cout << "" << endl; } } @@ -160,19 +163,20 @@ void ChargeDensity::update_density(void) // sum on all indices except spin: sum along columns of spincontext wf_.spincontext()->dsum('c',1,1,&sum,1); tmap["charge_integral"].stop(); - if ( ctxt_.onpe0() ) - { - cout.setf(ios::fixed,ios::floatfield); - cout.setf(ios::right,ios::adjustfield); - cout << " total_electronic_charge: " << setprecision(8) << sum - << endl; - } + total_charge_[ispin] = sum; tmap["charge_vft"].start(); vft_->forward(&rhotmp[0],&rhog[ispin][0]); tmap["charge_vft"].stop(); + + // add core correction charge + if ( rhocore_r ) + for ( int i = 0; i < rhor_size; i++ ) + rhor[ispin][i] += rhocore_r[i]; + } } + //////////////////////////////////////////////////////////////////////////////// void ChargeDensity::update_rhor(void) { @@ -191,10 +195,45 @@ void ChargeDensity::update_rhor(void) const int rhor_size = rhor[ispin].size(); double *const prhor = &rhor[ispin][0]; - #pragma omp parallel for - for ( int i = 0; i < rhor_size; i++ ) + if ( rhocore_r ) + { + #pragma omp parallel for + for ( int i = 0; i < rhor_size; i++ ) + prhor[i] = ( rhotmp[i].real() + rhocore_r[i] ) * omega_inv; + } + else { - prhor[i] = rhotmp[i].real() * omega_inv; + #pragma omp parallel for + for ( int i = 0; i < rhor_size; i++ ) + prhor[i] = rhotmp[i].real() * omega_inv; } } } + +//////////////////////////////////////////////////////////////////////////////// +double ChargeDensity::total_charge(void) const +{ + assert((wf_.nspin()==1)||(wf_.nspin()==2)); + if ( wf_.nspin() == 1 ) + return total_charge_[0]; + else + return total_charge_[0] + total_charge_[1]; +} + +//////////////////////////////////////////////////////////////////////////////// +void ChargeDensity::print(ostream& os) const +{ + os.setf(ios::fixed,ios::floatfield); + os.setf(ios::right,ios::adjustfield); + for ( int ispin = 0; ispin < wf_.nspin(); ispin++ ) + os << " " + << setprecision(8) << total_charge(ispin) + << " \n"; +} + +//////////////////////////////////////////////////////////////////////////////// +ostream& operator<< ( ostream& os, const ChargeDensity& cd ) +{ + cd.print(os); + return os; +} diff --git a/src/ChargeDensity.h b/src/ChargeDensity.h index e5e1681..66dc3bd 100644 --- a/src/ChargeDensity.h +++ b/src/ChargeDensity.h @@ -44,6 +44,7 @@ class ChargeDensity FourierTransform* vft_; std::vector ft_; // ft_[ikp]; std::valarray > rhotmp; + std::vector total_charge_; public: @@ -51,7 +52,8 @@ class ChargeDensity std::vector > rhor; // rhor[ispin][i] std::vector > > rhog; // rhog[ispin][ig] - + // core density ptr. If non-zero, contains the real-space core density + double* rhocore_r; void update_density(void); void update_rhor(void); @@ -60,8 +62,13 @@ class ChargeDensity Basis* vbasis(void) const { return vbasis_; } FourierTransform* vft(void) const { return vft_; } FourierTransform* ft(int ikp) const { return ft_[ikp]; } + double total_charge(int ispin) const { return total_charge_[ispin]; } + double total_charge(void) const; + + void print(std::ostream& os) const; ChargeDensity(const Wavefunction& wf); ~ChargeDensity(); }; +std::ostream& operator << ( std::ostream& os, const ChargeDensity& cd ); #endif diff --git a/src/ChargeMixCoeff.h b/src/ChargeMixCoeff.h index b00350e..fa15087 100644 --- a/src/ChargeMixCoeff.h +++ b/src/ChargeMixCoeff.h @@ -15,7 +15,6 @@ // ChargeMixCoeff.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: ChargeMixCoeff.h,v 1.6 2008-09-08 15:56:18 fgygi Exp $ #ifndef CHARGEMIXCOEFF_H #define CHARGEMIXCOEFF_H diff --git a/src/ChargeMixNdim.h b/src/ChargeMixNdim.h index a0a0294..9b4e1b4 100644 --- a/src/ChargeMixNdim.h +++ b/src/ChargeMixNdim.h @@ -15,7 +15,6 @@ // ChargeMixNdim.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: ChargeMixNdim.h,v 1.1 2009-03-08 01:09:58 fgygi Exp $ #ifndef CHARGEMIXNDIM_H #define CHARGEMIXNDIM_H diff --git a/src/ChargeMixRcut.h b/src/ChargeMixRcut.h index 096a225..3a5abf0 100644 --- a/src/ChargeMixRcut.h +++ b/src/ChargeMixRcut.h @@ -15,7 +15,6 @@ // ChargeMixRcut.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: ChargeMixRcut.h,v 1.4 2008-09-08 15:56:18 fgygi Exp $ #ifndef CHARGEMIXRCUT_H #define CHARGEMIXRCUT_H diff --git a/src/ComputeMLWFCmd.C b/src/ComputeMLWFCmd.C index 69414ab..2477c9e 100644 --- a/src/ComputeMLWFCmd.C +++ b/src/ComputeMLWFCmd.C @@ -15,7 +15,6 @@ // ComputeMLWFCmd.C: // //////////////////////////////////////////////////////////////////////////////// -// $Id: ComputeMLWFCmd.C,v 1.8 2008-11-14 22:11:30 fgygi Exp $ #include "ComputeMLWFCmd.h" #include @@ -48,6 +47,7 @@ int ComputeMLWFCmd::action(int argc, char **argv) SlaterDet& sd = *(wf.sd(ispin,0)); MLWFTransform* mlwft = new MLWFTransform(sd); + mlwft->update(); mlwft->compute_transform(); mlwft->apply_transform(sd); diff --git a/src/ComputeMLWFCmd.h b/src/ComputeMLWFCmd.h index d2b89db..ccacad3 100644 --- a/src/ComputeMLWFCmd.h +++ b/src/ComputeMLWFCmd.h @@ -15,7 +15,6 @@ // ComputeMLWFCmd.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: ComputeMLWFCmd.h,v 1.5 2008-11-14 22:11:30 fgygi Exp $ #ifndef COMPUTEMLWFCMD_H #define COMPUTEMLWFCMD_H diff --git a/src/ConfinementPotential.C b/src/ConfinementPotential.C index 5f46367..27e933e 100644 --- a/src/ConfinementPotential.C +++ b/src/ConfinementPotential.C @@ -15,7 +15,6 @@ // ConfinementPotential.C // //////////////////////////////////////////////////////////////////////////////// -// $Id: ConfinementPotential.C,v 1.4 2008-09-08 15:56:18 fgygi Exp $ #include "ConfinementPotential.h" #include "Basis.h" diff --git a/src/ConfinementPotential.h b/src/ConfinementPotential.h index 1af4e0a..713cfa8 100644 --- a/src/ConfinementPotential.h +++ b/src/ConfinementPotential.h @@ -15,7 +15,6 @@ // ConfinementPotential.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: ConfinementPotential.h,v 1.7 2008-09-08 15:56:18 fgygi Exp $ #ifndef CONFINEMENTPOTENTIAL_H #define CONFINEMENTPOTENTIAL_H diff --git a/src/Constraint.C b/src/Constraint.C index ed8a054..9da11fb 100644 --- a/src/Constraint.C +++ b/src/Constraint.C @@ -12,10 +12,9 @@ // //////////////////////////////////////////////////////////////////////////////// // -// Constraint.C +// Constraint.C // //////////////////////////////////////////////////////////////////////////////// -// $Id: Constraint.C,v 1.4 2008-09-08 15:56:18 fgygi Exp $ #include "Constraint.h" #include diff --git a/src/Constraint.h b/src/Constraint.h index 5ccdc95..6e113c5 100644 --- a/src/Constraint.h +++ b/src/Constraint.h @@ -12,10 +12,9 @@ // //////////////////////////////////////////////////////////////////////////////// // -// Constraint.h +// Constraint.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: Constraint.h,v 1.8 2009-05-15 04:38:48 fgygi Exp $ #ifndef CONSTRAINT_H #define CONSTRAINT_H diff --git a/src/ConstraintCmd.h b/src/ConstraintCmd.h index 9f9198a..47cb3e3 100644 --- a/src/ConstraintCmd.h +++ b/src/ConstraintCmd.h @@ -15,7 +15,6 @@ // ConstraintCmd.h: // //////////////////////////////////////////////////////////////////////////////// -// $Id: ConstraintCmd.h,v 1.6 2009-04-30 22:24:08 fgygi Exp $ #ifndef CONSTRAINTCMD_H #define CONSTRAINTCMD_H @@ -46,6 +45,7 @@ class ConstraintCmd : public Cmd " constraint list\n" " constraint enforce\n\n" " Constraints are enforced at each MD step if ions are allowed to move.\n" + " If a distance or angle is replaced by '*', the current value is used.\n" " Velocity parameters are optional.\n\n"; } diff --git a/src/ConstraintSet.C b/src/ConstraintSet.C index bf9f07a..f496850 100644 --- a/src/ConstraintSet.C +++ b/src/ConstraintSet.C @@ -15,14 +15,12 @@ // ConstraintSet.C // //////////////////////////////////////////////////////////////////////////////// -// $Id: ConstraintSet.C,v 1.11 2010-02-20 23:13:02 fgygi Exp $ #include "ConstraintSet.h" #include "PositionConstraint.h" #include "DistanceConstraint.h" #include "AngleConstraint.h" #include "TorsionConstraint.h" -//#include "MultiDistanceConstraint.h" #include "Atom.h" #include "AtomSet.h" #include "Context.h" @@ -31,7 +29,7 @@ #include // atof using namespace std; -const int constraints_maxiter = 10; +const int constraints_maxiter = 50; //////////////////////////////////////////////////////////////////////////////// ConstraintSet::~ConstraintSet(void) @@ -194,7 +192,18 @@ bool ConstraintSet::define_constraint(AtomSet &atoms, int argc, char **argv) return false; } - distance = atof(argv[6]); + // define distance value + if ( !strcmp(argv[6],"*") ) + { + // use current distance + distance = length(a1->position()-a2->position()); + if ( onpe0 ) + cout << "ConstraintSet::define_constraint: using current distance " + << distance << endl; + } + else + distance = atof(argv[6]); + if ( argc == 8 ) { velocity = atof(argv[7]); @@ -225,8 +234,10 @@ bool ConstraintSet::define_constraint(AtomSet &atoms, int argc, char **argv) if ( found ) { if ( onpe0 ) - cout << " ConstraintSet: constraint is already defined:\n" - << " cannot define constraint" << endl; + cout << " ConstraintSet: a distance constraint named " << name << endl + << " or involving atoms " << name1 << " " << name2 << endl + << " is already defined" << endl + << " ConstraintSet: cannot define constraint" << endl; return false; } else @@ -282,7 +293,33 @@ bool ConstraintSet::define_constraint(AtomSet &atoms, int argc, char **argv) return false; } - const double angle = atof(argv[7]); + // define angle value + double angle = 0.0; + if ( !strcmp(argv[7],"*") ) + { + // use current angle + D3vector r12(a1->position()-a2->position()); + D3vector r32(a3->position()-a2->position()); + if ( norm2(r12) == 0.0 || norm2(r32) == 0.0 ) + { + if ( onpe0 ) + { + cout << " ConstraintSet: cannot define angle constraint." << endl; + cout << " ConstraintSet: atoms are too close" << endl; + return false; + } + } + const double sp = normalized(r12) * normalized(r32); + const double c = max(-1.0,min(1.0,sp)); + angle = (180.0/M_PI)*acos(c); + + if ( onpe0 ) + cout << "ConstraintSet::define_constraint: using current angle " + << angle << endl; + } + else + angle = atof(argv[7]); + double velocity = 0.0; if ( argc == 9 ) { @@ -319,9 +356,10 @@ bool ConstraintSet::define_constraint(AtomSet &atoms, int argc, char **argv) if ( found ) { if ( onpe0 ) - cout << " ConstraintSet:set_constraint: an angle constraint " - << name1 << " " << name2 << " " << name3 - << " was found" << endl + cout << " ConstraintSet: an angle constraint named " << name << endl + << " or involving atoms " + << name1 << " " << name2 << " " << name3 << endl + << " is already defined" << endl << " ConstraintSet: cannot define constraint" << endl; return false; } @@ -382,7 +420,48 @@ bool ConstraintSet::define_constraint(AtomSet &atoms, int argc, char **argv) return false; } - double angle = atof(argv[8]); + // define angle value + double angle = 0.0; + if ( !strcmp(argv[8],"*") ) + { + // use current angle + D3vector r12(a1->position()-a2->position()); + D3vector r32(a3->position()-a2->position()); + D3vector r43(a4->position()-a3->position()); + if ( norm2(r12) == 0.0 || norm2(r32) == 0.0 || norm2(r43) == 0.0 ) + { + if ( onpe0 ) + { + cout << " ConstraintSet: cannot define torsion constraint." << endl; + cout << " ConstraintSet: atoms are too close" << endl; + return false; + } + } + + D3vector e12(normalized(r12)); + D3vector e32(normalized(r32)); + D3vector e23(-e32); + D3vector e43(normalized(r43)); + + const double sin123 = length(e12^e32); + const double sin234 = length(e23^e43); + + if ( sin123 != 0.0 && sin234 != 0.0 ) + { + D3vector e123 = normalized(e12^e32); + D3vector e234 = normalized(e23^e43); + double cc = max(min(e123*e234,1.0),-1.0); + double ss = max(min((e123^e234)*e32,1.0),-1.0); + angle = (180.0/M_PI) * atan2(ss,cc); + } + + if ( onpe0 ) + cout << "ConstraintSet::define_constraint: using current angle " + << angle << endl; + } + else + angle = atof(argv[8]); + if ( angle > 180.0 ) while ( angle > 180.0 ) angle -= 360.0; else if ( angle < -180.0 ) @@ -403,7 +482,7 @@ bool ConstraintSet::define_constraint(AtomSet &atoms, int argc, char **argv) assert(pc != 0); // check if an equivalent constraint (name1,name2,name3,name4) or // (name4,name3,name2,name1) is defined - if ( pc->type() == "angle" ) + if ( pc->type() == "torsion" ) found = ( pc->names(0) == name1 && pc->names(1) == name2 && pc->names(2) == name3 && @@ -418,8 +497,9 @@ bool ConstraintSet::define_constraint(AtomSet &atoms, int argc, char **argv) if ( found ) { if ( onpe0 ) - cout << " ConstraintSet: a torsion constraint " - << name1 << " " << name2 << " " << name3 << " " << name4 + cout << " ConstraintSet: a torsion constraint named " << name << endl + << " or involving atoms " + << name1 << " " << name2 << " " << name3 << " " << name4 << endl << " is already defined" << endl << " ConstraintSet: cannot define constraint" << endl; return false; @@ -560,10 +640,8 @@ void ConstraintSet::enforce(AtomSet& atoms) rp=r0; atoms.get_velocities(v0); enforce_r(r0,rp); - atoms.sync_positions(rp); atoms.set_positions(rp); enforce_v(r0,v0); - atoms.sync_velocities(v0); atoms.set_velocities(v0); } //////////////////////////////////////////////////////////////////////////////// diff --git a/src/ConstraintSet.h b/src/ConstraintSet.h index 8469921..33f3bd1 100644 --- a/src/ConstraintSet.h +++ b/src/ConstraintSet.h @@ -15,7 +15,6 @@ // ConstraintSet.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: ConstraintSet.h,v 1.10 2010-02-20 23:13:02 fgygi Exp $ #ifndef CONSTRAINTSET_H #define CONSTRAINTSET_H diff --git a/src/Context.C b/src/Context.C index ad112a4..3a0ead9 100644 --- a/src/Context.C +++ b/src/Context.C @@ -15,369 +15,31 @@ // Context.C // //////////////////////////////////////////////////////////////////////////////// -// $Id: Context.C,v 1.18 2009-11-30 02:33:49 fgygi Exp $ #include "Context.h" #include #include #include #include +#include #include - -#ifdef SCALAPACK #include "blacs.h" -#endif - -using namespace std; - -#ifndef SCALAPACK -void Cblacs_pinfo(int *mypnum, int *nprocs) -{ - *mypnum = 0; - *nprocs = 1; - return; -} - -void Cblacs_get(int icontxt, int what, int *val) -{ - *val=0; - return; -} - -void Cblacs_gridinit(int *icontxt, char order[], int nprow, int npcol) -{ - *icontxt=0; - return; -} - -void Cblacs_gridmap(int *icontxt, int *pmap, int ldpmap, int nprow, int npcol) -{ - pmap[0]=0; - return; -} - -void Cblacs_barrier(int icontxt, char* scope) -{ - return; -} - -void Cblacs_abort(int icontxt, int errornum) -{ - return; -} - -void Cblacs_gridexit(int icontxt) -{ - return; -} - -void Cblacs_gridinfo(int icontxt, int *nprow, int *npcol, - int *myprow, int *mypcol) -{ - *nprow=1; *npcol=1; *myprow=0; *mypcol=0; return; -} - -int Cblacs_pnum(int icontxt, int prow, int pcol) -{ return 0;} - -void Cdgesd2d(int icontxt,int m,int n,double *A,int lda,int rdest,int cdest) -{ return; } - -void Cdgerv2d(int icontxt,int m,int n,double *A,int lda,int rdest,int cdest) -{ return; } - -void Cdgsum2d(int icontxt, char* scope, char* top, - int m, int n, double *a, int lda, int rdest, int cdest) -{ return; } - -void Cdgamx2d(int icontxt, char scope[], char top[],int m,int n,double *A, - int lda, int *ra, int *ca, int rcflag, int rdest, int cdest) -{ if ( rcflag > 0 ) { *ca = 0; *ra = 0; } return; } - -void Cdgamn2d(int icontxt, char scope[], char top[],int m,int n,double *A, - int lda, int *ra, int *ca, int rcflag, int rdest, int cdest) -{ if ( rcflag > 0 ) { *ca = 0; *ra = 0; } return; } - -void Cdgebs2d(int icontxt, char scope[], char top[],int m,int n,double *A, - int lda) -{ return; } - -void Cdgebr2d(int icontxt, char scope[], char top[],int m,int n,double *A, - int lda, int rsrc, int csrc) -{ return; } - -void Cigesd2d(int icontxt,int m,int n,int *A,int lda,int rdest,int cdest) -{ return; } - -void Cigerv2d(int icontxt,int m,int n,int *A,int lda,int rdest,int cdest) -{ return; } - -void Cigsum2d(int icontxt, char* scope, char* top, - int m, int n, int *a, int lda, int rdest, int cdest) -{ return; } - -void Cigamx2d(int icontxt, char scope[], char top[],int m,int n,int *A,int lda, - int *ra, int *ca, int rcflag, int rdest, int cdest) -{ if ( rcflag > 0 ) { *ca = 0; *ra = 0; } return; } - -void Cigamn2d(int icontxt, char scope[], char top[],int m,int n,int *A,int lda, - int *ra, int *ca, int rcflag, int rdest, int cdest) -{ if ( rcflag > 0 ) { *ca = 0; *ra = 0; } return; } - -void Cigebs2d(int icontxt, char scope[], char top[],int m,int n,int *A, - int lda) -{ return; } - -void Cigebr2d(int icontxt, char scope[], char top[],int m,int n,int *A, - int lda, int rsrc, int csrc) -{ return; } - -#endif - -#if USE_MPI #include -#else -typedef int MPI_Comm; -const int MPI_COMM_NULL = 0; -#endif - -struct ContextRep -{ - private: - - int ictxt_; - int myrow_; - int mycol_; - int nprow_; - int npcol_; - int size_; - int myproc_; - int mype_; - bool onpe0_; - bool active_; - - vector pmap_; - MPI_Comm comm_; - - // keep assignment and copy constructors private - ContextRep& operator=(const Context& c); - ContextRep(const Context& c); - - public: - - int ictxt() const { return ictxt_; } - int myrow() const { return myrow_; } - int mycol() const { return mycol_; } - int nprow() const { return nprow_; } - int npcol() const { return npcol_; } - - // number of processes in the context - // returns -1 if current process is not part of this context - int size() const { return size_; } - // position of current process in row-major order - // returns -1 if current process is not part of this context - int myproc() const { return myproc_; } - int mype() const { return mype_; } - int pmap(int irow, int icol) const { return pmap_[irow+nprow_*icol]; } - - bool onpe0(void) const { return onpe0_; } - bool active(void) const { return active_; } - void abort(int ierr) const { Cblacs_abort(ictxt_,ierr); } - void barrier(void) const { Cblacs_barrier(ictxt_,"A"); } - void barrier(char scope) const { Cblacs_barrier(ictxt_,&scope); } - - void dsend(int m, int n, double* a, int lda, int rdest, int cdest) const - { - Cdgesd2d(ictxt_,m,n,a,lda,rdest,cdest); - } - - void drecv(int m, int n, double* a, int lda, int rsrc, int csrc) const - { - Cdgerv2d(ictxt_,m,n,a,lda,rsrc,csrc); - } - - void dsum(char scope, char topology, int m, int n, double* a, int lda, - int rdest, int cdest) const - { - Cdgsum2d(ictxt_,&scope,&topology,m,n,a,lda,rdest,cdest); - } - - void dmax(char scope, char topology, int m, int n, double* a, int lda, - int rdest, int cdest) const - { - Cdgamx2d(ictxt_,&scope,&topology,m,n,a,lda,(int*)0,(int*)0,-1,rdest,cdest); - } - - void dmax(char scope, char topology, int m, int n, double* a, int lda, - int* ra, int* ca, int rcflag, int rdest, int cdest) const - { - Cdgamx2d(ictxt_,&scope,&topology,m,n,a,lda,ra,ca,rcflag,rdest,cdest); - } - - void dmin(char scope, char topology, int m, int n, double* a, int lda, - int* ra, int* ca, int rcflag, int rdest, int cdest) const - { - Cdgamn2d(ictxt_,&scope,&topology,m,n,a,lda,ra,ca,rcflag,rdest,cdest); - } - - void dmin(char scope, char topology, int m, int n, double* a, int lda, - int rdest, int cdest) const - { - Cdgamn2d(ictxt_,&scope,&topology,m,n,a,lda,(int*)0,(int*)0,-1,rdest,cdest); - } - - void dbcast_send(char scope, char topology, - int m, int n, double* a,int lda) const - { - Cdgebs2d(ictxt_,&scope,&topology,m,n,a,lda); - } - - void dbcast_recv(char scope, char topology, int m, int n, double* a, int lda, - int rsrc, int csrc) const - { - Cdgebr2d(ictxt_,&scope,&topology,m,n,a,lda,rsrc,csrc); - } - - void isend(int m, int n, int* a, int lda, int rdest, int cdest) const - { - Cigesd2d(ictxt_,m,n,a,lda,rdest,cdest); - } - - void irecv(int m, int n, int* a, int lda, int rsrc, int csrc) const - { - Cigerv2d(ictxt_,m,n,a,lda,rsrc,csrc); - } - - void isum(char scope, char topology, int m, int n, int* a, int lda, - int rdest, int cdest) const - { - Cigsum2d(ictxt_,&scope,&topology,m,n,a,lda,rdest,cdest); - } - - void imax(char scope, char topology, int m, int n, int* a, int lda, - int* ra, int* ca, int rcflag, int rdest, int cdest) const - { - Cigamx2d(ictxt_,&scope,&topology,m,n,a,lda,ra,ca,rcflag,rdest,cdest); - } - - void imax(char scope, char topology, int m, int n, int* a, int lda, - int rdest, int cdest) const - { - Cigamx2d(ictxt_,&scope,&topology,m,n,a,lda,(int*)0,(int*)0,-1,rdest,cdest); - } - - void imin(char scope, char topology, int m, int n, int* a, int lda, - int* ra, int* ca, int rcflag, int rdest, int cdest) const - { - Cigamn2d(ictxt_,&scope,&topology,m,n,a,lda,ra,ca,rcflag,rdest,cdest); - } - - void imin(char scope, char topology, int m, int n, int* a, int lda, - int rdest, int cdest) const - { - Cigamn2d(ictxt_,&scope,&topology,m,n,a,lda,(int*)0,(int*)0,-1,rdest,cdest); - } - - void ibcast_send(char scope, char topology, - int m, int n, int* a,int lda) const - { - Cigebs2d(ictxt_,&scope,&topology,m,n,a,lda); - } - - void ibcast_recv(char scope, char topology, int m, int n, int* a, int lda, - int rsrc, int csrc) const - { - Cigebr2d(ictxt_,&scope,&topology,m,n,a,lda,rsrc,csrc); - } - - void string_send(string& s, int rdest, int cdest) const - { -#if USE_MPI - int len = s.size(); - isend(1,1,&len,1,rdest,cdest); - int ilen = len/(sizeof(int)/sizeof(char)); - if ( len%(sizeof(int)/sizeof(char)) != 0 ) ilen++; - int* ibuf = new int[ilen]; - s.copy((char*)ibuf,string::npos); - isend(ilen,1,ibuf,ilen,rdest,cdest); - delete [] ibuf; -#endif - } - - void string_recv(string& s, int rsrc, int csrc) const - { -#if USE_MPI - int len = -1; - irecv(1,1,&len,1,rsrc,csrc); - int ilen = len/(sizeof(int)/sizeof(char)); - if ( len%(sizeof(int)/sizeof(char)) != 0 ) ilen++; - int* ibuf = new int[ilen]; - irecv(ilen,1,ibuf,ilen,rsrc,csrc); - s.resize(len); - s.assign((char*)ibuf,len); - delete [] ibuf; -#endif - } - - void string_bcast(string& s, int isrc) const - { -#if USE_MPI - int len; - if ( mype() == isrc ) - { - len = s.length(); - } - MPI_Bcast(&len,1,MPI_INT,isrc,comm()); - char* buf = new char[len+1]; - // s is initialized only on task isrc - if ( mype() == isrc ) - { - s.copy(buf,string::npos); - buf[len]=0; - assert(buf[len]=='\0'); - } - MPI_Bcast(buf,len+1,MPI_CHAR,isrc,comm()); - s = buf; - delete [] buf; -#endif - } - - bool operator==(const ContextRep& c) const - { return (ictxt_==c.ictxt());} - - // MPI communicator for this context. Returns MPI_COMM_NULL if - // this process is not part of the context - MPI_Comm comm(void) const { return comm_; } - - // Constructors - - // default global context: construct a single-row global ContextRep - explicit ContextRep(); - - // global ContextRep of size nprow * npcol with column major order - explicit ContextRep(int nprow, int npcol); - - // construct a ContextRep of size nprow*npcol using the processes - // in context c starting at process (irstart,icstart) - explicit ContextRep(const ContextRep &c, int nprow, int npcol, - int irstart, int icstart); - - ~ContextRep(); - - void print(ostream& os) const; -}; +using namespace std; //////////////////////////////////////////////////////////////////////////////// -ContextRep::ContextRep() : ictxt_(-1), myrow_(-1), mycol_(-1) +ContextRep::ContextRep(MPI_Comm comm) : ictxt_(-1), myrow_(-1), mycol_(-1) { - // construct a single row global context + // construct a single-row Context int nprocs; char order='R'; - Cblacs_pinfo( &mype_, &nprocs ); + MPI_Comm_dup(comm,&comm_); + MPI_Comm_size(comm_,&nprocs); + MPI_Comm_rank(comm_,&mype_); + ictxt_ = Csys2blacs_handle(comm_); nprow_ = 1; npcol_ = nprocs; - Cblacs_get(0, 0, &ictxt_ ); Cblacs_gridinit( &ictxt_, &order, nprow_, npcol_ ); // get values of nprow_, npcol_, myrow_ and mycol_ in the new context @@ -393,32 +55,30 @@ ContextRep::ContextRep() : ictxt_(-1), myrow_(-1), mycol_(-1) for ( int i = 0; i < size_; i++ ) pmap_[i] = i; -#if USE_MPI MPI_Group group_world, subgroup; - MPI_Comm_group(MPI_COMM_WORLD,&group_world); + MPI_Comm_group(comm,&group_world); MPI_Group_incl(group_world,size_,&pmap_[0],&subgroup); - MPI_Comm_create(MPI_COMM_WORLD,subgroup,&comm_); + MPI_Comm_create(comm,subgroup,&comm_); MPI_Group_free(&group_world); MPI_Group_free(&subgroup); -#else - comm_ = 0; -#endif } //////////////////////////////////////////////////////////////////////////////// -ContextRep::ContextRep(int nprow, int npcol) : +ContextRep::ContextRep(MPI_Comm comm, int nprow, int npcol) : ictxt_(-1), myrow_(-1), mycol_(-1), nprow_(nprow), npcol_(npcol) { int nprocs; char order = 'C'; - Cblacs_pinfo( &mype_, &nprocs ); + MPI_Comm_dup(comm,&comm_); + MPI_Comm_size(comm_,&nprocs); + MPI_Comm_rank(comm_,&mype_); + ictxt_ = Csys2blacs_handle(comm_); if ( nprocs < nprow * npcol ) { cout << " nprocs=" << nprocs << endl; cout << " Context nprow*npcol > nprocs" << endl; Cblacs_abort(ictxt_, 1); } - Cblacs_get(0, 0, &ictxt_ ); Cblacs_gridinit( &ictxt_, &order, nprow, npcol ); // get values of nprow_, npcol_, myrow_ and mycol_ in the new context @@ -440,84 +100,157 @@ ContextRep::ContextRep(int nprow, int npcol) : i++; } -#if USE_MPI MPI_Group group_world, subgroup; - MPI_Comm_group(MPI_COMM_WORLD,&group_world); + MPI_Comm_group(comm,&group_world); MPI_Group_incl(group_world,size_,&pmap_[0],&subgroup); - MPI_Comm_create(MPI_COMM_WORLD,subgroup,&comm_); + MPI_Comm_create(comm,subgroup,&comm_); MPI_Group_free(&group_world); MPI_Group_free(&subgroup); -#else - comm_ = 0; -#endif } //////////////////////////////////////////////////////////////////////////////// -ContextRep::ContextRep(const ContextRep& c, int nprow, int npcol, - int irstart, int icstart) : -ictxt_(-1), myrow_(-1), mycol_(-1), nprow_(nprow), npcol_(npcol) +ContextRep::~ContextRep() { - assert(c.active()); - vector gmap_; - - int nprocs; - Cblacs_pinfo( &mype_, &nprocs ); - // construct a (nprow*npcol) context using the processes in c - // located at (irstart,icstart) - if ( irstart < 0 || icstart < 0 || - irstart+nprow > c.nprow() || icstart+npcol > c.npcol() ) + if ( myrow_ != -1 ) { - cout << " Context::Context: cut rectangle: invalid parameters" << endl; - cout << irstart << " " << icstart << " " << nprow << " " << npcol << endl; - Cblacs_abort(ictxt_, 1); + // cout << " ContextRep destructor called on ictxt = " << ictxt_ << endl; + Cblacs_gridexit( ictxt_ ); + MPI_Comm_free(&comm_); } - pmap_.resize(nprow*npcol); - gmap_.resize(nprow*npcol); - // build pmap - int i = 0; - for ( int ic = icstart; ic < icstart+npcol; ic++ ) - for ( int ir = irstart; ir < irstart+nprow; ir++ ) - { - pmap_[i] = c.pmap(ir,ic); - gmap_[i] = ic + c.npcol()*ir; - i++; - } +} +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::dsend(int m, int n, double* a, int lda, int rdest, int cdest) + const { Cdgesd2d(ictxt_,m,n,a,lda,rdest,cdest); } - Cblacs_get(c.ictxt(), 10, &ictxt_ ); - Cblacs_gridmap(&ictxt_,&gmap_[0],nprow,nprow,npcol); +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::drecv(int m, int n, double* a, int lda, int rsrc, int csrc) + const { Cdgerv2d(ictxt_,m,n,a,lda,rsrc,csrc); } - // get values of nprow_, npcol_, myrow_ and mycol_ in the new context - if ( ictxt_ >= 0 ) - Cblacs_gridinfo(ictxt_, &nprow_, &npcol_, &myrow_, &mycol_); +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::dsum(char scope, char topology, int m, int n, double* a, + int lda, int rdest, int cdest) const +{ Cdgsum2d(ictxt_,&scope,&topology,m,n,a,lda,rdest,cdest); } - size_ = nprow_ * npcol_; - myproc_ = myrow_ < 0 ? -1 : mycol_ + npcol_ * myrow_; - onpe0_ = ( mype_ == 0 ); - active_ = ( ictxt_ >= 0 ); +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::dmax(char scope, char topology, int m, int n, double* a, + int lda, int rdest, int cdest) const +{ Cdgamx2d(ictxt_,&scope,&topology,m,n,a,lda,(int*)0,(int*)0,-1,rdest,cdest); } -#if USE_MPI - MPI_Group c_group, subgroup; - MPI_Comm_group(c.comm(),&c_group); - MPI_Group_incl(c_group,size_,&pmap_[0],&subgroup); - MPI_Comm_create(c.comm(),subgroup,&comm_); - MPI_Group_free(&c_group); - MPI_Group_free(&subgroup); -#else - comm_ = 0; -#endif +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::dmax(char scope, char topology, int m, int n, double* a, + int lda, int* ra, int* ca, int rcflag, int rdest, int cdest) const +{ Cdgamx2d(ictxt_,&scope,&topology,m,n,a,lda,ra,ca,rcflag,rdest,cdest); } + +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::dmin(char scope, char topology, int m, int n, double* a, + int lda, int* ra, int* ca, int rcflag, int rdest, int cdest) const +{ Cdgamn2d(ictxt_,&scope,&topology,m,n,a,lda,ra,ca,rcflag,rdest,cdest); } + +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::dmin(char scope, char topology, int m, int n, double* a, + int lda, int rdest, int cdest) const +{ Cdgamn2d(ictxt_,&scope,&topology,m,n,a,lda,(int*)0,(int*)0,-1,rdest,cdest); } + +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::dbcast_send(char scope, char topology, int m, int n, + double* a,int lda) const +{ Cdgebs2d(ictxt_,&scope,&topology,m,n,a,lda); } + +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::dbcast_recv(char scope, char topology, int m, int n, + double* a, int lda, int rsrc, int csrc) const +{ Cdgebr2d(ictxt_,&scope,&topology,m,n,a,lda,rsrc,csrc); } + +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::isend(int m, int n, int* a, int lda, int rdest, int cdest) + const { Cigesd2d(ictxt_,m,n,a,lda,rdest,cdest); } + +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::irecv(int m, int n, int* a, int lda, int rsrc, int csrc) const +{ Cigerv2d(ictxt_,m,n,a,lda,rsrc,csrc); } + +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::isum(char scope, char topology, int m, int n, int* a, int lda, + int rdest, int cdest) const +{ Cigsum2d(ictxt_,&scope,&topology,m,n,a,lda,rdest,cdest); } + +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::imax(char scope, char topology, int m, int n, int* a, int lda, + int* ra, int* ca, int rcflag, int rdest, int cdest) const +{ Cigamx2d(ictxt_,&scope,&topology,m,n,a,lda,ra,ca,rcflag,rdest,cdest); } + +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::imax(char scope, char topology, int m, int n, int* a, int lda, + int rdest, int cdest) const +{ Cigamx2d(ictxt_,&scope,&topology,m,n,a,lda,(int*)0,(int*)0,-1,rdest,cdest); } + +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::imin(char scope, char topology, int m, int n, int* a, int lda, + int* ra, int* ca, int rcflag, int rdest, int cdest) const +{ Cigamn2d(ictxt_,&scope,&topology,m,n,a,lda,ra,ca,rcflag,rdest,cdest); } + +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::imin(char scope, char topology, int m, int n, int* a, int lda, + int rdest, int cdest) const +{ Cigamn2d(ictxt_,&scope,&topology,m,n,a,lda,(int*)0,(int*)0,-1,rdest,cdest); } + +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::ibcast_send(char scope, char topology, + int m, int n, int* a,int lda) const +{ Cigebs2d(ictxt_,&scope,&topology,m,n,a,lda); } + +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::ibcast_recv(char scope, char topology, int m, int n, int* a, + int lda, int rsrc, int csrc) const +{ Cigebr2d(ictxt_,&scope,&topology,m,n,a,lda,rsrc,csrc); } + +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::string_send(std::string& s, int rdest, int cdest) const +{ + int len = s.size(); + isend(1,1,&len,1,rdest,cdest); + int ilen = len/(sizeof(int)/sizeof(char)); + if ( len%(sizeof(int)/sizeof(char)) != 0 ) ilen++; + int* ibuf = new int[ilen]; + s.copy((char*)ibuf,std::string::npos); + isend(ilen,1,ibuf,ilen,rdest,cdest); + delete [] ibuf; } //////////////////////////////////////////////////////////////////////////////// -ContextRep::~ContextRep() +void ContextRep::string_recv(std::string& s, int rsrc, int csrc) const { - if ( myrow_ != -1 ) + int len = -1; + irecv(1,1,&len,1,rsrc,csrc); + int ilen = len/(sizeof(int)/sizeof(char)); + if ( len%(sizeof(int)/sizeof(char)) != 0 ) ilen++; + int* ibuf = new int[ilen]; + irecv(ilen,1,ibuf,ilen,rsrc,csrc); + s.resize(len); + s.assign((char*)ibuf,len); + delete [] ibuf; +} + +//////////////////////////////////////////////////////////////////////////////// +void ContextRep::string_bcast(std::string& s, int isrc) const +{ + int len; + if ( mype() == isrc ) { - // cout << " ContextRep destructor called on ictxt = " << ictxt_ << endl; - Cblacs_gridexit( ictxt_ ); -#if USE_MPI - MPI_Comm_free(&comm_); -#endif + len = s.length(); + } + MPI_Bcast(&len,1,MPI_INT,isrc,comm()); + char* buf = new char[len+1]; + // s is initialized only on task isrc + if ( mype() == isrc ) + { + s.copy(buf,std::string::npos); + buf[len]=0; + assert(buf[len]=='\0'); } + MPI_Bcast(buf,len+1,MPI_CHAR,isrc,comm()); + s = buf; + delete [] buf; } //////////////////////////////////////////////////////////////////////////////// @@ -550,275 +283,247 @@ void ContextRep::print(ostream& os) const } //////////////////////////////////////////////////////////////////////////////// -class ContextImpl -{ - private: - - ContextRep* rep; - int* pcount; - - public: - - ContextRep* operator->() { return rep; } - ContextRep* get_rep(void) { return rep; } - ContextImpl(ContextRep* pp) : rep(pp), pcount(new int(1)) {} - ContextImpl(const ContextImpl& r) : rep(r.rep), pcount(r.pcount) - { (*pcount)++; } - - ContextImpl& operator=(const ContextImpl& r) - { - if ( rep == r.rep ) return *this; - if ( --(*pcount) == 0 ) - { - delete rep; - delete pcount; - } - rep = r.rep; - pcount = r.pcount; - (*pcount)++; - return *this; - } - - ~ContextImpl(void) - { - if ( --(*pcount) == 0 ) - { - delete rep; - delete pcount; - } - } -}; - -//////////////////////////////////////////////////////////////////////////////// -Context::Context(void) : pimpl_(new ContextImpl(new ContextRep())) {} - -//////////////////////////////////////////////////////////////////////////////// -Context::Context(int nprow, int npcol): -pimpl_(new ContextImpl(new ContextRep(nprow,npcol))) {} - -//////////////////////////////////////////////////////////////////////////////// -Context::Context(const Context &c, int nprow, int npcol, - int irstart, int icstart) : -pimpl_(new ContextImpl(new ContextRep(*(c.pimpl_)->get_rep(),nprow,npcol, -irstart,icstart))) {} - -//////////////////////////////////////////////////////////////////////////////// -Context::~Context() { delete pimpl_; } - -//////////////////////////////////////////////////////////////////////////////// -Context::Context(const Context& c) : pimpl_(new ContextImpl(*(c.pimpl_))) {} - -//////////////////////////////////////////////////////////////////////////////// -Context& Context::operator=(const Context& rhs) -{ - if ( &rhs == this ) return *this; - *pimpl_ = *(rhs.pimpl_); - return *this; -} - -//////////////////////////////////////////////////////////////////////////////// -int Context::ictxt() const { return (*pimpl_)->ictxt(); } +int Context::ictxt() const { return rep->ictxt(); } //////////////////////////////////////////////////////////////////////////////// -int Context::myrow() const { return (*pimpl_)->myrow(); } +int Context::myrow() const { return rep->myrow(); } //////////////////////////////////////////////////////////////////////////////// -int Context::mycol() const { return (*pimpl_)->mycol(); } +int Context::mycol() const { return rep->mycol(); } //////////////////////////////////////////////////////////////////////////////// -int Context::nprow() const { return (*pimpl_)->nprow(); } +int Context::nprow() const { return rep->nprow(); } //////////////////////////////////////////////////////////////////////////////// -int Context::npcol() const { return (*pimpl_)->npcol(); } +int Context::npcol() const { return rep->npcol(); } //////////////////////////////////////////////////////////////////////////////// -int Context::size() const { return (*pimpl_)->size(); } +int Context::size() const { return rep->size(); } //////////////////////////////////////////////////////////////////////////////// -int Context::myproc() const { return (*pimpl_)->myproc(); } +int Context::myproc() const { return rep->myproc(); } //////////////////////////////////////////////////////////////////////////////// -int Context::mype() const { return (*pimpl_)->mype(); } +int Context::mype() const { return rep->mype(); } //////////////////////////////////////////////////////////////////////////////// int Context::pmap(int irow, int icol) const -{ return (*pimpl_)->pmap(irow,icol); } +{ return rep->pmap(irow,icol); } //////////////////////////////////////////////////////////////////////////////// -bool Context::onpe0(void) const { return (*pimpl_)->onpe0(); } +bool Context::onpe0(void) const { return rep->onpe0(); } //////////////////////////////////////////////////////////////////////////////// -bool Context::active(void) const { return (*pimpl_)->active(); } +bool Context::active(void) const { return rep->active(); } //////////////////////////////////////////////////////////////////////////////// -void Context::abort(int ierr) const { (*pimpl_)->abort(ierr); } +void Context::abort(int ierr) const { rep->abort(ierr); } //////////////////////////////////////////////////////////////////////////////// -void Context::barrier(void) const { (*pimpl_)->barrier(); } +void Context::barrier(void) const { rep->barrier(); } //////////////////////////////////////////////////////////////////////////////// -void Context::barrier(char scope) const { (*pimpl_)->barrier(scope); } +void Context::barrier(char scope) const { rep->barrier(scope); } //////////////////////////////////////////////////////////////////////////////// void Context::dsend(int m, int n, double* a, int lda, int rdest, int cdest) const -{ (*pimpl_)->dsend(m,n,a,lda,rdest,cdest); } +{ rep->dsend(m,n,a,lda,rdest,cdest); } +//////////////////////////////////////////////////////////////////////////////// void Context::drecv(int m, int n, double* a, int lda, int rsrc, int csrc) const -{ (*pimpl_)->drecv(m,n,a,lda,rsrc,csrc); } +{ rep->drecv(m,n,a,lda,rsrc,csrc); } +//////////////////////////////////////////////////////////////////////////////// void Context::dsum(char scope, char topology, int m, int n, double* a, int lda, int rdest, int cdest) const -{ (*pimpl_)->dsum(scope,topology,m,n,a,lda,rdest,cdest); } +{ rep->dsum(scope,topology,m,n,a,lda,rdest,cdest); } +//////////////////////////////////////////////////////////////////////////////// void Context::dsum(char scope, int m, int n, double* a, int lda) const -{ (*pimpl_)->dsum(scope,' ',m,n,a,lda,-1,-1); } +{ rep->dsum(scope,' ',m,n,a,lda,-1,-1); } +//////////////////////////////////////////////////////////////////////////////// void Context::dsum(int m, int n, double* a, int lda) const -{ (*pimpl_)->dsum('A',' ',m,n,a,lda,-1,-1); } +{ rep->dsum('A',' ',m,n,a,lda,-1,-1); } +//////////////////////////////////////////////////////////////////////////////// void Context::dmax(char scope, char topology, int m, int n, double* a, int lda, int* ra, int* ca, int rcflag, int rdest, int cdest) const -{ (*pimpl_)->dmax(scope,topology,m,n,a,lda,ra,ca,rcflag,rdest,cdest); } +{ rep->dmax(scope,topology,m,n,a,lda,ra,ca,rcflag,rdest,cdest); } +//////////////////////////////////////////////////////////////////////////////// void Context::dmax(char scope, char topology, int m, int n, double* a, int lda, int rdest, int cdest) const -{ (*pimpl_)->dmax(scope,topology,m,n,a,lda,rdest,cdest); } +{ rep->dmax(scope,topology,m,n,a,lda,rdest,cdest); } +//////////////////////////////////////////////////////////////////////////////// void Context::dmax(char scope, int m, int n, double* a, int lda) const -{ (*pimpl_)->dmax(scope,' ',m,n,a,lda,-1,-1); } +{ rep->dmax(scope,' ',m,n,a,lda,-1,-1); } +//////////////////////////////////////////////////////////////////////////////// void Context::dmax(int m, int n, double* a, int lda) const -{ (*pimpl_)->dmax('A',' ',m,n,a,lda,-1,-1); } +{ rep->dmax('A',' ',m,n,a,lda,-1,-1); } +//////////////////////////////////////////////////////////////////////////////// void Context::dmin(char scope, char topology, int m, int n, double* a, int lda, int* ra, int* ca, int rcflag, int rdest, int cdest) const -{ (*pimpl_)->dmin(scope,topology,m,n,a,lda,ra,ca,rcflag,rdest,cdest); } +{ rep->dmin(scope,topology,m,n,a,lda,ra,ca,rcflag,rdest,cdest); } +//////////////////////////////////////////////////////////////////////////////// void Context::dmin(char scope, char topology, int m, int n, double* a, int lda, int rdest, int cdest) const -{ (*pimpl_)->dmin(scope,topology,m,n,a,lda,rdest,cdest); } +{ rep->dmin(scope,topology,m,n,a,lda,rdest,cdest); } +//////////////////////////////////////////////////////////////////////////////// void Context::dmin(char scope, int m, int n, double* a, int lda) const -{ (*pimpl_)->dmin(scope,' ',m,n,a,lda,-1,-1); } +{ rep->dmin(scope,' ',m,n,a,lda,-1,-1); } +//////////////////////////////////////////////////////////////////////////////// void Context::dmin(int m, int n, double* a, int lda) const -{ (*pimpl_)->dmin('A',' ',m,n,a,lda,-1,-1); } +{ rep->dmin('A',' ',m,n,a,lda,-1,-1); } +//////////////////////////////////////////////////////////////////////////////// void Context::dbcast_send(char scope, char topology, int m, int n, double* a, int lda) const -{ (*pimpl_)->dbcast_send(scope,topology,m,n,a,lda); } +{ rep->dbcast_send(scope,topology,m,n,a,lda); } +//////////////////////////////////////////////////////////////////////////////// void Context::dbcast_send(char scope, int m, int n, double* a, int lda) const -{ (*pimpl_)->dbcast_send(scope,' ',m,n,a,lda); } +{ rep->dbcast_send(scope,' ',m,n,a,lda); } +//////////////////////////////////////////////////////////////////////////////// void Context::dbcast_send(int m, int n, double* a, int lda) const -{ (*pimpl_)->dbcast_send('A',' ',m,n,a,lda); } +{ rep->dbcast_send('A',' ',m,n,a,lda); } +//////////////////////////////////////////////////////////////////////////////// void Context::dbcast_recv(char scope, char topology, int m, int n, double* a, int lda, int rsrc,int csrc) const -{ (*pimpl_)->dbcast_recv(scope,topology,m,n,a,lda,rsrc,csrc); } +{ rep->dbcast_recv(scope,topology,m,n,a,lda,rsrc,csrc); } +//////////////////////////////////////////////////////////////////////////////// void Context::dbcast_recv(char scope, int m, int n, double* a, int lda, int rsrc, int csrc) const -{ (*pimpl_)->dbcast_recv(scope,' ',m,n,a,lda,rsrc,csrc); } +{ rep->dbcast_recv(scope,' ',m,n,a,lda,rsrc,csrc); } +//////////////////////////////////////////////////////////////////////////////// void Context::dbcast_recv(int m, int n, double* a, int lda, int rsrc,int csrc) const -{ (*pimpl_)->dbcast_recv('A',' ',m,n,a,lda,rsrc,csrc); } +{ rep->dbcast_recv('A',' ',m,n,a,lda,rsrc,csrc); } //////////////////////////////////////////////////////////////////////////////// void Context::isend(int m, int n, int* a, int lda, int rdest, int cdest) const -{ (*pimpl_)->isend(m,n,a,lda,rdest,cdest); } +{ rep->isend(m,n,a,lda,rdest,cdest); } +//////////////////////////////////////////////////////////////////////////////// void Context::irecv(int m, int n, int* a, int lda, int rsrc, int csrc) const -{ (*pimpl_)->irecv(m,n,a,lda,rsrc,csrc); } +{ rep->irecv(m,n,a,lda,rsrc,csrc); } +//////////////////////////////////////////////////////////////////////////////// void Context::isum(char scope, char topology, int m, int n, int* a, int lda, int rdest, int cdest) const -{ (*pimpl_)->isum(scope,topology,m,n,a,lda,rdest,cdest); } +{ rep->isum(scope,topology,m,n,a,lda,rdest,cdest); } +//////////////////////////////////////////////////////////////////////////////// void Context::isum(char scope, int m, int n, int* a, int lda) const -{ (*pimpl_)->isum(scope,' ',m,n,a,lda,-1,-1); } +{ rep->isum(scope,' ',m,n,a,lda,-1,-1); } +//////////////////////////////////////////////////////////////////////////////// void Context::isum(int m, int n, int* a, int lda) const -{ (*pimpl_)->isum('A',' ',m,n,a,lda,-1,-1); } +{ rep->isum('A',' ',m,n,a,lda,-1,-1); } +//////////////////////////////////////////////////////////////////////////////// void Context::imax(char scope, char topology, int m, int n, int* a, int lda, int* ra, int* ca, int rcflag, int rdest, int cdest) const -{ (*pimpl_)->imax(scope,topology,m,n,a,lda,ra,ca,rcflag,rdest,cdest); } +{ rep->imax(scope,topology,m,n,a,lda,ra,ca,rcflag,rdest,cdest); } +//////////////////////////////////////////////////////////////////////////////// void Context::imax(char scope, char topology, int m, int n, int* a, int lda, int rdest, int cdest) const -{ (*pimpl_)->imax(scope,topology,m,n,a,lda,rdest,cdest); } +{ rep->imax(scope,topology,m,n,a,lda,rdest,cdest); } +//////////////////////////////////////////////////////////////////////////////// void Context::imax(char scope, int m, int n, int* a, int lda) const -{ (*pimpl_)->imax(scope,' ',m,n,a,lda,-1,-1); } +{ rep->imax(scope,' ',m,n,a,lda,-1,-1); } +//////////////////////////////////////////////////////////////////////////////// void Context::imax(int m, int n, int* a, int lda) const -{ (*pimpl_)->imax('A',' ',m,n,a,lda,-1,-1); } +{ rep->imax('A',' ',m,n,a,lda,-1,-1); } +//////////////////////////////////////////////////////////////////////////////// void Context::imin(char scope, char topology, int m, int n, int* a, int lda, int* ra, int* ca, int rcflag, int rdest, int cdest) const -{ (*pimpl_)->imin(scope,topology,m,n,a,lda,ra,ca,rcflag,rdest,cdest); } +{ rep->imin(scope,topology,m,n,a,lda,ra,ca,rcflag,rdest,cdest); } +//////////////////////////////////////////////////////////////////////////////// void Context::imin(char scope, char topology, int m, int n, int* a, int lda, int rdest, int cdest) const -{ (*pimpl_)->imin(scope,topology,m,n,a,lda,rdest,cdest); } +{ rep->imin(scope,topology,m,n,a,lda,rdest,cdest); } +//////////////////////////////////////////////////////////////////////////////// void Context::imin(char scope, int m, int n, int* a, int lda) const -{ (*pimpl_)->imin(scope,' ',m,n,a,lda,-1,-1); } +{ rep->imin(scope,' ',m,n,a,lda,-1,-1); } +//////////////////////////////////////////////////////////////////////////////// void Context::imin(int m, int n, int* a, int lda) const -{ (*pimpl_)->imin('A',' ',m,n,a,lda,-1,-1); } +{ rep->imin('A',' ',m,n,a,lda,-1,-1); } +//////////////////////////////////////////////////////////////////////////////// void Context::ibcast_send(char scope, char topology, int m, int n, int* a, int lda) const -{ (*pimpl_)->ibcast_send(scope,topology,m,n,a,lda); } +{ rep->ibcast_send(scope,topology,m,n,a,lda); } +//////////////////////////////////////////////////////////////////////////////// void Context::ibcast_send(char scope, int m, int n, int* a, int lda) const -{ (*pimpl_)->ibcast_send(scope,' ',m,n,a,lda); } +{ rep->ibcast_send(scope,' ',m,n,a,lda); } +//////////////////////////////////////////////////////////////////////////////// void Context::ibcast_send(int m, int n, int* a, int lda) const -{ (*pimpl_)->ibcast_send('A',' ',m,n,a,lda); } +{ rep->ibcast_send('A',' ',m,n,a,lda); } +//////////////////////////////////////////////////////////////////////////////// void Context::ibcast_recv(char scope, char topology, int m, int n, int* a, int lda, int rsrc,int csrc) const -{ (*pimpl_)->ibcast_recv(scope,topology,m,n,a,lda,rsrc,csrc); } +{ rep->ibcast_recv(scope,topology,m,n,a,lda,rsrc,csrc); } +//////////////////////////////////////////////////////////////////////////////// void Context::ibcast_recv(char scope, int m, int n, int* a, int lda, int rsrc, int csrc) const -{ (*pimpl_)->ibcast_recv(scope,' ',m,n,a,lda,rsrc,csrc); } +{ rep->ibcast_recv(scope,' ',m,n,a,lda,rsrc,csrc); } +//////////////////////////////////////////////////////////////////////////////// void Context::ibcast_recv(int m, int n, int* a, int lda, int rsrc,int csrc) const -{ (*pimpl_)->ibcast_recv('A',' ',m,n,a,lda,rsrc,csrc); } +{ rep->ibcast_recv('A',' ',m,n,a,lda,rsrc,csrc); } -void Context::string_send(string& s, int rdest, int cdest) const -{ (*pimpl_)->string_send(s,rdest,cdest); } +//////////////////////////////////////////////////////////////////////////////// +void Context::string_send(std::string& s, int rdest, int cdest) const +{ rep->string_send(s,rdest,cdest); } -void Context::string_recv(string& s, int rsrc, int csrc) const -{ (*pimpl_)->string_recv(s,rsrc,csrc); } +//////////////////////////////////////////////////////////////////////////////// +void Context::string_recv(std::string& s, int rsrc, int csrc) const +{ rep->string_recv(s,rsrc,csrc); } -void Context::string_bcast(string& s, int isrc) const -{ (*pimpl_)->string_bcast(s,isrc); } +//////////////////////////////////////////////////////////////////////////////// +void Context::string_bcast(std::string& s, int isrc) const +{ rep->string_bcast(s,isrc); } //////////////////////////////////////////////////////////////////////////////// bool Context::operator==(const Context& ctxt) const -{ return ( (*pimpl_)->ictxt() == ctxt.ictxt() ); } +{ return ( rep->ictxt() == ctxt.ictxt() ); } //////////////////////////////////////////////////////////////////////////////// -MPI_Comm Context::comm(void) const { return (*pimpl_)->comm(); } +MPI_Comm Context::comm(void) const { return rep->comm(); } //////////////////////////////////////////////////////////////////////////////// -void Context::print(ostream& os) const { (*pimpl_)->print(os);} +void Context::print(ostream& os) const { rep->print(os);} //////////////////////////////////////////////////////////////////////////////// ostream& operator<<(ostream& os, const Context& c) @@ -826,4 +531,3 @@ ostream& operator<<(ostream& os, const Context& c) c.print(os); return os; } - diff --git a/src/Context.h b/src/Context.h index 24f82a1..0644151 100644 --- a/src/Context.h +++ b/src/Context.h @@ -21,18 +21,116 @@ #include #include - -#if USE_MPI +#include +#include +#include "blacs.h" #include -#else -typedef int MPI_Comm; -#endif + +struct ContextRep +{ + private: + + int ictxt_; + int myrow_; + int mycol_; + int nprow_; + int npcol_; + int size_; + int myproc_; + int mype_; + bool onpe0_; + bool active_; + + std::vector pmap_; + MPI_Comm comm_; + + // keep assignment and copy constructors private + ContextRep& operator=(const ContextRep& c); + ContextRep(const ContextRep& c); + + public: + + int ictxt() const { return ictxt_; } + int myrow() const { return myrow_; } + int mycol() const { return mycol_; } + int nprow() const { return nprow_; } + int npcol() const { return npcol_; } + + // number of processes in the context + // returns -1 if current process is not part of this context + int size() const { return size_; } + // position of current process in row-major order + // returns -1 if current process is not part of this context + int myproc() const { return myproc_; } + int mype() const { return mype_; } + int pmap(int irow, int icol) const { return pmap_[irow+nprow_*icol]; } + + bool onpe0(void) const { return onpe0_; } + bool active(void) const { return active_; } + void abort(int ierr) const { Cblacs_abort(ictxt_,ierr); } + void barrier(void) const { Cblacs_barrier(ictxt_,"A"); } + void barrier(char scope) const { Cblacs_barrier(ictxt_,&scope); } + + void dsend(int m, int n, double* a, int lda, int rdest, int cdest) const; + void drecv(int m, int n, double* a, int lda, int rsrc, int csrc) const; + void dsum(char scope, char topology, int m, int n, double* a, int lda, + int rdest, int cdest) const; + void dmax(char scope, char topology, int m, int n, double* a, int lda, + int rdest, int cdest) const; + void dmax(char scope, char topology, int m, int n, double* a, int lda, + int* ra, int* ca, int rcflag, int rdest, int cdest) const; + void dmin(char scope, char topology, int m, int n, double* a, int lda, + int* ra, int* ca, int rcflag, int rdest, int cdest) const; + void dmin(char scope, char topology, int m, int n, double* a, int lda, + int rdest, int cdest) const; + void dbcast_send(char scope, char topology, + int m, int n, double* a,int lda) const; + void dbcast_recv(char scope, char topology, int m, int n, double* a, int lda, + int rsrc, int csrc) const; + void isend(int m, int n, int* a, int lda, int rdest, int cdest) const; + void irecv(int m, int n, int* a, int lda, int rsrc, int csrc) const; + void isum(char scope, char topology, int m, int n, int* a, int lda, + int rdest, int cdest) const; + void imax(char scope, char topology, int m, int n, int* a, int lda, + int* ra, int* ca, int rcflag, int rdest, int cdest) const; + void imax(char scope, char topology, int m, int n, int* a, int lda, + int rdest, int cdest) const; + void imin(char scope, char topology, int m, int n, int* a, int lda, + int* ra, int* ca, int rcflag, int rdest, int cdest) const; + void imin(char scope, char topology, int m, int n, int* a, int lda, + int rdest, int cdest) const; + void ibcast_send(char scope, char topology, + int m, int n, int* a,int lda) const; + void ibcast_recv(char scope, char topology, int m, int n, int* a, int lda, + int rsrc, int csrc) const; + void string_send(std::string& s, int rdest, int cdest) const; + void string_recv(std::string& s, int rsrc, int csrc) const; + void string_bcast(std::string& s, int isrc) const; + + bool operator==(const ContextRep& c) const + { return (ictxt_==c.ictxt());} + + MPI_Comm comm(void) const { return comm_; } + + // Constructors + + // construct a single-row ContextRep + explicit ContextRep(MPI_Comm comm); + + // global ContextRep of size nprow * npcol with column major order + explicit ContextRep(MPI_Comm comm, int nprow, int npcol); + + ~ContextRep(); + + void print(std::ostream& os) const; +}; class Context { private: - struct ContextImpl* pimpl_; + ContextRep* rep; + int* pcount; public: @@ -54,6 +152,7 @@ class Context bool onpe0(void) const; bool active(void) const; + operator bool() const { return active(); } void abort(int ierr) const; void barrier(void) const; void barrier(char scope) const; @@ -139,26 +238,44 @@ class Context // this process is not part of the context MPI_Comm comm(void) const; - // Constructors + void print(std::ostream& os) const; - // default global context: construct a single-row global Context - explicit Context(); + // Constructors - // global Context of size nprow * npcol with column major order - explicit Context(int nprow, int npcol); + // single-row Context + explicit Context(MPI_Comm comm) : rep(new ContextRep(comm)), + pcount(new int(1)) {} - // construct a Context of size nprow*npcol from the processes - // in context ctxt starting at process (irstart,icstart) - explicit Context(const Context &ctxt, int nprow, int npcol, - int irstart, int icstart); + // nprow * npcol Context + explicit Context(MPI_Comm comm, int nprow, int npcol): + rep(new ContextRep(comm,nprow,npcol)), pcount(new int(1)) {} - ~Context(); + Context(const Context& c) : rep(c.rep), pcount(c.pcount) { (*pcount)++; } - Context(const Context& ctxt); - Context& operator=(const Context& rhs); + Context& operator=(const Context& c) + { + if ( rep == c.rep ) return *this; + if ( --(*pcount) == 0 ) + { + delete rep; + delete pcount; + } + rep = c.rep; + pcount = c.pcount; + (*pcount)++; + return *this; + } - void print(std::ostream& os) const; + ~Context(void) + { + if ( --(*pcount) == 0 ) + { + delete rep; + delete pcount; + } + } }; + std::ostream& operator << ( std::ostream& os, const Context& ctxt ); #endif diff --git a/src/Control.h b/src/Control.h index e4a5dc4..f26213f 100644 --- a/src/Control.h +++ b/src/Control.h @@ -15,13 +15,13 @@ // Control.h: // //////////////////////////////////////////////////////////////////////////////// -// $Id: Control.h,v 1.15 2009-03-08 01:11:31 fgygi Exp $ #ifndef CONTROL_H #define CONTROL_H #include #include +#include "D3vector.h" struct Control { @@ -53,6 +53,7 @@ struct Control double ext_stress[6]; // external stress tensor: xx,yy,zz,xy,yz,xz std::string xc; + double alpha_PBE0; std::string spin; int delta_spin; @@ -66,5 +67,13 @@ struct Control int blHF[3]; double btHF; + + double scf_tol; + + D3vector e_field; + std::string polarization; + + std::string iter_cmd; + int iter_cmd_period; }; #endif diff --git a/src/D3tensor.h b/src/D3tensor.h new file mode 100644 index 0000000..39ce4c7 --- /dev/null +++ b/src/D3tensor.h @@ -0,0 +1,321 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014 The Regents of the University of California +// +// This file is part of Qbox +// +// Qbox is distributed under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 2 of +// the License, or (at your option) any later version. +// See the file COPYING in the root directory of this distribution +// or . +// +//////////////////////////////////////////////////////////////////////////////// +// +// D3tensor.h +// +// double 3x3 tensor +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef D3TENSOR_H +#define D3TENSOR_H +#include +#include +#include +#include +#include "blas.h" +#include "D3vector.h" + +using namespace std; + +class D3tensor +{ + public: + + double a_[9]; + + double* a(void) { return &a_[0]; } + const double* a(void) const { return &a_[0]; } + + explicit D3tensor(void) { clear(); } + + explicit D3tensor(double xx, double yy, double zz) + { a_[0]=xx; a_[4]=yy; a_[8]=zz; } + + explicit D3tensor(double xx, double yy, double zz, + double xy, double yz, double xz, char& uplo) + { + a_[0] = xx; + a_[4] = yy; + a_[8] = zz; + + if ( uplo == 'l' ) + { + a_[1] = xy; + a_[2] = xz; + a_[5] = yz; + } + else if ( uplo == 'u' ) + { + a_[3] = xy; + a_[6] = xz; + a_[7] = yz; + } + else if ( uplo == 's' ) + { + a_[1] = xy; + a_[2] = xz; + a_[5] = yz; + a_[3] = xy; + a_[6] = xz; + a_[7] = yz; + } + else + assert(false); + } + + explicit D3tensor(const D3vector& diag, const D3vector& offdiag) + { + a_[0] = diag[0]; + a_[4] = diag[1]; + a_[8] = diag[2]; + a_[1] = offdiag[0]; + a_[5] = offdiag[1]; + a_[2] = offdiag[2]; + a_[3] = offdiag[0]; + a_[7] = offdiag[1]; + a_[6] = offdiag[2]; + } + + explicit D3tensor(const double* a) + { + for ( int i = 0; i < 9; i++ ) a_[i] = a[i]; + } + + double& operator[](int i) + { + assert(i>=0 && i < 9); + return a_[i]; + } + + double operator[](int i) const + { + assert(i>=0 && i < 9); + return a_[i]; + } + + void setdiag(int i, double b) + { + assert(i>=0 && i<3); + a_[i*4] = b; + } + + void setdiag(const D3vector& b) + { + for ( int i = 0; i < 3; i++ ) + a_[i*4] = b[i]; + } + + void setoffdiag(int i, double b) + { + assert(i>=0 && i<3); + if ( i == 0 ) + { + a_[1] = b; + a_[3] = b; + } + else if ( i == 2 ) + { + a_[2] = b; + a_[6] = b; + } + else + { + a_[5] = b; + a_[7] = b; + } + } + + void setoffdiag(const D3vector& b) + { + a_[1] = b[0]; + a_[3] = b[0]; + a_[5] = b[1]; + a_[7] = b[1]; + a_[2] = b[2]; + a_[6] = b[2]; + } + + bool operator==(const D3tensor &rhs) const + { + bool eq = true; + for ( int i = 0; i < 9; i++ ) + { + if ( rhs[i] != a_[i] ) + { + eq &= false; + } + } + return eq; + } + + bool operator!=(const D3tensor &rhs) const + { + bool neq = false; + for ( int i = 0; i < 9; i++ ) + { + if ( rhs[i] != a_[i] ) + { + neq |= true; + } + } + return neq; + } + + D3tensor& operator+=(const D3tensor& rhs) + { + for ( int i = 0; i < 9; i++ ) + a_[i] += rhs[i]; + return *this; + } + + D3tensor& operator-=(const D3tensor& rhs) + { + for ( int i = 0; i < 9; i++ ) + a_[i] -= rhs[i]; + return *this; + } + + D3tensor& operator*=(const double& rhs) + { + for ( int i = 0; i < 9; i++ ) + a_[i] *= rhs; + return *this; + } + + D3tensor& operator/=(const double& rhs) + { + for ( int i = 0; i < 9; i++ ) + a_[i] /= rhs; + return *this; + } + + friend const D3tensor operator+(const D3tensor& lhs, const D3tensor& rhs) + { + return D3tensor(lhs) += rhs; + } + + friend const D3tensor operator-(const D3tensor& a, const D3tensor& b) + { + return D3tensor(a) -= b; + } + + friend D3tensor operator*(double a, const D3tensor& b) + { + return D3tensor(b) *= a; + } + + friend D3tensor operator*(const D3tensor& a, double b) + { + return D3tensor(a) *= b; + } + + friend D3tensor operator/(const D3tensor& a, double b) + { + return D3tensor(a) /= b; + } + + friend D3tensor operator*(D3tensor& a, D3tensor& b) + { + D3tensor c; + int ithree = 3; + double one = 1.0, zero = 0.0; + char t = 'n'; + dgemm ( &t, &t, &ithree, &ithree, &ithree, &one, &a[0], &ithree, + &b[0], &ithree, &zero, &c[0], &ithree ); + return c; + } + + friend D3tensor operator-(const D3tensor& a) // unary minus + { + return D3tensor()-a; + } + + double norm2(const D3tensor& a) const + { + return a_[0]*a_[0] + a_[1]*a_[1] + a_[2]*a_[2] + + a_[3]*a_[3] + a_[4]*a_[4] + a_[5]*a_[5] + + a_[6]*a_[6] + a_[7]*a_[7] + a_[8]*a_[8]; + } + + double norm(const D3tensor& a) const + { + return sqrt(norm2(a)); + } + + double trace(void) const + { + return a_[0]+a_[4]+a_[8]; + } + + void traceless(void) + { + double b = trace() / 3; + a_[0] -= b; + a_[4] -= b; + a_[8] -= b; + } + + void clear(void) + { + for ( int i = 0; i < 9; i++ ) + a_[i] = 0.0; + } + + void identity(void) + { + clear(); + a_[0] = 1.0; + a_[4] = 1.0; + a_[8] = 1.0; + } + + friend std::ostream& operator<<(std::ostream& os, const D3tensor& rhs) + { + const double * const v = rhs.a(); + os.setf(ios::fixed,ios::floatfield); + os.setf(ios::right,ios::adjustfield); + os.precision(8); + os << setw(14) << v[0] << " " << setw(14) << v[3] << " " << setw(14) << v[6] + << "\n" + << setw(14) << v[1] << " " << setw(14) << v[4] << " " << setw(14) << v[7] + << "\n" + << setw(14) << v[2] << " " << setw(14) << v[5] << " " << setw(14) << v[8] + << "\n"; + return os; + } + + void syev(char uplo, D3vector& eigval, D3tensor& eigvec) + { + double w[3]; + + int info; + char jobz = 'V'; + int lwork=-1; + double tmplwork; + int n = 3; + + dsyev(&jobz, &uplo, &n, eigvec.a(), &n, &w[0], &tmplwork, &lwork, &info); + + lwork = (int) tmplwork + 1; + double* work = new double[lwork]; + + eigvec = *this; + dsyev(&jobz, &uplo, &n, eigvec.a(), &n, &w[0], work, &lwork, &info); + delete[] work; + + eigval = D3vector(&w[0]); + } +}; +#endif diff --git a/src/D3vector.h b/src/D3vector.h index 3f611f1..42c5eba 100644 --- a/src/D3vector.h +++ b/src/D3vector.h @@ -45,6 +45,14 @@ class D3vector else return z; } + double operator[] (const int &i) const + { + assert(i>=0 && i <3); + if ( i == 0 ) return x; + else if ( i == 1 ) return y; + else return z; + } + bool operator==(const D3vector &rhs) const { return x == rhs.x && y == rhs.y && z == rhs.z; @@ -141,7 +149,7 @@ class D3vector return sqrt( a.x * a.x + a.y * a.y + a.z * a.z ); } - friend double norm( const D3vector& a ) + friend double norm2( const D3vector& a ) { return a.x * a.x + a.y * a.y + a.z * a.z; } diff --git a/src/Debug.h b/src/Debug.h index 975047b..d128ae6 100644 --- a/src/Debug.h +++ b/src/Debug.h @@ -15,7 +15,6 @@ // Debug.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: Debug.h,v 1.5 2008-09-08 15:56:18 fgygi Exp $ #ifndef DEBUG_H #define DEBUG_H diff --git a/src/DistanceCmd.h b/src/DistanceCmd.h index 3d8f805..70563dd 100644 --- a/src/DistanceCmd.h +++ b/src/DistanceCmd.h @@ -15,7 +15,6 @@ // DistanceCmd.h: // //////////////////////////////////////////////////////////////////////////////// -// $Id: DistanceCmd.h,v 1.6 2008-09-08 15:56:18 fgygi Exp $ #ifndef DISTANCECMD_H #define DISTANCECMD_H @@ -38,23 +37,46 @@ class DistanceCmd : public Cmd { return "\n distance\n\n" - " syntax: distance name1 name2\n\n" - " The distance command prints the distance between two atoms.\n\n"; + " syntax: distance [-pbc] name1 name2\n\n" + " The distance command prints the distance between two atoms.\n" + " If the -pbc option is used, the smallest distance is\n" + " computed taking into account periodic boundary conditions.\n\n"; } int action(int argc, char **argv) { - if ( argc != 3 ) + if ( ! ( argc == 3 || argc == 4 ) ) { if ( ui->onpe0() ) { - cout << " use: distance name1 name2" << endl; + cout << " use: distance [-pbc] name1 name2" << endl; } return 1; } - string name1(argv[1]); - string name2(argv[2]); + string name1, name2; + bool use_pbc = false; + + if ( argc == 3 ) + { + name1 = argv[1]; + name2 = argv[2]; + } + if ( argc == 4 ) + { + if ( strcmp(argv[1],"-pbc") ) + { + if ( ui->onpe0() ) + { + cout << " use: distance [-pbc] name1 name2" << endl; + } + return 1; + } + use_pbc = true; + name1 = argv[2]; + name2 = argv[3]; + } + Atom* a1 = s->atoms.findAtom(name1); Atom* a2 = s->atoms.findAtom(name2); if ( a1 == 0 || a2 == 0 ) @@ -74,7 +96,13 @@ class DistanceCmd : public Cmd if ( ui->onpe0() ) { - const double d = length(a1->position()-a2->position()); + D3vector r12 = a1->position()-a2->position(); + if ( use_pbc ) + { + const UnitCell& cell = s->wf.cell(); + cell.fold_in_ws(r12); + } + const double d = length(r12); cout.setf(ios::fixed,ios::floatfield); cout << " distance " << name1 << "-" << name2 << ": " << setprecision(3) diff --git a/src/DistanceConstraint.C b/src/DistanceConstraint.C index f2df65b..eba6b95 100644 --- a/src/DistanceConstraint.C +++ b/src/DistanceConstraint.C @@ -12,10 +12,9 @@ // //////////////////////////////////////////////////////////////////////////////// // -// DistanceConstraint.C +// DistanceConstraint.C // //////////////////////////////////////////////////////////////////////////////// -// $Id: DistanceConstraint.C,v 1.6 2008-09-08 15:56:18 fgygi Exp $ #include "DistanceConstraint.h" #include "AtomSet.h" @@ -229,11 +228,10 @@ ostream& DistanceConstraint::print( ostream &os ) os << "\" atoms=\"" << name1_ << " " << name2_ << "\"\n"; os.setf(ios::fixed,ios::floatfield); os.setf(ios::right,ios::adjustfield); - os << " value=\"" << setprecision(6) << distance_; - os << "\" velocity=\"" << setprecision(6) << velocity_ << "\"\n"; - os << " force=\"" << setprecision(6) << force_; - os << "\" weight=\"" << setprecision(6) << weight_ << "\"/>"; + os << " velocity=\"" << setprecision(6) << velocity_ << "\""; + os << " weight=\"" << setprecision(6) << weight_ << "\">\n"; + os << " " << setprecision(6) << distance_ << " "; + os << " " << setprecision(6) << force_ << " \n"; + os << " "; return os; } - - diff --git a/src/DistanceConstraint.h b/src/DistanceConstraint.h index 85a6d4e..411d9dd 100644 --- a/src/DistanceConstraint.h +++ b/src/DistanceConstraint.h @@ -12,10 +12,9 @@ // //////////////////////////////////////////////////////////////////////////////// // -// DistanceConstraint.h +// DistanceConstraint.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: DistanceConstraint.h,v 1.8 2009-05-15 04:38:48 fgygi Exp $ #ifndef DISTANCECONSTRAINT_H #define DISTANCECONSTRAINT_H diff --git a/src/Dt.h b/src/Dt.h index 1860dce..364cf0b 100644 --- a/src/Dt.h +++ b/src/Dt.h @@ -15,7 +15,6 @@ // Dt.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: Dt.h,v 1.5 2008-09-08 15:56:18 fgygi Exp $ #ifndef DT_H #define DT_H diff --git a/src/Ecut.h b/src/Ecut.h index 2a40e9e..c60723c 100644 --- a/src/Ecut.h +++ b/src/Ecut.h @@ -15,7 +15,6 @@ // Ecut.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: Ecut.h,v 1.8 2008-09-08 15:56:18 fgygi Exp $ #ifndef ECUT_H #define ECUT_H diff --git a/src/Ecutprec.h b/src/Ecutprec.h index b2e254a..acc5060 100644 --- a/src/Ecutprec.h +++ b/src/Ecutprec.h @@ -15,7 +15,6 @@ // Ecutprec.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: Ecutprec.h,v 1.4 2008-09-08 15:56:18 fgygi Exp $ #ifndef ECUTPREC_H #define ECUTPREC_H diff --git a/src/Ecuts.h b/src/Ecuts.h index cd824e9..1c39e1e 100644 --- a/src/Ecuts.h +++ b/src/Ecuts.h @@ -15,7 +15,6 @@ // Ecuts.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: Ecuts.h,v 1.4 2008-09-08 15:56:18 fgygi Exp $ #ifndef ECUTS_H #define ECUTS_H diff --git a/src/Efield.h b/src/Efield.h new file mode 100644 index 0000000..c3932b8 --- /dev/null +++ b/src/Efield.h @@ -0,0 +1,77 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014 The Regents of the University of California +// +// This file is part of Qbox +// +// Qbox is distributed under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 2 of +// the License, or (at your option) any later version. +// See the file COPYING in the root directory of this distribution +// or . +// +//////////////////////////////////////////////////////////////////////////////// +// +// Efield.h +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef EFIELD_H +#define EFIELD_H + +#include +#include +#include +#include +#include"D3vector.h" + +#include "Sample.h" + +class Efield : public Var +{ + Sample *s; + + public: + + const char *name ( void ) const { return "e_field"; }; + + int set ( int argc, char **argv ) + { + if ( argc != 4 ) + { + if ( ui->onpe0() ) + cout << " e_field takes 3 values" << endl; + return 1; + } + + double v0 = atof(argv[1]); + double v1 = atof(argv[2]); + double v2 = atof(argv[3]); + + s->ctrl.e_field[0] = v0; + s->ctrl.e_field[1] = v1; + s->ctrl.e_field[2] = v2; + + return 0; + } + + string print (void) const + { + ostringstream st; + st.setf(ios::left,ios::adjustfield); + st << setw(10) << name() << " = "; + st.setf(ios::right,ios::adjustfield); + st << s->ctrl.e_field[0] << " " + << s->ctrl.e_field[1] << " " + << s->ctrl.e_field[2] << " "; + return st.str(); + } + + Efield(Sample *sample) : s(sample) + { + s->ctrl.e_field[0] = 0.0; + s->ctrl.e_field[1] = 0.0; + s->ctrl.e_field[2] = 0.0; + } +}; +#endif diff --git a/src/ElectricEnthalpy.C b/src/ElectricEnthalpy.C new file mode 100644 index 0000000..a90853e --- /dev/null +++ b/src/ElectricEnthalpy.C @@ -0,0 +1,738 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014 The Regents of the University of California +// +// This file is part of Qbox +// +// Qbox is distributed under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 2 of +// the License, or (at your option) any later version. +// See the file COPYING in the root directory of this distribution +// or . +// +//////////////////////////////////////////////////////////////////////////////// +// +// ElectricEnthalpy.C +// +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include // fill + +#include "Timer.h" +#include "Context.h" +#include "Matrix.h" +#include "Sample.h" +#include "D3vector.h" +#include "ElectricEnthalpy.h" +#include "MLWFTransform.h" +#include "Wavefunction.h" +#include "D3vector.h" +#include "Basis.h" +#include "SlaterDet.h" +#include "ChargeDensity.h" +#include "FourierTransform.h" +#include "UnitCell.h" +using namespace std; + +/////////////////////////////////////////////////////////////////////////////// +double ElectricEnthalpy::vsst(double x) const +{ + // smooth sawtooth periodic potential function + // x in [-1/2, 1/2] + // The slope of vsst is 1 at x=0 + // + // The function vsst approximates the identity function x->x + // in the interval [-1/2+xcut, 1/2-xcut] + const double xcut = 0.05; + const double xcut2 = xcut*xcut; + // The function vsst is well represented by a + // discrete Fourier transform of length np = 2*ng + // Some aliasing error will arise if np < 2*ng + // The error is determined by the product xcut*ng + const int ng = 32; + double v = 0.0; + for ( int ig = 1; ig < ng; ig++ ) + { + const double g = 2 * M_PI * ig; + const double arg = -0.25 * g * g * xcut2; + // next line: factor sgn to shift origin by 0.5 + const int sgn = 1 - 2*(ig%2); + const double c = -2.0 * sgn * exp ( arg ) / g; + v += c * sin(x*g); + } + return v; +} + +/////////////////////////////////////////////////////////////////////////////// +ElectricEnthalpy::ElectricEnthalpy(Sample& s): s_(s), wf_(s.wf), + sd_(*(s.wf.sd(0,0))), ctxt_(s.wf.sd(0,0)->context()), + basis_(s.wf.sd(0,0)->basis()) +{ + onpe0_ = ctxt_.onpe0(); + e_field_ = s.ctrl.e_field; + finite_field_ = norm2(e_field_) != 0.0; + compute_quadrupole_ = false; + + if ( s.ctrl.polarization == "OFF" ) + pol_type_ = off; + else if ( s.ctrl.polarization == "BERRY" ) + pol_type_ = berry; + else if ( s.ctrl.polarization == "MLWF" ) + pol_type_ = mlwf; + else if ( s.ctrl.polarization == "MLWF_REF" ) + pol_type_ = mlwf_ref; + else if ( s.ctrl.polarization == "MLWF_REF_Q" ) + { + pol_type_ = mlwf_ref; + compute_quadrupole_ = true; + } + else + { + cerr << "ElectricEnthalpy: invalid polarization option" << endl; + ctxt_.abort(1); + } + + // do not allocate further objects if polarization is off + if ( pol_type_ == off ) return; + + assert(wf_.nkp()==1); + assert(wf_.nspin()==1); + + dwf_ = new Wavefunction(s.wf); + mlwft_ = new MLWFTransform(sd_); + mlwft_->set_tol(1.e-10); + + smat_[0] = smat_[1] = smat_[2] = 0; + rwf_[0] = rwf_[1] = rwf_[2] = 0; + int nst = sd_.nst(); + + if ( pol_type_ == mlwf_ref || pol_type_ == mlwf_ref_q ) + { + // allocate real space wf arrays for MLWF refinement + for ( int idir = 0; idir < 3; idir++ ) + rwf_[idir] = new Wavefunction(wf_); + correction_.resize(nst); + } + else if ( pol_type_ == berry ) + { + // allocate complex Berry phase matrix + int n = sd_.c().n(); + int nb = sd_.c().nb(); + for ( int idir = 0; idir < 3; idir++ ) + smat_[idir] = new ComplexMatrix(ctxt_,n,n,nb,nb); + } + + if ( (pol_type_ != off) && onpe0_ ) + { + cout.setf(ios::fixed,ios::floatfield); + cout.setf(ios::right,ios::adjustfield); + cout.precision(8); + cout << " " << e_field_ << " " << endl; + } + + mlwfc_.resize(nst); + mlwfs_.resize(nst); + quad_.resize(nst); +} + +/////////////////////////////////////////////////////////////////////////////// +ElectricEnthalpy::~ElectricEnthalpy(void) +{ + if ( pol_type_ == off ) return; + + delete dwf_; + delete mlwft_; + for ( int idir = 0; idir < 3; idir++ ) + { + delete rwf_[idir]; + delete smat_[idir]; + } + + for ( TimerMap::iterator i = tmap.begin(); i != tmap.end(); i++ ) + { + double time = (*i).second.real(); + double tmin = time; + double tmax = time; + s_.ctxt_.dmin(1,1,&tmin,1); + s_.ctxt_.dmax(1,1,&tmax,1); + if ( pol_type_ != off && s_.ctxt_.myproc()==0 ) + { + string s = "name=\"" + (*i).first + "\""; + cout << "" + << endl; + } + } +} + +/////////////////////////////////////////////////////////////////////////////// +void ElectricEnthalpy::update(void) +{ + if ( pol_type_ == off ) return; + + const UnitCell& cell = sd_.basis().cell(); + // compute cos and sin matrices + tmap["mlwf_update"].start(); + mlwft_->update(); + tmap["mlwf_update"].stop(); + vector sdcos(3), sdsin(3); + sdcos[0] = mlwft_->sdcosx(); + sdcos[1] = mlwft_->sdcosy(); + sdcos[2] = mlwft_->sdcosz(); + sdsin[0] = mlwft_->sdsinx(); + sdsin[1] = mlwft_->sdsiny(); + sdsin[2] = mlwft_->sdsinz(); + + dipole_ion_ = s_.atoms.dipole(); + dipole_el_ = D3vector(0,0,0); + + if ( pol_type_ == mlwf || pol_type_ == mlwf_ref || pol_type_ == mlwf_ref_q ) + { + tmap["mlwf_trans"].start(); + mlwft_->compute_transform(); + mlwft_->apply_transform(sd_); + tmap["mlwf_trans"].stop(); + for ( int i = 0; i < sd_.nst(); i++ ) + { + mlwfc_[i] = mlwft_->center(i); + mlwfs_[i] = mlwft_->spread(i); + } + + if ( pol_type_ == mlwf_ref || pol_type_ == mlwf_ref_q ) + { + tmap["correction"].start(); + compute_correction(); + tmap["correction"].stop(); + } + + for ( int i = 0; i < sd_.nst(); i++ ) + { + dipole_el_ -= 2.0 * mlwfc_[i]; + if ( pol_type_ == mlwf_ref || pol_type_ == mlwf_ref_q ) + dipole_el_ -= 2.0 * correction_[i]; + } + + // compute gradient + if ( finite_field_ ) + { + dwf_->clear(); + for ( int idir = 0; idir < 3; idir++ ) + { + if ( e_field_[idir] != 0.0 ) + { + // MLWF part + if ( pol_type_ == mlwf ) + { + const double nst = sd_.nst(); + std::vector adiag_inv_real(nst,0),adiag_inv_imag(nst,0); + for ( int ist = 0; ist < nst; ist ++ ) + { + const std::complex + adiag( mlwft_->adiag(idir*2,ist),mlwft_->adiag(idir*2+1,ist) ); + + adiag_inv_real[ist] = real( std::complex(1,0) / adiag ); + adiag_inv_imag[ist] = imag( std::complex(1,0) / adiag ); + + } + + DoubleMatrix ccos(sdcos[idir]->c()); + DoubleMatrix csin(sdsin[idir]->c()); + DoubleMatrix cp(dwf_->sd(0,0)->c()); + + int nloc = cp.nloc(); + int mloc = cp.mloc(); + int ione = 1; + + const double fac = cell.a_norm(idir) + * e_field_[idir] / ( 2.0 * M_PI ); + + for (int in = 0; in < nloc; in++) + { + int ist = cp.jglobal(in); + double fac1 = adiag_inv_real[ist] * fac; + double fac2 = adiag_inv_imag[ist] * fac; + + double *ptr1 = &cp[in*mloc], + *ptrcos = &ccos[in*mloc], + *ptrsin = &csin[in*mloc]; + + daxpy(&mloc, &fac2, ptrcos, &ione, ptr1, &ione); + daxpy(&mloc, &fac1, ptrsin, &ione, ptr1, &ione); + + } + } + else if ( pol_type_ == mlwf_ref || pol_type_ == mlwf_ref_q ) + { + // MLWF_REF part: real-space correction + DoubleMatrix cc(rwf_[idir]->sd(0,0)->c()); + DoubleMatrix cp(dwf_->sd(0,0)->c()); + + int size = cc.size(); + double alpha = e_field_[idir]; + int ione = 1; + daxpy (&size, &alpha, cc.valptr(), &ione, cp.valptr(), &ione); + } // if pol_type_ + } // if e_field_[idir] + } // for idir + } // if finite_field_ + } + else if ( pol_type_ == berry ) + { + // proxy matrix + DoubleMatrix gradp(dwf_->sd(0,0)->c()); + if ( finite_field_ ) + dwf_->clear(); + + for ( int idir = 0; idir < 3; idir++ ) + { + const double fac = cell.a_norm(idir)/( 2.0*M_PI ); + complex* val = smat_[idir]->valptr(); + + const double* re = mlwft_->a(idir*2)->cvalptr(); + const double* im = mlwft_->a(idir*2+1)->cvalptr(); + for ( int i = 0; i < smat_[idir]->size(); i++ ) + val[i] = complex(re[i],im[i]); + + // compute determinant of S + ComplexMatrix& sm = *smat_[idir]; + const Context& ctxt = sm.context(); + + // perform LU decomposition of S + valarray ipiv; + sm.lu(ipiv); + + int n = sm.n(); + int nb = sm.nb(); + // note: m == n, mb == nb + + // compute determinant from diagonal values of U (stored in diag of S) + // get full diagonal of the matrix in array diag + valarray > diag(n); + for ( int ii = 0; ii < n; ii++ ) + { + int iii = sm.l(ii) * nb + sm.x(ii); + int jjj = sm.m(ii) * nb + sm.y(ii); + if ( sm.pr(ii) == ctxt.myrow() && + sm.pc(ii) == ctxt.mycol() ) + diag[ii] = val[iii+sm.mloc()*jjj]; + } + ctxt.dsum(2*n,1,(double*)&diag[0],2*n); + + // sum the argument of diagonal elements + double sumarg = 0.0; + for ( int ii = 0; ii < n; ii++ ) + sumarg += arg(diag[ii]); + + // compute determinant + complex det = 1.0; + for ( int ii = 0; ii < n; ii++ ) + det *= diag[ii]; + + const int sign = sm.signature(ipiv); + if ( sign == -1 ) + sumarg += M_PI; + + // assume occupation number of 2.0 + dipole_el_[idir] = - 2.0 * fac * sumarg; + + if ( finite_field_ ) + { + // compute inverse of smat + sm.inverse_from_lu(ipiv); + + // real and img part of S^{-1} + DoubleMatrix s_real(ctxt_,n,n,nb,nb); + DoubleMatrix s_img(ctxt_,n,n,nb,nb); + DoubleMatrix sp(sm); + + int size = s_real.size(); + int ione = 1, itwo = 2; + + // copy real and imaginary parts of s to s_real and s_img + dcopy(&size,sp.valptr(),&itwo,s_real.valptr(),&ione); + dcopy(&size,sp.valptr()+1,&itwo,s_img.valptr(),&ione); + + // proxy Matrix for cosx|psi> and sinx|psi> + DoubleMatrix cosp(sdcos[idir]->c()); + DoubleMatrix sinp(sdsin[idir]->c()); + + // alpha = E_i * L_i / 2pi + double alpha = fac * e_field_[idir]; + + gradp.gemm('n','n',alpha,cosp,s_img,1.0); + gradp.gemm('n','n',alpha,sinp,s_real,1.0); + } + } // for idir + } + + dipole_total_ = dipole_ion_ + dipole_el_; + cell.fold_in_ws(dipole_ion_); + cell.fold_in_ws(dipole_el_); + cell.fold_in_ws(dipole_total_); +} + +/////////////////////////////////////////////////////////////////////////////// +double ElectricEnthalpy::enthalpy(Wavefunction& dwf, bool compute_hpsi) +{ + // return zero if polarization is off, or field is zero + if ( pol_type_ == off || !finite_field_ ) + return 0.0; + + enthalpy_ = - e_field_ * dipole_total_; + if ( compute_hpsi ) + { + // assert gamma-only and no spin + assert(dwf.nkp()==1 && dwf.nspin()==1); + dwf.sd(0,0)->c() += dwf_->sd(0,0)->c(); + } + return enthalpy_; +} + +/////////////////////////////////////////////////////////////////////////////// +// Correction scheme by M. Stengel and N. Spaldin, +// Phys. Rev. B 73, 075121 (2006) +// Calculate correction in real space and derivatives of the correction +/////////////////////////////////////////////////////////////////////////////// +void ElectricEnthalpy::compute_correction(void) +{ + int np0v = basis_.np(0); + int np1v = basis_.np(1); + int np2v = basis_.np(2); + const ComplexMatrix& c = sd_.c(); + DoubleMatrix cp(c); + + FourierTransform ft(basis_,np0v,np1v,np2v); + + int np012v = ft.np012(); + int np012loc = ft.np012loc(); + int nst = sd_.nst(); + int nloc = c.nloc(); + int mloc = c.mloc(); + + // store (x-x0)|psi> in rwfs + rwf_[0]->clear(); + rwf_[1]->clear(); + rwf_[2]->clear(); + + ComplexMatrix& cx = rwf_[0]->sd(0,0)->c(); + ComplexMatrix& cy = rwf_[1]->sd(0,0)->c(); + ComplexMatrix& cz = rwf_[2]->sd(0,0)->c(); + + DoubleMatrix cpx(cx); + DoubleMatrix cpy(cy); + DoubleMatrix cpz(cz); + + // calculate refinements + // ref is scaled by np012v + vector ref(nst*3); + if ( compute_quadrupole_ ) ref.resize(nst*9); + + // cell size; + const UnitCell& cell = sd_.basis().cell(); + const double ax = cell.amat(0); + const double ay = cell.amat(4); + const double az = cell.amat(8); + + // half cell size; + const double ax2 = ax / 2.0; + const double ay2 = ay / 2.0; + const double az2 = az / 2.0; + + // grid size; + const double dx = ax / np0v; + const double dy = ay / np1v; + const double dz = az / np2v; + + for ( int i = 0; i < nst; i++ ) + correction_[i] = D3vector(0,0,0); + + int niter = 1; + // check if override from the debug variable + // use: set debug MLWF_REF_NITER + if ( s_.ctrl.debug.find("MLWF_REF_NITER") != string::npos ) + { + istringstream is(s_.ctrl.debug); + string s; + is >> s >> niter; + assert(s=="MLWF_REF_NITER"); + if ( onpe0_ ) + cout << " ElectricEnthalpy: override niter value: niter= " + << niter << endl; + assert(niter > 0); + } + for ( int iter = 0; iter < niter; iter++ ) + { + fill(ref.begin(),ref.end(),0.0); + + // wavefunctions in real space + vector > wftmp(np012loc); + vector > xwftmp(np012loc); + vector > ywftmp(np012loc); + vector > zwftmp(np012loc); + + for ( int in = 0; in < nloc; in++ ) + { + int n = c.jglobal(in); + double* pref; + if ( compute_quadrupole_ ) + pref = &ref[9*n]; + else + pref = &ref[3*n]; + + // real space wavefunction in wftmp + tmap["ft"].start(); + ft.backward(c.cvalptr(mloc*in),&wftmp[0]); + tmap["ft"].stop(); + + tmap["real"].start(); + double x0 = mlwfc_[n][0] + correction_[n][0]; + double y0 = mlwfc_[n][1] + correction_[n][1]; + double z0 = mlwfc_[n][2] + correction_[n][2]; + + // compute shifted sawtooth potentials vx, vy, vz + vector vx(ft.np0()), vy(ft.np1()), vz(ft.np2()); + for ( int i = 0; i < vx.size(); i++ ) + { + double x = i * dx - x0; + if ( x > ax2 ) x -= ax; + if ( x < -ax2 ) x += ax; +#ifdef NO_VSST + vx[i] = x; +#else + vx[i] = ax * vsst(x/ax); +#endif + } + for ( int j = 0; j < vy.size(); j++ ) + { + double y = j * dy - y0; + if ( y > ay2 ) y -= ay; + if ( y < -ay2 ) y += ay; +#ifdef NO_VSST + vy[j] = y; +#else + vy[j] = ay * vsst(y/ay); +#endif + } + for ( int k = 0; k < vz.size(); k++ ) + { + double z = k * dz - z0; + if ( z > az2 ) z -= az; + if ( z < -az2 ) z += az; +#ifdef NO_VSST + vz[k] = z; +#else + vz[k] = az * vsst(z/az); +#endif + } + + for ( int i = 0; i < np012loc; i++ ) + { + int ix = ft.i(i); + int iy = ft.j(i); + int iz = ft.k(i); + + const double wft = real(wftmp[i]); + const double xwft = wft * vx[ix]; + const double ywft = wft * vy[iy]; + const double zwft = wft * vz[iz]; + + pref[0] += wft * xwft; + pref[1] += wft * ywft; + pref[2] += wft * zwft; + + xwftmp[i] = xwft; + ywftmp[i] = ywft; + zwftmp[i] = zwft; + + if ( compute_quadrupole_ ) + { + pref[3] += xwft * xwft; + pref[4] += ywft * ywft; + pref[5] += zwft * zwft; + pref[6] += xwft * ywft; + pref[7] += ywft * zwft; + pref[8] += zwft * xwft; + } + } // for i + tmap["real"].stop(); + + // ft to get xwf in reciprocal space at the last iteration + if ( iter == niter - 1 ) + { + tmap["ft"].start(); + if ( e_field_[0] != 0.0 ) + ft.forward(&xwftmp[0],cx.valptr(mloc*in)); + if ( e_field_[1] != 0.0 ) + ft.forward(&ywftmp[0],cy.valptr(mloc*in)); + if ( e_field_[2] != 0.0 ) + ft.forward(&zwftmp[0],cz.valptr(mloc*in)); + tmap["ft"].stop(); + } // if + } //for in + + ctxt_.barrier(); + tmap["dsum"].start(); + if ( compute_quadrupole_ ) + ctxt_.dsum(9*nst,1,&ref[0],9*nst); + else + ctxt_.dsum(3*nst,1,&ref[0],3*nst); + tmap["dsum"].stop(); + + tmap["real"].start(); + if ( compute_quadrupole_ ) + { + for ( int ist = 0; ist < nst; ist++ ) + { + D3vector& pcor = correction_[ist]; + D3tensor& pquad = quad_[ist]; + + pcor[0] += ref[ist*9]/np012v; + pcor[1] += ref[ist*9+1]/np012v; + pcor[2] += ref[ist*9+2]/np012v; + pquad.setdiag ( 0, ref[ist*9+3]/np012v - pcor[0] * pcor[0] ); + pquad.setdiag ( 1, ref[ist*9+4]/np012v - pcor[1] * pcor[1] ); + pquad.setdiag ( 2, ref[ist*9+5]/np012v - pcor[2] * pcor[2] ); + pquad.setoffdiag ( 0, ref[ist*9+6]/np012v - pcor[0] * pcor[1] ); + pquad.setoffdiag ( 1, ref[ist*9+7]/np012v - pcor[1] * pcor[2] ); + pquad.setoffdiag ( 2, ref[ist*9+8]/np012v - pcor[2] * pcor[0] ); + } + } + else + { + for ( int ist = 0; ist < nst; ist++ ) + { + D3vector& pcor = correction_[ist]; + pcor[0] += ref[ist*3]/np012v; + pcor[1] += ref[ist*3+1]/np012v; + pcor[2] += ref[ist*3+2]/np012v; + } + } + tmap["real"].stop(); + } // for iter +} + +//////////////////////////////////////////////////////////////////////////////// +void ElectricEnthalpy::print(ostream& os) const +{ + if ( pol_type_ == off ) return; + + os << fixed << right << setprecision(8); + // print MLWF centers if pol_type_ == MLWF or MLWF_REF or MLWF_REF_Q + if ( pol_type_ == mlwf || pol_type_ == mlwf_ref || pol_type_ == mlwf_ref_q ) + { + int nst = sd_.nst(); + os << " " << endl; + for ( int i = 0; i < nst; i++ ) + { + os << " " << endl; + if ( pol_type_ == mlwf_ref ) + { + os << " " << endl; + + if ( compute_quadrupole_ ) + os << " " + << setw(12) << quad_[i][0] << " " + << setw(12) << quad_[i][4] << " " + << setw(12) << quad_[i][8] << " " + << setw(12) << quad_[i][1] << " " + << setw(12) << quad_[i][2] << " " + << setw(12) << quad_[i][5] + << " " << endl; + } + } + os << " " << endl; + } + + // print dipole + os << setprecision(10) << fixed << right; + os << " \n"; + os << " " + << setw(14) << dipole_ion_.x << " " + << setw(14) << dipole_ion_.y << " " + << setw(14) << dipole_ion_.z << " \n"; + os << " " + << setw(14) << dipole_el_.x << " " + << setw(14) << dipole_el_.y << " " + << setw(14) << dipole_el_.z << " \n"; + os << " " + << setw(14) << dipole_total_.x << " " + << setw(14) << dipole_total_.y << " " + << setw(14) << dipole_total_.z << " \n"; + os << " \n"; + + if ( compute_quadrupole_ ) + { + D3tensor q_mlwfc; + D3tensor q_mlwfs; + for ( int ist = 0; ist < sd_.nst(); ist++ ) + { + D3vector ctr = mlwfc_[ist]; + for (int j=0; j<3; j++) + { + for (int k = 0; k < 3; k++) + q_mlwfc[j*3+k] -= 2.0 * ctr[j] * ctr[k]; + } + q_mlwfs -= quad_[ist] * 2.0; + } // for ist + + D3tensor q_ion = s_.atoms.quadrupole(); + D3tensor q_mlwf = q_mlwfc + q_mlwfs; + //total primitive quadrupoles + D3tensor q_total = q_ion + q_mlwf; + //traceless quadrupole + D3tensor q_traceless = q_total; + q_traceless.traceless(); + + os << " " << endl; + os << " " << endl + << q_ion + << " " << endl; + os << " " << endl + << q_mlwf + << " " << endl; + os << " " << endl + << q_total + << " " << endl; + os << " " << endl + << q_traceless + << " " << endl; + char uplo = 'u'; + D3vector eigval; + D3tensor eigvec; + q_traceless.syev(uplo, eigval, eigvec); + os << " " << endl + << " " << eigval << endl + << " " << endl; + os << " " << endl + << eigvec + << " " << endl; + os << " " << endl; + } + +} + +//////////////////////////////////////////////////////////////////////////////// +ostream& operator<< ( ostream& os, const ElectricEnthalpy& e ) +{ + e.print(os); + return os; +} diff --git a/src/ElectricEnthalpy.h b/src/ElectricEnthalpy.h new file mode 100644 index 0000000..fee33cc --- /dev/null +++ b/src/ElectricEnthalpy.h @@ -0,0 +1,100 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014 The Regents of the University of California +// +// This file is part of Qbox +// +// Qbox is distributed under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 2 of +// the License, or (at your option) any later version. +// See the file COPYING in the root directory of this distribution +// or . +// +//////////////////////////////////////////////////////////////////////////////// +// +// ElectricEnthalpy.h +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ELECTRICENTHALPY_H +#define ELECTRICENTHALPY_H + +#include +#include +#include +#include +#include "Matrix.h" +#include "D3vector.h" +#include "Wavefunction.h" +#include "SlaterDet.h" +#include "Context.h" +#include "Sample.h" +#include "Timer.h" +#include "D3tensor.h" +#include "Basis.h" +#include "Matrix.h" + +class Sample; +class MLWFTransform; + +class ElectricEnthalpy +{ + private: + + Sample& s_; + Wavefunction& wf_; + Wavefunction* dwf_; + SlaterDet& sd_; + const Context& ctxt_; + const Basis& basis_; + + bool onpe0_; + bool finite_field_; + + enum { off, berry, mlwf, mlwf_ref, mlwf_ref_q } pol_type_; + bool compute_quadrupole_; + + // electric field + D3vector e_field_; + + Wavefunction* rwf_[3]; + + // MLWFtransform is used to compute S matrix + MLWFTransform* mlwft_; + + // s matrices + ComplexMatrix* smat_[3]; + + // total, ionic and electronic part of macroscopic polarization + D3vector dipole_total_, dipole_ion_, dipole_el_; + + // electric enthalpy + double enthalpy_; + + std::vector mlwfc_; + std::vector mlwfs_; + std::vector correction_; + std::vector quad_; + + void compute_correction(void); + double vsst(double x) const; + + public: + + mutable TimerMap tmap; + + D3vector e_field(void) const { return e_field_; } + D3vector dipole_total(void) const { return dipole_total_; } + D3vector dipole_ion(void) const { return dipole_ion_; } + D3vector dipole_el(void) const { return dipole_el_; } + + double enthalpy(Wavefunction& dwf, bool compute_hpsi); + + void update(void); + void print(std::ostream& os) const; + + ElectricEnthalpy(Sample& s); + ~ElectricEnthalpy(void); +}; +std::ostream& operator << ( std::ostream& os, const ElectricEnthalpy& e ); +#endif diff --git a/src/Emass.h b/src/Emass.h index a891a0e..1469fef 100644 --- a/src/Emass.h +++ b/src/Emass.h @@ -15,7 +15,6 @@ // Emass.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: Emass.h,v 1.5 2009-05-15 04:40:52 fgygi Exp $ #ifndef EMASS_H #define EMASS_H diff --git a/src/EnergyFunctional.C b/src/EnergyFunctional.C index 3cbc679..c8aefbd 100644 --- a/src/EnergyFunctional.C +++ b/src/EnergyFunctional.C @@ -28,17 +28,20 @@ #include "XCOperator.h" #include "NonLocalPotential.h" #include "ConfinementPotential.h" +#include "D3vector.h" +#include "ElectricEnthalpy.h" #include "Timer.h" #include "blas.h" #include #include -#include // fill() +#include // fill(), copy() +#include using namespace std; //////////////////////////////////////////////////////////////////////////////// -EnergyFunctional::EnergyFunctional( Sample& s, const ChargeDensity& cd) +EnergyFunctional::EnergyFunctional( Sample& s, ChargeDensity& cd) : s_(s), cd_(cd) { const AtomSet& atoms = s_.atoms; @@ -121,6 +124,8 @@ EnergyFunctional::EnergyFunctional( Sample& s, const ChargeDensity& cd) eself_ = 0.0; + // core_charge_: true if any species has a core charge density + core_charge_ = false; for ( int is = 0; is < nsp_; is++ ) { Species *s = atoms.species_list[is]; @@ -133,8 +138,22 @@ EnergyFunctional::EnergyFunctional( Sample& s, const ChargeDensity& cd) eself_ += na * s->eself(); na_[is] = na; - zv_[is] = s->zval(); + zv_[is] = s->ztot(); rcps_[is] = s->rcps(); + + core_charge_ |= s->has_nlcc(); + } + + if ( core_charge_ ) + { + vxc_g.resize(ngloc); + rhocore_sp_g.resize(nsp_); + for ( int is = 0; is < nsp_; is++ ) + rhocore_sp_g[is].resize(ngloc); + rhocore_g.resize(ngloc); + rhocore_r.resize(vft->np012loc()); + // set rhocore_r ptr in ChargeDensity + cd_.rhocore_r = &rhocore_r[0]; } // FT for interpolation of wavefunctions on the fine grid @@ -156,6 +175,11 @@ EnergyFunctional::EnergyFunctional( Sample& s, const ChargeDensity& cd) wf.sd(0,ikp)->basis()); } + // Electric enthalpy + el_enth_ = 0; + if ( s_.ctrl.polarization != "OFF" ) + el_enth_ = new ElectricEnthalpy(s_); + sf.init(tau0,*vbasis_); cell_moved(); @@ -167,6 +191,7 @@ EnergyFunctional::EnergyFunctional( Sample& s, const ChargeDensity& cd) //////////////////////////////////////////////////////////////////////////////// EnergyFunctional::~EnergyFunctional(void) { + delete el_enth_; delete xco; for ( int ikp = 0; ikp < s_.wf.nkp(); ikp++ ) { @@ -184,10 +209,10 @@ EnergyFunctional::~EnergyFunctional(void) s_.ctxt_.dmax(1,1,&tmax,1); if ( s_.ctxt_.myproc()==0 ) { - cout << "" + string s = "name=\"" + (*i).first + "\""; + cout << "" << endl; } } @@ -207,7 +232,7 @@ void EnergyFunctional::update_vhxc(bool compute_stress) const double *const g2i = vbasis_->g2i_ptr(); const double fpi = 4.0 * M_PI; const int ngloc = vbasis_->localsize(); - double sum[2], tsum[2]; + double sum[5], tsum[5]; // compute total electronic density: rhoelg = rho_up + rho_dn if ( wf.nspin() == 1 ) @@ -229,11 +254,33 @@ void EnergyFunctional::update_vhxc(bool compute_stress) // compute xc energy, update self-energy operator and potential tmap["exc"].start(); for ( int ispin = 0; ispin < wf.nspin(); ispin++ ) - fill(v_r[ispin].begin(),v_r[ispin].end(),0.0); + memset((void*)&v_r[ispin][0], 0, vft->np012loc()*sizeof(double)); + xco->update(v_r, compute_stress); exc_ = xco->exc(); + dxc_ = xco->dxc(); if ( compute_stress ) xco->compute_stress(sigma_exc); + + if ( core_charge_ ) + { + // compute Fourier coefficients of Vxc + for ( int ispin = 0; ispin < wf.nspin(); ispin++ ) + { + copy(v_r[ispin].begin(),v_r[ispin].end(),tmp_r.begin()); + if ( ispin == 0 ) + { + vft->forward(&tmp_r[0],&vxc_g[0]); + } + else + { + vft->forward(&tmp_r[0],&vtemp[0]); + for ( int ig = 0; ig < ngloc; ig++ ) + vxc_g[ig] = 0.5 * ( vxc_g[ig] + vtemp[ig] ); + } + } + } + tmap["exc"].stop(); // compute local potential energy: @@ -248,24 +295,36 @@ void EnergyFunctional::update_vhxc(bool compute_stress) } sum[0] *= omega; // sum[0] contains eps - // Hartree energy - ehart_ = 0.0; + // Hartree energy and electron-electron energy (without pseudocharges) double ehsum = 0.0; + double ehesum = 0.0; + double ehepsum = 0.0; + double ehpsum = 0.0; for ( int ig = 0; ig < ngloc; ig++ ) { - const complex tmp = rhoelg[ig] + rhopst[ig]; - ehsum += norm(tmp) * g2i[ig]; - rhogt[ig] = tmp; + const complex r = rhoelg[ig]; + const complex rp = rhopst[ig]; + ehsum += norm(r+rp) * g2i[ig]; + ehesum += norm(r) * g2i[ig]; + ehepsum += 2.0*real(conj(r)*rp * g2i[ig]); + ehpsum += norm(rp) * g2i[ig]; + rhogt[ig] = r+rp; } // factor 1/2 from definition of Ehart cancels with half sum over G // Note: rhogt[ig] includes a factor 1/Omega - // Factor omega in next line yields prefactor 4 pi / omega in + // Factor omega in next line yields prefactor 4 pi / omega in Ehart sum[1] = omega * fpi * ehsum; - // tsum[1] contains ehart + sum[2] = omega * fpi * ehesum; + sum[3] = omega * fpi * ehepsum; + sum[4] = omega * fpi * ehpsum; + + MPI_Allreduce(sum,tsum,5,MPI_DOUBLE,MPI_SUM,vbasis_->comm()); - MPI_Allreduce(sum,tsum,2,MPI_DOUBLE,MPI_SUM,vbasis_->comm()); - eps_ = tsum[0]; - ehart_ = tsum[1]; + eps_ = tsum[0]; + ehart_ = tsum[1]; + ehart_e_ = tsum[2]; + ehart_ep_ = tsum[3]; + ehart_p_ = tsum[4]; // compute vlocal_g = vion_local_g + vhart_g // where vhart_g = 4 * pi * (rhoelg + rhopst) * g2i @@ -296,6 +355,9 @@ void EnergyFunctional::update_vhxc(bool compute_stress) v_r[1][i] += vloc; } } + + if ( el_enth_ ) + el_enth_->update(); } //////////////////////////////////////////////////////////////////////////////// @@ -535,11 +597,22 @@ double EnergyFunctional::energy(bool compute_hpsi, Wavefunction& dwf, { const complex sg = s[ig]; const complex rg = rhogt[ig]; + Species *s = s_.atoms.species_list[is]; + const double g = vbasis_->g_ptr()[ig]; + const double gi = vbasis_->gi_ptr()[ig]; // next line: keep only real part - const double temp = fac2 * - ( rg.real() * sg.real() + - rg.imag() * sg.imag() ) - * rhops[is][ig] * g2i[ig]; + // contribution of pseudocharge of ion + double temp = fac2 * (rg.real() * sg.real() + rg.imag() * sg.imag()) + * rhops[is][ig] * g2i[ig]; + if ( core_charge_ ) + { + double rhoc_g, drhoc_g; + s->drhocoreg(g,rhoc_g,drhoc_g); + // next line: keep only real part + // contribution of core correction + temp -= (vxc_g[ig].real() * sg.real() + vxc_g[ig].imag() * sg.imag()) + * drhoc_g * gi * omega_inv / fpi; + } const double tgx = g_x[ig]; const double tgy = g_y[ig]; @@ -604,6 +677,38 @@ double EnergyFunctional::energy(bool compute_hpsi, Wavefunction& dwf, ets_ = - wf_entropy * s_.ctrl.fermi_temp * boltz; } etotal_ = ekin_ + econf_ + eps_ + enl_ + ecoul_ + exc_ + ets_ + eexf_; + enthalpy_ = etotal_; + + // Electric enthalpy + eefield_ = 0.0; + if ( el_enth_ ) + { + tmap["el_enth_energy"].start(); + eefield_ = el_enth_->enthalpy(dwf,compute_hpsi); + tmap["el_enth_energy"].stop(); + enthalpy_ += eefield_; + + if ( compute_forces ) + { + for ( int is = 0; is < nsp_; is++ ) + for ( int ia = 0; ia < na_[is]; ia++ ) + { + D3vector f = zv_[is] * s_.ctrl.e_field; + fion[is][3*ia] += f.x; + fion[is][3*ia+1] += f.y; + fion[is][3*ia+2] += f.z; + } + } + } + + epv_ = 0.0; + if ( compute_stress ) + { + valarray sigma_ext(s_.ctrl.ext_stress,6); + const double pext = (sigma_ext[0]+sigma_ext[1]+sigma_ext[2])/3.0; + epv_ = pext * omega; + enthalpy_ += epv_; + } if ( compute_hpsi ) { @@ -670,6 +775,8 @@ double EnergyFunctional::energy(bool compute_hpsi, Wavefunction& dwf, { double tmp = fpi * rhops[is][ig] * g2i[ig]; vtemp[ig] = tmp * conj(rhogt[ig]) + vps[is][ig] * conj(rhoelg[ig]); + if ( core_charge_ ) + vtemp[ig] += conj(vxc_g[ig]) * rhocore_sp_g[is][ig]; } fion_nl.resize(3*na_[is]); fion_nl = 0.0; @@ -747,8 +854,10 @@ double EnergyFunctional::energy(bool compute_hpsi, Wavefunction& dwf, } if ( compute_stress ) + { sigma = sigma_ekin + sigma_econf + sigma_eps + sigma_enl + sigma_ehart + sigma_exc + sigma_esr; + } if ( debug_stress && s_.ctxt_.onpe0() ) { @@ -863,6 +972,7 @@ double EnergyFunctional::energy(bool compute_hpsi, Wavefunction& dwf, void EnergyFunctional::atoms_moved(void) { const AtomSet& atoms = s_.atoms; + const Wavefunction& wf = s_.wf; int ngloc = vbasis_->localsize(); // fill tau0 with values in atom_list @@ -874,6 +984,29 @@ void EnergyFunctional::atoms_moved(void) memset( (void*)&vion_local_g[0], 0, 2*ngloc*sizeof(double) ); memset( (void*)&dvion_local_g[0], 0, 2*ngloc*sizeof(double) ); memset( (void*)&rhopst[0], 0, 2*ngloc*sizeof(double) ); + + if ( core_charge_ ) + { + // recalculate Fourier coefficients of the core charge + assert(rhocore_g.size()==ngloc); + memset( (void*)&rhocore_g[0], 0, 2*ngloc*sizeof(double) ); + const double spin_fac = wf.cell().volume() / wf.nspin(); + for ( int is = 0; is < atoms.nsp(); is++ ) + { + complex *s = &sf.sfac[is][0]; + for ( int ig = 0; ig < ngloc; ig++ ) + { + const complex sg = s[ig]; + // divide core charge by two if two spins + rhocore_g[ig] += spin_fac * sg * rhocore_sp_g[is][ig]; + } + } + // recalculate real-space core charge + vft->backward(&rhocore_g[0],&tmp_r[0]); + for ( int i = 0; i < tmp_r.size(); i++ ) + rhocore_r[i] = tmp_r[i].real(); + } + for ( int is = 0; is < atoms.nsp(); is++ ) { complex *s = &sf.sfac[is][0]; @@ -1007,13 +1140,18 @@ void EnergyFunctional::cell_moved(void) { Species *s = atoms.species_list[is]; const double * const g = vbasis_->g_ptr(); - double v,dv; + double v,dv,rhoc; for ( int ig = 0; ig < ngloc; ig++ ) { rhops[is][ig] = s->rhopsg(g[ig]) * omega_inv; s->dvlocg(g[ig],v,dv); vps[is][ig] = v * omega_inv; dvps[is][ig] = dv * omega_inv; + if ( core_charge_ ) + { + s->rhocoreg(g[ig],rhoc); + rhocore_sp_g[is][ig] = rhoc * omega_inv; + } } } @@ -1035,18 +1173,20 @@ void EnergyFunctional::print(ostream& os) const os.setf(ios::fixed,ios::floatfield); os.setf(ios::right,ios::adjustfield); os << setprecision(8); - os << " " << setw(15) << ekin() << " \n" - << " " << setw(15) << econf() << " \n" - << " " << setw(15) << eps() << " \n" - << " " << setw(15) << enl() << " \n" - << " " << setw(15) << ecoul() << " \n" - << " " << setw(15) << exc() << " \n" - << " " << setw(15) << esr() << " \n" - << " " << setw(15) << eself() << " \n" - << " " << setw(15) << ets() << " \n" - << " " << setw(15) << eexf() << " \n" - << " " << setw(15) << etotal() << " \n" - << flush; + os << " " << setw(15) << ekin() << " \n" + << " " << setw(15) << econf() << " \n" + << " " << setw(15) << eps() << " \n" + << " " << setw(15) << enl() << " \n" + << " " << setw(15) << ecoul() << " \n" + << " " << setw(15) << exc() << " \n" + << " " << setw(15) << esr() << " \n" + << " " << setw(15) << eself() << " \n" + << " " << setw(15) << ets() << " \n" + << " " << setw(15) << eexf() << " \n" + << " " << setw(15) << etotal() << " \n" + << " " << setw(15) << epv() << " \n" + << " " << setw(15) << eefield() << " \n" + << " " << setw(15) << enthalpy() << " " << endl; } //////////////////////////////////////////////////////////////////////////////// diff --git a/src/EnergyFunctional.h b/src/EnergyFunctional.h index 3246f74..f7a624a 100644 --- a/src/EnergyFunctional.h +++ b/src/EnergyFunctional.h @@ -25,8 +25,10 @@ #include #include #include "StructureFactor.h" +#include "ElectricEnthalpy.h" #include "Timer.h" +class D3vector; class Sample; class Basis; class AtomSet; @@ -45,29 +47,35 @@ class EnergyFunctional private: Sample& s_; - const ChargeDensity& cd_; + ChargeDensity& cd_; Basis* vbasis_; FourierTransform *vft; std::vector ft; StructureFactor sf; XCOperator* xco; + ElectricEnthalpy* el_enth_; std::vector > nlp; // nlp[ispin][ikp] std::vector cfp; // cfp[ikp] - std::vector > vps, dvps, rhops; + std::vector > vps, dvps, rhops, rhocore_sp_g; + std::vector rhocore_r; std::vector > tmp_r, vion_local_g, - dvion_local_g, vlocal_g, rhopst, rhogt, rhoelg, vtemp; + dvion_local_g, vxc_g, vlocal_g, rhopst, rhogt, rhoelg, vtemp, rhocore_g; std::vector > tau0, fion_esr; std::vector > fext; std::vector zv_, rcps_; std::vector na_; int nsp_; - double ekin_, econf_, eps_, enl_, ehart_, + double ekin_, econf_, eps_, enl_, ehart_, ehart_e_, ehart_ep_, ehart_p_, ecoul_, exc_, esr_, eself_, ets_, eexf_, etotal_; + double dxc_; + double epv_, eefield_, enthalpy_; std::valarray sigma_ekin,sigma_econf,sigma_eps,sigma_ehart,sigma_exc, sigma_enl, sigma_esr, sigma; + bool core_charge_; + public: std::vector > v_r; @@ -83,12 +91,21 @@ class EnergyFunctional double eps(void) const { return eps_; } double enl(void) const { return enl_; } double ehart(void) const { return ehart_; } + double ehart_e(void) const { return ehart_e_; } + double ehart_ep(void) const { return ehart_ep_; } + double ehart_p(void) const { return ehart_p_; } double ecoul(void) const { return ecoul_; } double exc(void) const { return exc_; } + double dxc(void) const { return dxc_; } double esr(void) const { return esr_; } double eself(void) const { return eself_; } double ets(void) const { return ets_; } double eexf(void) const { return eexf_; } + double eefield(void) const { return eefield_; } + double epv(void) const { return epv_; } + double enthalpy(void) const { return enthalpy_; } + + ElectricEnthalpy* el_enth() { return el_enth_; } const ConfinementPotential *confpot(int ikp) const { return cfp[ikp]; } @@ -99,7 +116,7 @@ class EnergyFunctional void print(std::ostream& os) const; - EnergyFunctional(Sample& s, const ChargeDensity& cd); + EnergyFunctional(Sample& s, ChargeDensity& cd); ~EnergyFunctional(); }; std::ostream& operator << ( std::ostream& os, const EnergyFunctional& e ); diff --git a/src/ExchangeOperator.C b/src/ExchangeOperator.C index efb62ea..cae900e 100644 --- a/src/ExchangeOperator.C +++ b/src/ExchangeOperator.C @@ -37,10 +37,10 @@ using namespace std; //////////////////////////////////////////////////////////////////////////////// ExchangeOperator::ExchangeOperator( Sample& s, double HFCoeff, const InteractionPotential& interaction_potential ) : - s_(s), wf0_(s.wf), dwf0_(s.wf), wfc_(s.wf), - KPGridPerm_(s), KPGridStat_(s), HFCoeff_(HFCoeff), + s_(s), wf0_(s.wf), dwf0_(s.wf), wfc_(s.wf), HFCoeff_(HFCoeff), interaction_potential_(interaction_potential), - coulomb_(interaction_potential.coulomb()) + coulomb_(interaction_potential.coulomb()), + gcontext_(s.wf.sd(0,0)->context()) { eex_ = 0.0; // exchange energy rcut_ = 1.0; // constant of support function for exchange integration @@ -50,9 +50,6 @@ ExchangeOperator::ExchangeOperator( Sample& s, double HFCoeff, // column communicator vcomm_ = s_.wf.sd(0,0)->basis().comm(); - // global context - gcontext_ = s_.wf.sd(0,0)->context(); - // check if the only kpoint is the gamma point: gamma_only_ = ( s_.wf.nkp()==1 ) && ( s_.wf.sd(0,0)->basis().real() ); @@ -216,10 +213,6 @@ ExchangeOperator::ExchangeOperator( Sample& s, double HFCoeff, force_kpi_.resize( nMaxLocalStates_ * mlocMax ); send_buf_forces_.resize( nMaxLocalStates_ * mlocMax ); } - - // set indices for overlaps - KPGridPerm_.SetOverlapIndices(vbasis_); - KPGridStat_.SetOverlapIndices(vbasis_); } //////////////////////////////////////////////////////////////////////////////// @@ -328,7 +321,7 @@ void ExchangeOperator::apply_VXC_(double mix, Wavefunction& wf_ref, ComplexMatrix &c(s_.wf.sd(ispin,ikp)->c()); ComplexMatrix &dc(dwf.sd(ispin,ikp)->c()); ComplexMatrix &cref(wf_ref.sd(ispin,ikp)->c()); - ComplexMatrix &dcref(dwf_ref.sd(ispin,ikp)->c()); + ComplexMatrix &dcref(dwf_ref.sd(ispin,ikp)->c()); // matproj1 = matproj1.gemm('c','n',1.0,cref,c,0.0); @@ -372,8 +365,6 @@ void ExchangeOperator::add_stress(valarray& sigma_exc) void ExchangeOperator::cell_moved(void) { vbasis_->resize( s_.wf.cell(),s_.wf.refcell(),4.0*s_.wf.ecut()); - KPGridStat_.cell_moved(); - KPGridPerm_.cell_moved(); } //////////////////////////////////////////////////////////////////////////////// @@ -393,8 +384,6 @@ double ExchangeOperator::compute_exchange_for_general_case_( Sample* s, Timer tm; tm.start(); - bool quad_correction = s_.ctrl.debug.find("EXCHANGE_NOQUAD") == string::npos; - const Wavefunction& wf = s->wf; const double omega = wf.cell().volume(); @@ -422,10 +411,6 @@ double ExchangeOperator::compute_exchange_for_general_case_( Sample* s, for ( int iKpi = 0; iKpi < nkpoints; iKpi++ ) numerical_correction[iKpi]=0.0; - // initialize overlaps - KPGridPerm_.InitOverlaps(); - KPGridStat_.InitOverlaps(); - // loop over the kpoints for ( int iKpi = 0; iKpi < nkpoints; iKpi++ ) { @@ -481,14 +466,6 @@ double ExchangeOperator::compute_exchange_for_general_case_( Sample* s, dstatei_[i][j]=0.0; } - if ( KPGridPerm_.Connection() ) - { - if ( iRotationStep != 0 ) - { - KPGridPerm_.EndPermutation(); - } - } - // set number of states of next permutation step // (contained in nNextStatesKpi_) SetNextPermutationStateNumber(); @@ -571,7 +548,7 @@ double ExchangeOperator::compute_exchange_for_general_case_( Sample* s, // if iKpi=0 (first k point) // compute the numerical part of the correction - if ( (iRotationStep==0) ) + if ( iRotationStep==0 ) { const double rc2 = rcut_*rcut_; if ( qpG21_[ig] > 0.0 ) @@ -586,7 +563,7 @@ double ExchangeOperator::compute_exchange_for_general_case_( Sample* s, // correction. Works only if this is the first iKpoint. // // divide weight by 2 as we implicitly counted kpoint j and symmetric - if ( (iRotationStep==0) ) // && ( iKpi==0 ) + if ( iRotationStep==0 ) // && ( iKpi==0 ) { if ( iKpi==iKpj ) { @@ -626,24 +603,6 @@ double ExchangeOperator::compute_exchange_for_general_case_( Sample* s, vft_->forward(&rhor1_[0], &rhog1_[0]); vft_->forward(&rhor2_[0], &rhog2_[0]); - // if we use the connexion of the kpoint grid - if ( KPGridPerm_.Connection() ) - { - // keep the overlap and symmetric - if ( iKpi==iKpj ) - { - KPGridPerm_.AddOverlap(iKpi, iKpj, i, &rhog1_[0], - &rhog2_[0], occ_kj_[j] * spinFactor); - } - else - { - KPGridPerm_.AddOverlap(iKpi, iKpj, i, &rhog1_[0], - &rhog2_[0], occ_kj_[j] * spinFactor); - KPGridStat_.AddOverlap(iKpj, iKpi, j, &rhog1_[0], - &rhog2_[0], occ_ki_[i] * spinFactor); - } - } - // initialize contrib of the states psi(kj,j) to the exchange // energy associated to state psi(ki,i) double ex_ki_i_kj_j = 0.0; @@ -813,12 +772,6 @@ double ExchangeOperator::compute_exchange_for_general_case_( Sample* s, StartForcesPermutation(dci.mloc()); } - // if we use the Kpoint grid connections - if ( KPGridPerm_.Connection() ) - { - KPGridPerm_.StartPermutation(iKpi, iSendTo_, iRecvFr_); - } - CompleteSendingOccupations(iRotationStep); // start occupations permutation @@ -845,11 +798,6 @@ double ExchangeOperator::compute_exchange_for_general_case_( Sample* s, CompleteSendingForces(1); } - // if we use the Kpoint grid connections - if ( KPGridPerm_.Connection() ) - { - KPGridPerm_.EndPermutation(); - } // Terminate permutation FreePermutation(); } @@ -865,9 +813,9 @@ double ExchangeOperator::compute_exchange_for_general_case_( Sample* s, double div_corr = 0.0; const double div_corr_1 = exfac * numerical_correction[iKpi] * - occ_ki_[i]; + occ_ki_[i]; div_corr += div_corr_1; - const double e_div_corr_1 = -div_corr_1; + const double e_div_corr_1 = -0.5 * div_corr_1 * occ_ki_[i]; exchange_sum += e_div_corr_1 * wf.weight(iKpi); // add here contributions to stress from div_corr_1; @@ -875,9 +823,10 @@ double ExchangeOperator::compute_exchange_for_general_case_( Sample* s, if ( vbasis_->mype() == 0 ) { // for screened Coulomb potential O(Omega^{-5/3}) - if ( coulomb_ ) { + if ( coulomb_ ) + { const double div_corr_2 = - exfac * rcut_ * rcut_ * occ_ki_[i] * - KPGridPerm_.weight(iKpi); + wf.weight(iKpi); div_corr += div_corr_2; const double e_div_corr_2 = -0.5 * div_corr_2 * occ_ki_[i]; exchange_sum += e_div_corr_2 * wf.weight(iKpi); @@ -892,50 +841,6 @@ double ExchangeOperator::compute_exchange_for_general_case_( Sample* s, // no contribution to stress from div_corr_3 } - // Quadratic corrections - // only implemented for Coulomb potential - if ( quad_correction and coulomb_ ) - { - // beta_x, beta_y and beta_z: curvature of rho(G=0) - double s0=KPGridPerm_.overlaps_local(iKpi,i); - - double s1_x=KPGridPerm_.overlaps_first_kx(iKpi,i)+ - KPGridStat_.overlaps_first_kx(iKpi,i); - double s2_x=KPGridPerm_.overlaps_second_kx(iKpi,i)+ - KPGridStat_.overlaps_second_kx(iKpi,i); - double d1_x=KPGridPerm_.distance_first_kx(iKpi); - double d2_x=KPGridPerm_.distance_second_kx(iKpi); - double beta_x=(s1_x+s2_x-2.0*s0)/(d1_x*d1_x+d2_x*d2_x)* - KPGridPerm_.integral_kx(iKpi); - - double s1_y=KPGridPerm_.overlaps_first_ky(iKpi,i)+ - KPGridStat_.overlaps_first_ky(iKpi,i); - double s2_y=KPGridPerm_.overlaps_second_ky(iKpi,i)+ - KPGridStat_.overlaps_second_ky(iKpi,i); - double d1_y=KPGridPerm_.distance_first_ky(iKpi); - double d2_y=KPGridPerm_.distance_second_ky(iKpi); - double beta_y=(s1_y+s2_y-2.0*s0)/(d1_y*d1_y+d2_y*d2_y)* - KPGridPerm_.integral_ky(iKpi); - - double s1_z=KPGridPerm_.overlaps_first_kz(iKpi,i)+ - KPGridStat_.overlaps_first_kz(iKpi,i); - double s2_z=KPGridPerm_.overlaps_second_kz(iKpi,i)+ - KPGridStat_.overlaps_second_kz(iKpi,i); - double d1_z=KPGridPerm_.distance_first_kz(iKpi); - double d2_z=KPGridPerm_.distance_second_kz(iKpi); - double beta_z=(s1_z+s2_z-2.0*s0)/(d1_z*d1_z+d2_z*d2_z)* - KPGridPerm_.integral_kz(iKpi); - - // note: factor occ_ki_[i] * spinFactor already in beta - const double beta_sum = beta_x + beta_y + beta_z ; - const double div_corr_4 = (4.0 * M_PI / omega ) * beta_sum * - KPGridPerm_.weight(iKpi); - div_corr += div_corr_4; - const double e_div_corr_4 = -0.5 * div_corr_4 * occ_ki_[i]; - exchange_sum += e_div_corr_4 * wf.weight(iKpi); - - } // if quad_correction - // contribution of divergence corrections to forces on wave functions if (dwf) { @@ -961,13 +866,14 @@ double ExchangeOperator::compute_exchange_for_general_case_( Sample* s, extot *= HFCoeff_; tm.stop(); +#ifdef DEBUG if ( gcontext_.onpe0() ) { cout << setprecision(10); - cout << " total exchange = " << extot << " (a.u.)\n"; cout << " total exchange computation time: " << tm.real() << " s" << endl; } +#endif return extot; } @@ -979,10 +885,6 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, Timer tm; Timer tmb; - bool quad_correction = s_.ctrl.debug.find("EXCHANGE_NOQUAD") == string::npos; - - assert(KPGridPerm_.Connection()); - wfc_ = wf; cout << setprecision(10); @@ -1011,8 +913,19 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, if ( use_bisection_ ) { tmb.start(); - const int maxsweep = 50; - double tol = 1.e-6; + int maxsweep = 50; + if ( s_.ctrl.debug.find("BISECTION_MAXSWEEP") != string::npos ) + { + // override tolerance for bisection + istringstream is(s_.ctrl.debug); + string s; + is >> s >> maxsweep; + if ( gcontext_.onpe0() ) + cout << " override bisection maxsweep value: maxsweep = " + << maxsweep << endl; + assert(maxsweep >= 0); + } + double tol = 0.001; if ( s_.ctrl.debug.find("BISECTION_TOL") != string::npos ) { // override tolerance for bisection @@ -1023,18 +936,44 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, cout << " override bisection tol value: tol = " << tol << endl; assert(tol > 0.0); } +#if TIMING + Timer tmbtransf; + tmbtransf.start(); +#endif bisection_[ispin]->compute_transform(*wfc_.sd(ispin,0),maxsweep,tol); +#if TIMING + tmbtransf.stop(); + Timer tmbcomploc; + tmbcomploc.start(); +#endif bisection_[ispin]->compute_localization(s_.ctrl.btHF); +#if TIMING + tmbcomploc.stop(); +#endif // copy of localization vector from Bisection object localization_ = bisection_[ispin]->localization(); +#if TIMING + Timer tmbsize, tmbpair; + tmbsize.start(); +#endif if ( gcontext_.onpe0() ) { cout << " ExchangeOperator: bisection size: ispin=" << ispin << ": " << bisection_[ispin]->total_size() << endl; + } +#if TIMING + tmbsize.stop(); + tmbpair.start(); +#endif + if ( gcontext_.onpe0() ) + { cout << " ExchangeOperator: pair fraction: ispin=" << ispin << ": " << bisection_[ispin]->pair_fraction() << endl; } +#if TIMING + tmbpair.stop(); +#endif // copy the orthogonal transformation u to uc_[ispin] *uc_[ispin] = bisection_[ispin]->u(); @@ -1048,6 +987,10 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, // compute the degree of the vertices of the exchange graph // using the localization vector +#if TIMING + Timer tmb_ov; + tmb_ov.start(); +#endif vector degree(nst); for ( int i = 0; i < nst; i++ ) { @@ -1059,6 +1002,15 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, } degree[i] = count; } +#if TIMING + tmb_ov.stop(); + if ( gcontext_.onpe0() ) + { + cout << setprecision(3); + cout << " ExchangeOperator: bisection overlap time: " + << tmb_ov.real() << " s" << endl; + } +#endif // permutation index vector index(nst); @@ -1215,8 +1167,27 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, } #endif +#if TIMING + Timer tmblapiv; + tmblapiv.start(); +#endif // apply the permutation to the columns of uc uc_[ispin]->lapiv('B','C',&locpivot[0]); +#if TIMING + tmblapiv.stop(); + if ( gcontext_.onpe0() ) + { + cout << setprecision(3); + cout << " ExchangeOperator: bisection size time: " + << tmbsize.real() << " s" << endl; + cout << setprecision(3); + cout << " ExchangeOperator: bisection pair time: " + << tmbpair.real() << " s" << endl; + cout << setprecision(3); + cout << " ExchangeOperator: bisection lapiv time: " + << tmblapiv.real() << " s" << endl; + } +#endif #if DEBUG // recompute the degree of the vertices of the exchange graph @@ -1269,11 +1240,29 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, cout << " ExchangeOperator: bisection distribution disabled" << endl; } // if distribute +#if TIMING + Timer tmbfwd; + tmbfwd.start(); +#endif bisection_[ispin]->forward(*uc_[ispin], *wfc_.sd(ispin,0)); +#if TIMING + tmbfwd.stop(); +#endif tmb.stop(); if ( gcontext_.onpe0() ) { +#if TIMING + cout << setprecision(3); + cout << " ExchangeOperator: bisection compute transform time: " + << tmbtransf.real() << " s" << endl; + cout << setprecision(3); + cout << " ExchangeOperator: bisection compute localization time: " + << tmbcomploc.real() << " s" << endl; + cout << setprecision(3); + cout << " ExchangeOperator: bisection forward time: " + << tmbfwd.real() << " s" << endl; +#endif cout << setprecision(3); cout << " ExchangeOperator: bisection time: " << tmb.real() << " s" << endl; @@ -1283,9 +1272,6 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, tm.start(); // compute exchange - // initialize overlaps - KPGridPerm_.InitOverlaps(); - KPGridStat_.InitOverlaps(); // real-space local states -> statej_[i][ir] // loop over states 2 by 2 @@ -1294,10 +1280,11 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, wft_->backward(c.cvalptr(i*c.mloc()), c.cvalptr((i+1)*c.mloc()), &tmp_[0]); double *p = (double *)&tmp_[0]; - for ( int ir = 0, irp = 0; ir < np012loc_; ir++, irp+=2 ) +#pragma omp parallel for + for ( int ir = 0; ir < np012loc_; ir++ ) { - statej_[i][ir]=p[irp]; - statej_[i+1][ir]=p[irp+1]; + statej_[i][ir]=p[2*ir]; + statej_[i+1][ir]=p[2*ir+1]; } } @@ -1507,10 +1494,11 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, // copy the result in state[i] and state[j] double *p = (double *)&tmp_[0]; - for ( int ir = 0, irp = 0; ir < np012loc_; ir++, irp+=2 ) +#pragma omp parallel for + for ( int ir = 0; ir < np012loc_; ir++ ) { - statei_[i][ir]=p[irp]; - statei_[j][ir]=p[irp+1]; + statei_[i][ir]=p[2*ir]; + statei_[j][ir]=p[2*ir+1]; } } @@ -1541,9 +1529,6 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, dstatei_[i][j] = 0.0; } - if ( iRotationStep != 0 ) - KPGridPerm_.EndPermutation(); - // nNextStatesKpi: number of states of next permutation step SetNextPermutationStateNumber(); // start sending states in send_buf_states_ @@ -1573,7 +1558,8 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, double *pj1 = (double *)&statej_[j1][0]; double *pj2 = (double *)&statej_[j2][0]; - for ( int ir = 0, ip = 0; ir < np012loc_; ir++, ip+=2 ) +#pragma omp parallel for + for ( int ip = 0; ip < 2*np012loc_; ip+=2 ) { p[ip] = pi1[ip] * pj1[ip]; p[ip+1] = pi2[ip] * pj2[ip]; @@ -1583,37 +1569,6 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, // Fourier transform the pair density vft_->forward(&rhor1_[0], &rhog1_[0], &rhog2_[0]); - // if i1 = j1 and iRotation=0, add the contribution to the - // state psi(0,i1) - if ( ( i1==j1 ) && ( iRotationStep==0 ) ) - { - KPGridPerm_.AddOverlap(0, 0, i1, &rhog1_[0], &rhog1_[0], - occ_kj_[j1] * spinFactor); - } - // otherwise, add contributions for both states - else - { - KPGridPerm_.AddOverlap(0, 0, i1, &rhog1_[0], &rhog1_[0], - occ_kj_[j1] * spinFactor); - KPGridStat_.AddOverlap(0, 0, j1, &rhog1_[0], &rhog1_[0], - occ_ki_[i1] * spinFactor); - } - // if i2=j2 and iRotation=0, add the contribution to the - // state psi(0,i2) - if ( ( i2==j2 ) && ( iRotationStep==0 ) ) - { - KPGridPerm_.AddOverlap(0, 0, i2, &rhog2_[0], &rhog2_[0], - occ_kj_[j2] * spinFactor); - } - // otherwise, add contributions for both states - else - { - KPGridPerm_.AddOverlap(0, 0, i2, &rhog2_[0], &rhog2_[0], - occ_kj_[j2] * spinFactor); - KPGridStat_.AddOverlap(0, 0, j2, &rhog2_[0], &rhog2_[0], - occ_ki_[i2] * spinFactor); - } - // compute contributions to the exchange energy and forces on wfs ex_sum_1 = 0.0; ex_sum_2 = 0.0; @@ -1709,7 +1664,8 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, double *dp = (double *) &dstatei_[i1][0]; double *pj = (double *) &statej_[j1][0]; double *pr = (double *) &rhor1_[0]; - for ( int ir = 0, ip = 0; ir < np012loc_; ir++, ip +=2 ) +#pragma omp parallel for + for ( int ip = 0; ip < 2*np012loc_; ip+=2 ) dp[ip] += pj[ip] * pr[ip] * weight; } } @@ -1734,7 +1690,8 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, double *pj = (double *) &statej_[j1][0]; double *pr = (double *) &rhor1_[0]; - for ( int ir = 0, ip = 0; ir < np012loc_; ir++, ip +=2 ) +#pragma omp parallel for + for ( int ip = 0; ip < 2*np012loc_; ip+=2 ) { dpi[ip] += pj[ip] * pr[ip] * weightj; dpj[ip] += pi[ip] * pr[ip] * weighti; @@ -1763,7 +1720,8 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, double *pr = (double *) &rhor1_[0]; pr = pr + 1; - for ( int ir = 0, ip = 0; ir < np012loc_; ir++, ip +=2 ) +#pragma omp parallel for + for ( int ip = 0; ip < 2*np012loc_; ip+=2 ) dp[ip] += pj[ip] * pr[ip] * weight; } } @@ -1789,7 +1747,8 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, double *pr = (double *) &rhor1_[0]; pr = pr + 1; - for ( int ir = 0, ip = 0; ir < np012loc_; ir++, ip +=2 ) +#pragma omp parallel for + for ( int ip = 0; ip < 2*np012loc_; ip+=2 ) { dpi[ip] += pj[ip] * pr[ip] * weightj; dpj[ip] += pi[ip] * pr[ip] * weighti; @@ -1808,7 +1767,8 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, double *p = (double *)&rhor1_[0]; double *pi1 = (double *)&statei_[i1][0]; double *pj1 = (double *)&statej_[j1][0]; - for ( int ir = 0, ip=0; ir < np012loc_; ir++, ip+=2 ) +#pragma omp parallel for + for ( int ip = 0; ip < 2*np012loc_; ip+=2 ) { p[ip] = pi1[ip] * pj1[ip]; p[ip+1] = 0.0; @@ -1816,22 +1776,6 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, vft_->forward(&rhor1_[0], &rhog1_[0]); - // if i1=j1 and iRotation=0, add the contribution to the - // state psi(0,i1) - if ( ( i1==j1 ) && ( iRotationStep==0 ) ) - { - KPGridPerm_.AddOverlap(0, 0, i1, &rhog1_[0], &rhog1_[0], - occ_kj_[j1] * spinFactor); - } - // otherwise, add contributions for both states - else - { - KPGridPerm_.AddOverlap(0, 0, i1, &rhog1_[0], &rhog1_[0], - occ_kj_[j1] * spinFactor); - KPGridStat_.AddOverlap(0, 0, j1, &rhog1_[0], &rhog1_[0], - occ_ki_[i1] * spinFactor); - } - ex_sum_1 = 0.0; if ( compute_stress ) { @@ -1904,7 +1848,8 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, double *dp = (double *) &dstatei_[i1][0]; double *pj = (double *) &statej_[j1][0]; double *pr = (double *) &rhor1_[0]; - for ( int ir = 0, ip = 0; ir < np012loc_; ir++, ip +=2 ) +#pragma omp parallel for + for ( int ip = 0; ip < 2*np012loc_; ip+=2 ) dp[ip] += pj[ip] * pr[ip] * weight; } } @@ -1929,7 +1874,8 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, double *pj = (double *) &statej_[j1][0]; double *pr = (double *) &rhor1_[0]; - for ( int ir = 0, ip = 0; ir < np012loc_; ir++, ip +=2 ) +#pragma omp parallel for + for ( int ip = 0; ip < 2*np012loc_; ip+=2 ) { dpi[ip] += pj[ip] * pr[ip] * weightj; dpj[ip] += pi[ip] * pr[ip] * weighti; @@ -1984,10 +1930,11 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, double *p = (double *)&tmp_[0]; double *dpr = (double *)&dstatei_[i][0]; double *dpi = (double *)&dstatei_[j][0]; - for (int ir = 0, irp = 0; ir < np012loc_; ir++, irp+=2) +#pragma omp parallel for + for ( int ip = 0; ip < 2*np012loc_; ip+=2 ) { - p[irp]=dpr[irp]; - p[irp+1]=dpi[irp]; + p[ip]=dpr[ip]; + p[ip+1]=dpi[ip]; } // transform the pair of states @@ -2023,7 +1970,6 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, StartForcesPermutation(dc.mloc()); } // if dwf - KPGridPerm_.StartPermutation(0, iSendTo_, iRecvFr_); CompleteSendingOccupations(iRotationStep); StartOccupationsPermutation(); @@ -2094,8 +2040,6 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, CompleteSendingForces(1); } - KPGridPerm_.EndPermutation(); - FreePermutation(); // transform accumulated real-space forces to G space @@ -2105,10 +2049,11 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, double *p = (double *)&tmp_[0]; double *dpr = (double *)&dstatej_[i][0]; double *dpi = (double *)&dstatej_[i+1][0]; - for ( int ir = 0, irp = 0; ir < np012loc_; ir++, irp+=2 ) +#pragma omp parallel for + for ( int ip = 0; ip < 2*np012loc_; ip+=2 ) { - p[irp]=dpr[irp]; - p[irp+1]=dpi[irp]; + p[ip]=dpr[ip]; + p[ip+1]=dpi[ip]; } // transform the pair of forces wft_->forward(&(tmp_)[0], &buffer_forces_1_[0], &buffer_forces_2_[0]); @@ -2191,51 +2136,6 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, // no contribution to stress } - // Quadratic corrections (only implemented for Coulomb potential) - if ( quad_correction and coulomb_ ) - { - // compute the curvature terms - // quadratic exchange correction - double s0=KPGridPerm_.overlaps_local(0,i); - double s1_x=KPGridPerm_.overlaps_first_kx(0,i)+ - KPGridStat_.overlaps_first_kx(0,i); - double s2_x=KPGridPerm_.overlaps_second_kx(0,i)+ - KPGridStat_.overlaps_second_kx(0,i); - double d1_x=KPGridPerm_.distance_first_kx(0); - double d2_x=KPGridPerm_.distance_second_kx(0); - double beta_x=(s1_x+s2_x-2.0*s0)/(d1_x*d1_x+d2_x*d2_x)* - KPGridPerm_.integral_kx(0); - - double s1_y=KPGridPerm_.overlaps_first_ky(0,i)+ - KPGridStat_.overlaps_first_ky(0,i); - double s2_y=KPGridPerm_.overlaps_second_ky(0,i)+ - KPGridStat_.overlaps_second_ky(0,i); - double d1_y=KPGridPerm_.distance_first_ky(0); - double d2_y=KPGridPerm_.distance_second_ky(0); - double beta_y=(s1_y+s2_y-2.0*s0)/(d1_y*d1_y+d2_y*d2_y)* - KPGridPerm_.integral_ky(0); - - double s1_z=KPGridPerm_.overlaps_first_kz(0,i)+ - KPGridStat_.overlaps_first_kz(0,i); - double s2_z=KPGridPerm_.overlaps_second_kz(0,i)+ - KPGridStat_.overlaps_second_kz(0,i); - double d1_z=KPGridPerm_.distance_first_kz(0); - double d2_z=KPGridPerm_.distance_second_kz(0); - double beta_z=(s1_z+s2_z-2.0*s0)/(d1_z*d1_z+d2_z*d2_z)* - KPGridPerm_.integral_kz(0); - - // note: factor occ_ki_[i] * spinFactor already in beta - const double beta_sum = beta_x + beta_y + beta_z ; - const double div_corr_4 = (4.0 * M_PI / omega ) * beta_sum; - div_corr += div_corr_4; - const double e_div_corr_4 = -0.5 * div_corr_4 * occ_ki_[i]; - exchange_sum += e_div_corr_4; - const double fac4 = ( 4.0 * M_PI / omega ) * occ_ki_[i]; - sigma_exhf_[0] += ( e_div_corr_4 + fac4 * beta_x ) / omega; - sigma_exhf_[1] += ( e_div_corr_4 + fac4 * beta_y ) / omega; - sigma_exhf_[2] += ( e_div_corr_4 + fac4 * beta_z ) / omega; - } - // contribution of divergence corrections to forces on wave functions if (dwf) { @@ -2277,8 +2177,7 @@ double ExchangeOperator::compute_exchange_at_gamma_(const Wavefunction &wf, #ifdef DEBUG if ( gcontext_.onpe0() ) { - cout << setprecision(10); - cout << " total exchange = " << extot << " (a.u.)\n"; + cout << setprecision(3); cout << " total exchange computation time: " << tm.real() << " s" << endl; if ( compute_stress ) diff --git a/src/ExchangeOperator.h b/src/ExchangeOperator.h index c03d3f0..be038d2 100644 --- a/src/ExchangeOperator.h +++ b/src/ExchangeOperator.h @@ -19,7 +19,6 @@ #include "Sample.h" #include "SlaterDet.h" #include "FourierTransform.h" -#include "KPGridConnectivity.h" #include "InteractionPotential.h" #ifndef EXCHANGEOPERATOR_H @@ -54,10 +53,6 @@ class ExchangeOperator void apply_VXC_(double mix, Wavefunction& wf_ref, Wavefunction& dwf_ref, Wavefunction& dwf); - // Connectivity of the kpoint Grid - KPConnectivity KPGridPerm_; - KPConnectivity KPGridStat_; - // basis for pair densities Basis* vbasis_; int np0v_, np1v_, np2v_; @@ -117,7 +112,7 @@ class ExchangeOperator vector > buffer_forces_2_; // contexts and communicators - Context gcontext_; + const Context &gcontext_; MPI_Comm vcomm_; // Communications diff --git a/src/ExponentialIntegral.C b/src/ExponentialIntegral.C index e2d7788..b0dcf78 100644 --- a/src/ExponentialIntegral.C +++ b/src/ExponentialIntegral.C @@ -28,7 +28,8 @@ using namespace std; -namespace util { +namespace util +{ // Calculate the exponential integral E_1(x): // @@ -42,16 +43,19 @@ namespace util { // // Input: x - position at which exponential integral is evaluated (x > 0) // Return: E1(x) -double E1(const double x) { +double E1(const double x) +{ - if (x < series_cutoff) { + if (x < series_cutoff) + { // use series expansion return E1_series(x); - } else { + } + else + { // use gauss_laguerre expression return exp(-x) * gauss_laguerre(x); } - } // Series expansion of the exponential integral @@ -68,8 +72,8 @@ double E1(const double x) { // n_cut is set to 25 // Input: x - position at which exponential integral is evaluated (x > 0) // Return: approximation by series expansion for E_1(x) -double E1_series(const double x) { - +double E1_series(const double x) +{ // Euler constant const double EULER_GAMMA = 0.57721566490153286060651209008241; @@ -80,7 +84,8 @@ double E1_series(const double x) { double res = 0.0; // perform the summation - for (int it = itmax; it > 1; it--) { + for (int it = itmax; it > 1; it--) + { // calculate 1/n const double fact = 1.0 / it; // add next term of summation @@ -89,7 +94,6 @@ double E1_series(const double x) { // add everything up return -EULER_GAMMA - log(x) + x * (1.0 - res); - } // The Gauss Laguerre expansion of the exponential integral can be written as @@ -105,8 +109,8 @@ double E1_series(const double x) { // where the a_n and x_n are determined by least quadrature (see reference) // Input: x0 - point at which Gaussian Laguerre quadrature is calculated // Return: E_1(x0) / exp(-x0) in this approximation -double gauss_laguerre(const double x0) { - +double gauss_laguerre(const double x0) +{ // initialize constants a_n and x_n const double size = 15; const double a[] = { 0.2182348859400869e+00, 0.3422101779228833e+00, @@ -126,12 +130,12 @@ double gauss_laguerre(const double x0) { double res = 0.0; // evaluate a_n / ( x_n + x0 ) - for ( int i = 0; i < size; i++ ) { + for ( int i = 0; i < size; i++ ) + { res += a[i] / (x[i] + x0); } return res; - } } diff --git a/src/ExponentialIntegral.h b/src/ExponentialIntegral.h index bc9587f..aa62487 100644 --- a/src/ExponentialIntegral.h +++ b/src/ExponentialIntegral.h @@ -20,10 +20,6 @@ // Tseng, Lee, Journal of Hydrology, 205 (1998) 38-51 // //////////////////////////////////////////////////////////////////////////////// -// -// Author: Martin Schlipf (2013) -// Contact: martin.schlipf@gmail.com -// #ifndef EXPONENTIAL_INTEGRAL_H #define EXPONENTIAL_INTEGRAL_H diff --git a/src/ExtForce.C b/src/ExtForce.C index be38d9d..55bdb07 100644 --- a/src/ExtForce.C +++ b/src/ExtForce.C @@ -12,10 +12,9 @@ // //////////////////////////////////////////////////////////////////////////////// // -// ExtForce.C +// ExtForce.C // //////////////////////////////////////////////////////////////////////////////// -// $Id: ExtForce.C,v 1.1 2010-02-20 23:13:02 fgygi Exp $ #include "ExtForce.h" #include diff --git a/src/ExtForce.h b/src/ExtForce.h index b0ad43c..3c8abc4 100644 --- a/src/ExtForce.h +++ b/src/ExtForce.h @@ -12,10 +12,9 @@ // //////////////////////////////////////////////////////////////////////////////// // -// ExtForce.h +// ExtForce.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: ExtForce.h,v 1.1 2010-02-20 23:13:02 fgygi Exp $ #ifndef EXTFORCE_H #define EXTFORCE_H diff --git a/src/ExtForceCmd.h b/src/ExtForceCmd.h index 1184fe0..a721ff5 100644 --- a/src/ExtForceCmd.h +++ b/src/ExtForceCmd.h @@ -15,7 +15,6 @@ // ExtForceCmd.h: // //////////////////////////////////////////////////////////////////////////////// -// $Id: ExtForceCmd.h,v 1.1 2010-02-20 23:13:02 fgygi Exp $ #ifndef EXTFORCECMD_H #define EXTFORCECMD_H diff --git a/src/ExtForceSet.C b/src/ExtForceSet.C index 162cef8..bb30b9e 100644 --- a/src/ExtForceSet.C +++ b/src/ExtForceSet.C @@ -15,7 +15,6 @@ // ExtForceSet.C // //////////////////////////////////////////////////////////////////////////////// -// $Id: ExtForceSet.C,v 1.1 2010-02-20 23:13:02 fgygi Exp $ #include "ExtForceSet.h" #include "AtomicExtForce.h" diff --git a/src/ExtForceSet.h b/src/ExtForceSet.h index 9b98302..1444dde 100644 --- a/src/ExtForceSet.h +++ b/src/ExtForceSet.h @@ -15,7 +15,6 @@ // ExtForceSet.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: ExtForceSet.h,v 1.1 2010-02-20 23:13:02 fgygi Exp $ #ifndef EXTFORCESET_H #define EXTFORCESET_H diff --git a/src/ExtStress.h b/src/ExtStress.h index 2d08bc6..171dc13 100644 --- a/src/ExtStress.h +++ b/src/ExtStress.h @@ -15,7 +15,6 @@ // ExtStress.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: ExtStress.h,v 1.5 2008-09-08 15:56:18 fgygi Exp $ #ifndef EXTSTRESS_H #define EXTSTRESS_H diff --git a/src/FermiTemp.h b/src/FermiTemp.h index 8e14b9e..4085900 100644 --- a/src/FermiTemp.h +++ b/src/FermiTemp.h @@ -15,7 +15,6 @@ // FermiTemp.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: FermiTemp.h,v 1.4 2008-09-08 15:56:18 fgygi Exp $ #ifndef FERMITEMP_H #define FERMITEMP_H diff --git a/src/FoldInWsCmd.h b/src/FoldInWsCmd.h index 2649413..7f4fc11 100644 --- a/src/FoldInWsCmd.h +++ b/src/FoldInWsCmd.h @@ -15,7 +15,6 @@ // FoldInWsCmd.h: // //////////////////////////////////////////////////////////////////////////////// -// $Id: FoldInWsCmd.h,v 1.3 2008-09-08 15:56:18 fgygi Exp $ #ifndef FOLDINWSCMD_H #define FOLDINWSCMD_H diff --git a/src/FourierTransform.C b/src/FourierTransform.C index dfd1ab1..b1b3820 100644 --- a/src/FourierTransform.C +++ b/src/FourierTransform.C @@ -27,6 +27,11 @@ #if _OPENMP #include +#else +// _OPENMP is not defined +#if defined(USE_FFTW3_THREADS) +#error "Need OpenMP to use FFTW3 threads" +#endif #endif #if USE_MPI @@ -35,13 +40,32 @@ typedef int MPI_Comm; #endif -#if USE_FFTW +#if defined(USE_FFTW2) || defined(USE_FFTW3) #ifdef ADD_ #define zdscal zdscal_ +#define zcopy zcopy_ +#endif +#endif + +#if defined(USE_FFTW2) +#if defined(FFTWMEASURE) +#define FFTW_ALGO FFTW_MEASURE +#else +#define FFTW_ALGO FFTW_ESTIMATE +#endif +#endif + +#if defined(USE_FFTW3) +#if defined(FFTWMEASURE) +#define FFTW_ALGO ( FFTW_MEASURE | FFTW_UNALIGNED ) +#else +#define FFTW_ALGO ( FFTW_ESTIMATE | FFTW_UNALIGNED ) +#endif #endif +#if defined(USE_FFTW2) || defined(USE_FFTW3) extern "C" void zdscal(int *n,double *alpha,std::complex *x,int *incx); -#elif USE_ESSL +#elif USE_ESSL_FFT extern "C" { void dcft_(int *initflag, std::complex *x, int *inc2x, int *inc3x, std::complex *y, int *inc2y, int *inc3y, @@ -55,10 +79,11 @@ extern "C" { double *aux2, int *naux2); #define USE_GATHER_SCATTER 1 } -#else -#define NO_FFT_LIB 1 +#elif defined(FFT_NOLIB) void cfftm ( std::complex *ain, std::complex *aout, double scale, int ntrans, int length, int ainc, int ajmp, int idir ); +#else +#error "Must define USE_FFTW2, USE_FFTW3, USE_ESSL_FFT or FFT_NOLIB" #endif #if USE_GATHER_SCATTER @@ -77,7 +102,7 @@ using namespace std; //////////////////////////////////////////////////////////////////////////////// FourierTransform::~FourierTransform() { -#if USE_FFTW +#if USE_FFTW2 fftw_destroy_plan(fwplan0); fftw_destroy_plan(fwplan1); fftw_destroy_plan(fwplan2); @@ -85,6 +110,23 @@ FourierTransform::~FourierTransform() fftw_destroy_plan(bwplan1); fftw_destroy_plan(bwplan2); #endif + +#if USE_FFTW3 +#if USE_FFTW3_THREADS + fftw_cleanup_threads(); +#endif +#if defined(USE_FFTW3_2D) || defined(USE_FFTW3_THREADS) + fftw_destroy_plan(fwplan2d); + fftw_destroy_plan(bwplan2d); +#else + fftw_destroy_plan(fwplanx); + fftw_destroy_plan(bwplanx); + fftw_destroy_plan(fwplany); + fftw_destroy_plan(bwplany); +#endif + fftw_destroy_plan(fwplan); + fftw_destroy_plan(bwplan); +#endif } //////////////////////////////////////////////////////////////////////////////// @@ -153,8 +195,14 @@ FourierTransform::FourierTransform (const Basis &basis, // resize array zvec holding columns zvec_.resize(nvec_ * np2_); +#if TIMING + tm_init.start(); +#endif // Initialize FT library auxiliary arrays init_lib(); +#if TIMING + tm_init.stop(); +#endif // allocate send buffer sbuf.resize(nvec_ * np2_); @@ -534,16 +582,17 @@ void FourierTransform::bwd(complex* val) #if TIMING tm_b_fft.start(); + tm_b_z.start(); #endif -#if USE_ESSL +#if USE_ESSL_FFT int inc1 = 1, inc2 = np2_, ntrans = nvec_, isign = -1, initflag = 0; double scale = 1.0; - dcft_(&initflag,&zvec_[0],&inc1,&inc2,&zvec_[0],&inc1,&inc2,&np2_,&ntrans, - &isign,&scale,&aux1zb[0],&naux1z,&aux2[0],&naux2); - -#elif USE_FFTW + if ( ntrans > 0 ) + dcft_(&initflag,&zvec_[0],&inc1,&inc2,&zvec_[0],&inc1,&inc2,&np2_,&ntrans, + &isign,&scale,&aux1zb[0],&naux1z,&aux2[0],&naux2); +#elif USE_FFTW2 /* * void fftw(fftw_plan plan, int howmany, * FFTW_COMPLEX *in, int istride, int idist, @@ -553,16 +602,29 @@ void FourierTransform::bwd(complex* val) #pragma omp parallel for for ( int i = 0; i < nvec_; i++ ) { - //#pragma omp task fftw_one(bwplan2,(FFTW_COMPLEX*)&zvec_[i*np2_],(FFTW_COMPLEX*)0); } #else int ntrans = nvec_, inc1 = 1, inc2 = np2_; fftw(bwplan2,ntrans,(FFTW_COMPLEX*)&zvec_[0],inc1,inc2, (FFTW_COMPLEX*)0,0,0); -#endif +#endif // _OPENMP + +#elif USE_FFTW3 // USE_FFTW2 +#if USE_FFTW3_THREADS + fftw_execute_dft ( bwplan, (fftw_complex*)&zvec_[0], + (fftw_complex*)&zvec_[0]); #else + #pragma omp parallel for + for ( int i = 0; i < nvec_; i++ ) + { + fftw_execute_dft ( bwplan, (fftw_complex*)&zvec_[i*np2_], + (fftw_complex*)&zvec_[i*np2_]); + } +#endif // USE_FFTW3_THREADS + +#elif defined(FFT_NOLIB) // USE_FFTW3 // No library /* Transform along z */ int ntrans = nvec_; @@ -572,9 +634,13 @@ void FourierTransform::bwd(complex* val) double scale = 1.0; int idir = -1; cfftm ( &zvec_[0], &zvec_[0], scale, ntrans, length, ainc, ajmp, idir ); -#endif +#else +#error "Must define USE_FFTW2, USE_FFTW3, USE_ESSL_FFT or FFT_NOLIB" +#endif // USE_FFTW3 #if TIMING + tm_b_z.stop(); + tm_b_com.start(); tm_b_fft.stop(); tm_b_pack.start(); #endif @@ -635,13 +701,12 @@ void FourierTransform::bwd(complex* val) // copy from rbuf to val // scatter index array iunpack { - const int len = np012loc(); + const int len = np012loc() * 2; double* const pv = (double*) &val[0]; #pragma omp parallel for for ( int i = 0; i < len; i++ ) { - pv[2*i] = 0.0; - pv[2*i+1] = 0.0; + pv[i] = 0.0; } } @@ -679,59 +744,134 @@ void FourierTransform::bwd(complex* val) #if TIMING tm_b_unpack.stop(); tm_b_fft.start(); + tm_b_com.stop(); + tm_b_xy.start(); #endif +#if USE_FFTW3 +#if USE_FFTW3_THREADS + fftw_execute_dft ( bwplan2d, (fftw_complex*)&val[0], + (fftw_complex*)&val[0] ); +#elif USE_FFTW3_2D + #pragma omp parallel for + for ( int k = 0; k < np2_loc_[myproc_]; k++ ) + fftw_execute_dft ( bwplan2d, (fftw_complex*)&val[k*np0_*np1_], + (fftw_complex*)&val[k*np0_*np1_] ); +#else // FFTW3_2D + // fftw3 1d + for ( int k = 0; k < np2_loc_[myproc_]; k++ ) + { + int ibase = k * np0_ * np1_; +#if TIMING + tm_b_x.start(); +#endif + #pragma omp parallel for + for ( int i = 0; i < ntrans0_; i++ ) + { + // Transform first block along x: positive y indices + fftw_execute_dft ( bwplanx, (fftw_complex*)&val[ibase+i*np0_], + (fftw_complex*)&val[ibase+i*np0_]); + // Transform second block along x: negative y indices + fftw_execute_dft ( bwplanx, + (fftw_complex*)&val[ibase+(np1_-ntrans0_+i)*np0_], + (fftw_complex*)&val[ibase+(np1_-ntrans0_+i)*np0_]); + } +#if TIMING + tm_b_x.stop(); + tm_b_y.start(); +#endif +#if FFTW_TRANSPOSE + #pragma omp parallel + { + vector >t_trans(np1_); + #pragma omp for + for ( int i = 0; i < np0_; i++ ) + { + int length = t_trans.size(); + int inc1 = 1, inc2 = np0_; + zcopy(&length, &val[ibase+i], &inc2, &t_trans[0], &inc1); + fftw_execute_dft ( bwplany, (fftw_complex*)&t_trans[0], + (fftw_complex*)&t_trans[0]); + zcopy(&length, &t_trans[0], &inc1, &val[ibase+i], &inc2); + } + } +#else // FFTW_TRANSPOSE + #pragma omp parallel for + for ( int i = 0; i < np0_; i++ ) + { + fftw_execute_dft ( bwplany, (fftw_complex*)&val[ibase+i], + (fftw_complex*)&val[ibase+i]); + } +#endif // FFTW_TRANSPOSE +#if TIMING + tm_b_y.stop(); +#endif + } +#endif // USE_FFTW3_2D + +#elif USE_ESSL_FFT for ( int k = 0; k < np2_loc_[myproc_]; k++ ) { // transform along x for non-zero vectors only // transform along x for y in [0,ntrans0_] and y in [np1-ntrans0_, np1-1] -#if USE_ESSL #if USE_ESSL_2DFFT - // use 2D FFT for x and y transforms - int inc1, inc2, istart, isign = -1, initflag = 0; - double scale = 1.0; + // use 2D FFT for x and y transforms + int inc1, inc2, istart, isign = -1, initflag = 0; + double scale = 1.0; - // xy transform - istart = k * np0_ * np1_; - inc1 = 1; inc2 = np0_; - dcft2_(&initflag,&val[istart],&inc1,&inc2,&val[istart],&inc1,&inc2, - &np0_,&np1_,&isign,&scale,&aux1xyb[0],&naux1xy,&aux2[0],&naux2); + // xy transform + istart = k * np0_ * np1_; + inc1 = 1; inc2 = np0_; + dcft2_(&initflag,&val[istart],&inc1,&inc2,&val[istart],&inc1,&inc2, + &np0_,&np1_,&isign,&scale,&aux1xyb[0],&naux1xy,&aux2[0],&naux2); #else - // use multiple 1-d FFTs for x and y transforms + // use multiple 1-d FFTs for x and y transforms - int inc1, inc2, ntrans, istart, length, isign = -1, initflag = 0; - double scale = 1.0; - // transform only non-zero vectors along x - // First block: positive y indices: [0,ntrans0_] - ntrans = ntrans0_; - inc1 = 1; - inc2 = np0_; - istart = k * np0_ * np1_; - length = np0_; - dcft_(&initflag,&val[istart],&inc1,&inc2,&val[istart],&inc1,&inc2, - &length,&ntrans,&isign,&scale,&aux1xb[0],&naux1x,&aux2[0],&naux2); + int inc1, inc2, ntrans, istart, length, isign = -1, initflag = 0; + double scale = 1.0; + // transform only non-zero vectors along x + // First block: positive y indices: [0,ntrans0_] + ntrans = ntrans0_; + if ( ntrans > 0 ) + { + inc1 = 1; + inc2 = np0_; + istart = k * np0_ * np1_; + length = np0_; + dcft_(&initflag,&val[istart],&inc1,&inc2,&val[istart],&inc1,&inc2, + &length,&ntrans,&isign,&scale,&aux1xb[0],&naux1x,&aux2[0],&naux2); + + // Second block: negative y indices: [np1-ntrans0_,np1-1] + inc1 = 1; + inc2 = np0_; + istart = np0_ * ( (np1_-ntrans) + k * np1_ ); + length = np0_; + dcft_(&initflag,&val[istart],&inc1,&inc2,&val[istart],&inc1,&inc2, + &length,&ntrans,&isign,&scale,&aux1xb[0],&naux1x,&aux2[0],&naux2); + } - // Second block: negative y indices: [np1-ntrans0_,np1-1] - inc1 = 1; - inc2 = np0_; - istart = np0_ * ( (np1_-ntrans) + k * np1_ ); - length = np0_; - dcft_(&initflag,&val[istart],&inc1,&inc2,&val[istart],&inc1,&inc2, - &length,&ntrans,&isign,&scale,&aux1xb[0],&naux1x,&aux2[0],&naux2); - - // transform along y for all values of x - ntrans = np0_; - inc1 = np0_; - inc2 = 1; - istart = k * np0_ * np1_; - length = np1_; - dcft_(&initflag,&val[istart],&inc1,&inc2,&val[istart],&inc1,&inc2, - &length,&ntrans,&isign,&scale,&aux1yb[0],&naux1y,&aux2[0],&naux2); -#endif -#elif USE_FFTW + // transform along y for all values of x + ntrans = np0_; + if ( ntrans > 0 ) + { + inc1 = np0_; + inc2 = 1; + istart = k * np0_ * np1_; + length = np1_; + dcft_(&initflag,&val[istart],&inc1,&inc2,&val[istart],&inc1,&inc2, + &length,&ntrans,&isign,&scale,&aux1yb[0],&naux1y,&aux2[0],&naux2); + } +#endif // USE_ESSL_2DFFT + } // k + +#elif USE_FFTW2 + for ( int k = 0; k < np2_loc_[myproc_]; k++ ) + { + // transform along x for non-zero vectors only + // transform along x for y in [0,ntrans0_] and y in [np1-ntrans0_, np1-1] #if _OPENMP int ibase = k * np0_ * np1_; #pragma omp parallel for @@ -755,7 +895,6 @@ void FourierTransform::bwd(complex* val) #pragma omp parallel for for ( int i = 0; i < np0_; i++ ) { - //#pragma omp task { // transform along y for all values of x // copy data to local array @@ -771,7 +910,7 @@ void FourierTransform::bwd(complex* val) } } //delete [] tmp1; -#else +#else // _OPENMP int inc1, inc2, istart; int ntrans = ntrans0_; @@ -795,9 +934,14 @@ void FourierTransform::bwd(complex* val) istart = k * np0_ * np1_; fftw(bwplan1,ntrans,(FFTW_COMPLEX*)&val[istart],inc1,inc2, (FFTW_COMPLEX*)0,0,0); -#endif -#else - // No library +#endif // _OPENMP + } // k +#elif defined(FFT_NOLIB) // USE_FFTW2 + // No library + for ( int k = 0; k < np2_loc_[myproc_]; k++ ) + { + // transform along x for non-zero vectors only + // transform along x for y in [0,ntrans0_] and y in [np1-ntrans0_, np1-1] // transform along x for non-zero elements // Transform first block along x: positive y indices int ntrans = ntrans0_; @@ -820,11 +964,13 @@ void FourierTransform::bwd(complex* val) ainc = np0_; ajmp = 1; cfftm (&val[istart],&val[istart],scale,ntrans,length,ainc,ajmp,idir ); - -#endif } // for k +#else +#error "Must define USE_FFTW2, USE_FFTW3, USE_ESSL_FFT or FFT_NOLIB" +#endif #if TIMING + tm_b_xy.stop(); tm_b_fft.stop(); #endif } @@ -834,12 +980,75 @@ void FourierTransform::fwd(complex* val) { #if TIMING tm_f_fft.start(); + tm_f_xy.start(); +#endif + +//fftw_execute_dft is thread safe +#if USE_FFTW3 +#if USE_FFTW3_THREADS + fftw_execute_dft ( fwplan2d, (fftw_complex*)&val[0], + (fftw_complex*)&val[0] ); +#elif USE_FFTW3_2D // USE_FFTW3_2D + #pragma omp parallel for + for ( int k = 0; k < np2_loc_[myproc_]; k++ ) + fftw_execute_dft ( fwplan2d, (fftw_complex*)&val[k*np0_*np1_], + (fftw_complex*)&val[k*np0_*np1_] ); +#else // USE_FFTW3_2D + for ( int k = 0; k < np2_loc_[myproc_]; k++ ) + { + const int ibase = k * np0_ * np1_; +#if TIMING + tm_f_y.start(); +#endif +#if FFTW_TRANSPOSE + #pragma omp parallel + { + vector >t_trans(np1_); + #pragma omp for + for ( int i = 0; i < np0_; i++ ) + { + int length = t_trans.size(); + int inc1 = 1, inc2 = np0_; + zcopy(&length, &val[ibase+i], &inc2, &t_trans[0], &inc1); + fftw_execute_dft ( fwplany, (fftw_complex*)&t_trans[0], + (fftw_complex*)&t_trans[0]); + zcopy(&length, &t_trans[0], &inc1, &val[ibase+i], &inc2); + } + } +#else // FFTW_TRANSPOSE + #pragma omp parallel for + for ( int i = 0; i < np0_; i++ ) + { + fftw_execute_dft ( fwplany, (fftw_complex*)&val[ibase+i], + (fftw_complex*)&val[ibase+i]); + } +#endif // FFTW_TRANSPOSE +#if TIMING + tm_f_y.stop(); + tm_f_x.start(); +#endif + #pragma omp parallel for + for ( int i = 0; i < ntrans0_; i++ ) + { + // Transform first block along x: positive y indices + fftw_execute_dft ( fwplanx,(fftw_complex*)&val[ibase+i*np0_], + (fftw_complex*)&val[ibase+i*np0_]); + + // Transform second block along x: negative y indices + fftw_execute_dft ( fwplanx, + (fftw_complex*)&val[ibase+(np1_-ntrans0_+i)*np0_], + (fftw_complex*)&val[ibase+(np1_-ntrans0_+i)*np0_]); + } +#if TIMING + tm_f_x.stop(); #endif + } +#endif // USE_FFTW3_2D +#elif USE_ESSL_FFT for ( int k = 0; k < np2_loc_[myproc_]; k++ ) { // transform along x for non-zero vectors only // transform along x for y in [0,ntrans0_] and y in [np1-ntrans0_, np1-1] -#if USE_ESSL #if USE_ESSL_2DFFT // use 2D FFT for x and y transforms @@ -860,33 +1069,44 @@ void FourierTransform::fwd(complex* val) double scale = 1.0; // transform along y for all values of x ntrans = np0_; - inc1 = np0_; - inc2 = 1; - istart = k * np0_ * np1_; - length = np1_; - dcft_(&initflag,&val[istart],&inc1,&inc2,&val[istart],&inc1,&inc2, - &length,&ntrans,&isign,&scale,&aux1yf[0],&naux1y,&aux2[0],&naux2); + if ( ntrans > 0 ) + { + inc1 = np0_; + inc2 = 1; + istart = k * np0_ * np1_; + length = np1_; + dcft_(&initflag,&val[istart],&inc1,&inc2,&val[istart],&inc1,&inc2, + &length,&ntrans,&isign,&scale,&aux1yf[0],&naux1y,&aux2[0],&naux2); + } // transform only non-zero vectors along x ntrans = ntrans0_; - inc1 = 1; - inc2 = np0_; - istart = k * np0_ * np1_; - length = np0_; - dcft_(&initflag,&val[istart],&inc1,&inc2,&val[istart],&inc1,&inc2, - &length,&ntrans,&isign,&scale,&aux1xf[0],&naux1x,&aux2[0],&naux2); - - inc1 = 1; - inc2 = np0_; - istart = np0_ * ( (np1_-ntrans) + k * np1_ ); - length = np0_; - dcft_(&initflag,&val[istart],&inc1,&inc2,&val[istart],&inc1,&inc2, - &length,&ntrans,&isign,&scale,&aux1xf[0],&naux1x,&aux2[0],&naux2); -#endif -#elif USE_FFTW + if ( ntrans > 0 ) + { + inc1 = 1; + inc2 = np0_; + istart = k * np0_ * np1_; + length = np0_; + dcft_(&initflag,&val[istart],&inc1,&inc2,&val[istart],&inc1,&inc2, + &length,&ntrans,&isign,&scale,&aux1xf[0],&naux1x,&aux2[0],&naux2); + + inc1 = 1; + inc2 = np0_; + istart = np0_ * ( (np1_-ntrans) + k * np1_ ); + length = np0_; + dcft_(&initflag,&val[istart],&inc1,&inc2,&val[istart],&inc1,&inc2, + &length,&ntrans,&isign,&scale,&aux1xf[0],&naux1x,&aux2[0],&naux2); + } +#endif // USE_ESSL_2DFFT + } // k +#elif USE_FFTW2 + for ( int k = 0; k < np2_loc_[myproc_]; k++ ) + { + // transform along x for non-zero vectors only + // transform along x for y in [0,ntrans0_] and y in [np1-ntrans0_, np1-1] #if _OPENMP int ibase = k * np0_ * np1_; - complex *tmp1 = new complex[np1_]; + //complex *tmp1 = new complex[np1_]; #pragma omp parallel for for ( int i = 0; i < np0_; i++ ) { @@ -905,7 +1125,7 @@ void FourierTransform::fwd(complex* val) #endif } } - delete [] tmp1; + //delete [] tmp1; #pragma omp parallel for for ( int i = 0; i < ntrans0_; i++ ) @@ -919,7 +1139,7 @@ void FourierTransform::fwd(complex* val) (FFTW_COMPLEX*)0); } } -#else +#else // _OPENMP int inc1, inc2, istart; // transform along y for all values of x @@ -943,9 +1163,14 @@ void FourierTransform::fwd(complex* val) istart = np0_ * ( (np1_-ntrans) + k * np1_ ); fftw(fwplan0,ntrans,(FFTW_COMPLEX*)&val[istart],inc1,inc2, (FFTW_COMPLEX*)0,0,0); -#endif -#else - // No library +#endif // _OPENMP + } // k +#elif defined(FFT_NOLIB) + // No library + for ( int k = 0; k < np2_loc_[myproc_]; k++ ) + { + // transform along x for non-zero vectors only + // transform along x for y in [0,ntrans0_] and y in [np1-ntrans0_, np1-1] // transform along y for all values of x int ntrans = np0_; int istart = k * np0_ * np1_; @@ -966,15 +1191,19 @@ void FourierTransform::fwd(complex* val) istart = np0_ * ( (np1_-ntrans) + k * np1_ ); cfftm (&val[istart],&val[istart],scale,ntrans,length,ainc,ajmp,idir ); -#endif } // for k +#else +#error "Must define USE_FFTW2, USE_FFTW3, USE_ESSL_FFT or FFT_NOLIB" +#endif - // gather val into rbuf #if TIMING + tm_f_xy.stop(); + tm_f_com.start(); tm_f_fft.stop(); tm_f_pack.start(); #endif + // gather val into rbuf #if USE_GATHER_SCATTER // zgthr: x(i) = y(indx(i)) // void zgthr_(int* n, complex* y, complex* x, int*indx); @@ -1031,7 +1260,7 @@ void FourierTransform::fwd(complex* val) int n = zvec_.size(); zgthr_(&n,y,x,&ipack_[0]); } -#else +#else // no gather scatter const int zvec_size = zvec_.size(); const double* const ps = (double*) &sbuf[0]; double* const pz = (double*) &zvec_[0]; @@ -1051,18 +1280,21 @@ void FourierTransform::fwd(complex* val) #if TIMING tm_f_unpack.stop(); tm_f_fft.start(); + tm_f_com.stop(); + tm_f_z.start(); #endif -#if USE_ESSL +#if USE_ESSL_FFT int inc1 = 1, inc2 = np2_, ntrans = nvec_, isign = 1, initflag = 0; - double scale = 1.0 / (np0_ * np1_ * np2_); + double scale = 1.0 / np012(); - dcft_(&initflag,&zvec_[0],&inc1,&inc2,&zvec_[0],&inc1,&inc2,&np2_,&ntrans, - &isign,&scale,&aux1zf[0],&naux1z,&aux2[0],&naux2); + if ( ntrans > 0 ) + dcft_(&initflag,&zvec_[0],&inc1,&inc2,&zvec_[0],&inc1,&inc2,&np2_,&ntrans, + &isign,&scale,&aux1zf[0],&naux1z,&aux2[0],&naux2); -#elif USE_FFTW +#elif USE_FFTW2 #if _OPENMP - const double fac = 1.0 / ( np0_ * np1_ * np2_ ); + const double fac = 1.0 / np012(); #pragma omp parallel for for ( int i = 0; i < nvec_; i++ ) { @@ -1088,22 +1320,44 @@ void FourierTransform::fwd(complex* val) fftw(fwplan2,ntrans,(FFTW_COMPLEX*)&zvec_[0],inc1,inc2, (FFTW_COMPLEX*)0,0,0); int len = zvec_.size(); - double fac = 1.0 / ( np0_ * np1_ * np2_ ); + double fac = 1.0 / np012(); zdscal(&len,&fac,&zvec_[0],&inc1); #endif +#elif USE_FFTW3 + +#if USE_FFTW3_THREADS + fftw_execute_dft ( fwplan, (fftw_complex*)&zvec_[0], + (fftw_complex*)&zvec_[0]); #else + // do np2_ same for D_USE_1D or not + #pragma omp parallel for + for ( int i = 0; i < nvec_; i++ ) + { + fftw_execute_dft ( fwplan, (fftw_complex*)&zvec_[i*np2_], + (fftw_complex*)&zvec_[i*np2_]); + } +#endif + // scale + double fac = 1.0 / np012(); + int len = zvec_.size(); + int inc1 = 1; + zdscal(&len,&fac,&zvec_[0],&inc1); +#elif defined(FFT_NOLIB) // No library /* Transform along z */ int ntrans = nvec_; int length = np2_; int ainc = 1; int ajmp = np2_; - double scale = 1.0 / ( np0_ * np1_ * np2_ ); + double scale = 1.0 / np012(); int idir = 1; cfftm ( &zvec_[0], &zvec_[0], scale, ntrans, length, ainc, ajmp, idir ); +#else +#error "Must define USE_FFTW2, USE_FFTW3, USE_ESSL_FFT or FFT_NOLIB" #endif #if TIMING + tm_f_z.stop(); tm_f_fft.stop(); #endif } @@ -1113,7 +1367,8 @@ void FourierTransform::init_lib(void) { // initialization of FFT libs -#if USE_ESSL +#if USE_ESSL_FFT + complex *p = 0; #if USE_ESSL_2DFFT // use 2D FFT for x and y transforms and 1D FFT for z transforms naux1xy = 40000 + 2.28 * (np0_+np1_); @@ -1145,15 +1400,17 @@ void FourierTransform::init_lib(void) // initialize z transforms int ntrans = nvec_; - inc1 = 1; inc2 = np2_; - isign = -1; - dcft_(&initflag,p,&inc1,&inc2,p,&inc1,&inc2,&np2_,&ntrans, - &isign,&scale,&aux1zb[0],&naux1z,&aux2[0],&naux2); - isign = 1; scale = 1.0 / ( np0_ * np1_ * np2_ ); - dcft_(&initflag,p,&inc1,&inc2,p,&inc1,&inc2,&np2_,&ntrans, - &isign,&scale,&aux1zf[0],&naux1z,&aux2[0],&naux2); -#else - + if ( ntrans > 0 ) + { + inc1 = 1; inc2 = np2_; + isign = -1; + dcft_(&initflag,p,&inc1,&inc2,p,&inc1,&inc2,&np2_,&ntrans, + &isign,&scale,&aux1zb[0],&naux1z,&aux2[0],&naux2); + isign = 1; scale = 1.0 / np012(); + dcft_(&initflag,p,&inc1,&inc2,p,&inc1,&inc2,&np2_,&ntrans, + &isign,&scale,&aux1zf[0],&naux1z,&aux2[0],&naux2); + } +#else // USE_ESSL_2DFFT naux1x = (int) (20000 + 2.28 * np0_); naux1y = (int) (20000 + 2.28 * np1_); @@ -1177,58 +1434,159 @@ void FourierTransform::init_lib(void) int initflag = 1, inc1, inc2, ntrans, isign; double scale = 1.0; - complex *p = 0; // x transforms inc1 = 1; inc2 = np0_; ntrans = ntrans0_; - isign = -1; - dcft_(&initflag,p,&inc1,&inc2,p,&inc1,&inc2,&np0_,&ntrans, - &isign,&scale,&aux1xb[0],&naux1x,&aux2[0],&naux2); - isign = 1; - dcft_(&initflag,p,&inc1,&inc2,p,&inc1,&inc2,&np0_,&ntrans, - &isign,&scale,&aux1xf[0],&naux1x,&aux2[0],&naux2); + if ( ntrans > 0 ) + { + isign = -1; + dcft_(&initflag,p,&inc1,&inc2,p,&inc1,&inc2,&np0_,&ntrans, + &isign,&scale,&aux1xb[0],&naux1x,&aux2[0],&naux2); + isign = 1; + dcft_(&initflag,p,&inc1,&inc2,p,&inc1,&inc2,&np0_,&ntrans, + &isign,&scale,&aux1xf[0],&naux1x,&aux2[0],&naux2); + } // y transforms inc1 = np0_; inc2 = 1; ntrans = ntrans1_; - isign = -1; - dcft_(&initflag,p,&inc1,&inc2,p,&inc1,&inc2,&np1_,&ntrans, - &isign,&scale,&aux1yb[0],&naux1y,&aux2[0],&naux2); - isign = 1; - dcft_(&initflag,p,&inc1,&inc2,p,&inc1,&inc2,&np1_,&ntrans, - &isign,&scale,&aux1yf[0],&naux1y,&aux2[0],&naux2); + if ( ntrans > 0 ) + { + isign = -1; + dcft_(&initflag,p,&inc1,&inc2,p,&inc1,&inc2,&np1_,&ntrans, + &isign,&scale,&aux1yb[0],&naux1y,&aux2[0],&naux2); + isign = 1; + dcft_(&initflag,p,&inc1,&inc2,p,&inc1,&inc2,&np1_,&ntrans, + &isign,&scale,&aux1yf[0],&naux1y,&aux2[0],&naux2); + } // z transforms inc1 = 1; inc2 = np2_; ntrans = ntrans2_; - isign = -1; - dcft_(&initflag,p,&inc1,&inc2,p,&inc1,&inc2,&np2_,&ntrans, - &isign,&scale,&aux1zb[0],&naux1z,&aux2[0],&naux2); - isign = 1; scale = 1.0 / ( np0_ * np1_ * np2_ ); - dcft_(&initflag,p,&inc1,&inc2,p,&inc1,&inc2,&np2_,&ntrans, - &isign,&scale,&aux1zf[0],&naux1z,&aux2[0],&naux2); + if ( ntrans > 0 ) + { + isign = -1; + dcft_(&initflag,p,&inc1,&inc2,p,&inc1,&inc2,&np2_,&ntrans, + &isign,&scale,&aux1zb[0],&naux1z,&aux2[0],&naux2); + isign = 1; scale = 1.0 / np012(); + dcft_(&initflag,p,&inc1,&inc2,p,&inc1,&inc2,&np2_,&ntrans, + &isign,&scale,&aux1zf[0],&naux1z,&aux2[0],&naux2); + } -#endif -#elif USE_FFTW - -#if FFTWMEASURE - // FFTW_MEASURE - fwplan0 = fftw_create_plan(np0_,FFTW_FORWARD,FFTW_MEASURE|FFTW_IN_PLACE); - fwplan1 = fftw_create_plan(np1_,FFTW_FORWARD,FFTW_MEASURE|FFTW_IN_PLACE); - fwplan2 = fftw_create_plan(np2_,FFTW_FORWARD,FFTW_MEASURE|FFTW_IN_PLACE); - bwplan0 = fftw_create_plan(np0_,FFTW_BACKWARD,FFTW_MEASURE|FFTW_IN_PLACE); - bwplan1 = fftw_create_plan(np1_,FFTW_BACKWARD,FFTW_MEASURE|FFTW_IN_PLACE); - bwplan2 = fftw_create_plan(np2_,FFTW_BACKWARD,FFTW_MEASURE|FFTW_IN_PLACE); -#else - // FFTW_ESTIMATE - fwplan0 = fftw_create_plan(np0_,FFTW_FORWARD,FFTW_ESTIMATE|FFTW_IN_PLACE); - fwplan1 = fftw_create_plan(np1_,FFTW_FORWARD,FFTW_ESTIMATE|FFTW_IN_PLACE); - fwplan2 = fftw_create_plan(np2_,FFTW_FORWARD,FFTW_ESTIMATE|FFTW_IN_PLACE); - bwplan0 = fftw_create_plan(np0_,FFTW_BACKWARD,FFTW_ESTIMATE|FFTW_IN_PLACE); - bwplan1 = fftw_create_plan(np1_,FFTW_BACKWARD,FFTW_ESTIMATE|FFTW_IN_PLACE); - bwplan2 = fftw_create_plan(np2_,FFTW_BACKWARD,FFTW_ESTIMATE|FFTW_IN_PLACE); +#endif // USE_ESSL_2DFFT + +#elif USE_FFTW2 + + fwplan0 = fftw_create_plan(np0_,FFTW_FORWARD,FFTW_ALGO|FFTW_IN_PLACE); + fwplan1 = fftw_create_plan(np1_,FFTW_FORWARD,FFTW_ALGO|FFTW_IN_PLACE); + fwplan2 = fftw_create_plan(np2_,FFTW_FORWARD,FFTW_ALGO|FFTW_IN_PLACE); + bwplan0 = fftw_create_plan(np0_,FFTW_BACKWARD,FFTW_ALGO|FFTW_IN_PLACE); + bwplan1 = fftw_create_plan(np1_,FFTW_BACKWARD,FFTW_ALGO|FFTW_IN_PLACE); + bwplan2 = fftw_create_plan(np2_,FFTW_BACKWARD,FFTW_ALGO|FFTW_IN_PLACE); + +#elif USE_FFTW3 + vector > aux(np0_*np1_); +#if defined(USE_FFTW3MKL) && !defined(USE_FFTW3_THREADS) && _OPENMP + fftw3_mkl.number_of_user_threads = omp_get_max_threads(); #endif -#else +#if USE_FFTW3_THREADS + fftw_init_threads(); + fftw_plan_with_nthreads(omp_get_max_threads()); + vector > aux1(np0_*np1_*np2_loc_[myproc_]); + + // xy + int rank = 2; + int n[] = {np1_,np0_}; + int howmany = np2_loc_[myproc_]; + //int howmany = 1; + int idist = np0_*np1_, odist = np0_*np1_; + int istride = 1, ostride = 1; /* array is contiguous in memory */ + int *inembed = n, *onembed = n; + + fwplan2d = fftw_plan_many_dft(rank, n, howmany, (fftw_complex*)&aux1[0], + inembed, istride, idist, + (fftw_complex*)&aux1[0], onembed, + ostride, odist, -1, FFTW_ALGO); + bwplan2d = fftw_plan_many_dft(rank, n, howmany, (fftw_complex*)&aux1[0], + inembed, istride, idist, + (fftw_complex*)&aux1[0], onembed, + ostride, odist, 1, FFTW_ALGO); + + // z + rank = 1; + int nz[] = {np2_}; + howmany = nvec_; + idist = np2_, odist = np2_; + istride = 1, ostride = 1; /* array is contiguous in memory */ + inembed = nz, onembed = nz; + + fwplan = fftw_plan_many_dft(rank, nz, howmany, (fftw_complex*)&zvec_[0], + inembed, istride, idist, + (fftw_complex*)&zvec_[0], onembed, + ostride, odist, -1, FFTW_ALGO); + bwplan = fftw_plan_many_dft(rank, nz, howmany, (fftw_complex*)&zvec_[0], + inembed, istride, idist, + (fftw_complex*)&zvec_[0], onembed, + ostride, odist, 1, FFTW_ALGO); + + +#else // USE_FFTW3_THREADS +#if USE_FFTW3_2D + // row major in FFTW3 2d plans + fwplan2d = fftw_plan_dft_2d ( np1_, np0_, (fftw_complex*)(&aux[0]), + (fftw_complex*)(&aux[0]), -1, + FFTW_ALGO ); + bwplan2d = fftw_plan_dft_2d ( np1_, np0_, (fftw_complex*)(&aux[0]), + (fftw_complex*)(&aux[0]), 1, + FFTW_ALGO ); +#else // USE_FFTW3_2D + // FFTW3 1D + fwplanx = fftw_plan_dft_1d ( np0_, (fftw_complex*)(&aux[0]), + (fftw_complex*)(&aux[0]), -1, + FFTW_ALGO ); + bwplanx = fftw_plan_dft_1d ( np0_, (fftw_complex*)(&aux[0]), + (fftw_complex*)(&aux[0]), 1, + FFTW_ALGO ); + +#if FFTW_TRANSPOSE + fwplany = fftw_plan_dft_1d ( np1_, (fftw_complex*)(&aux[0]), + (fftw_complex*)(&aux[0]), -1, + FFTW_ALGO ); + bwplany = fftw_plan_dft_1d ( np1_, (fftw_complex*)(&aux[0]), + (fftw_complex*)(&aux[0]), 1, + FFTW_ALGO ); + +#else // FFTW_TRANSPOSE + // strided FFT + int rank = 1; + int n[] = {np1_}; + int howmany = 1; + int idist = 1, odist = 1; + int istride = np0_, ostride = np0_; /* array is contiguous in memory */ + int *inembed = n, *onembed = n; + + fwplany = fftw_plan_many_dft(rank, n, howmany, (fftw_complex*)&aux[0], + inembed, istride, idist, + (fftw_complex*)&aux[0], onembed, + ostride, odist, -1, FFTW_ALGO); + bwplany = fftw_plan_many_dft(rank, n, howmany, (fftw_complex*)&aux[0], + inembed, istride, idist, + (fftw_complex*)&aux[0], onembed, + ostride, odist, 1, FFTW_ALGO); +#endif // FFTW_TRANSPOSE +#endif // USE_FFTW3_2D + // do z using 1d plans + fwplan = fftw_plan_dft_1d ( np2_, (fftw_complex*)(&zvec_[0]), + (fftw_complex*)(&zvec_[0]), -1, + FFTW_ALGO ); + bwplan = fftw_plan_dft_1d ( np2_, (fftw_complex*)(&zvec_[0]), + (fftw_complex*)(&zvec_[0]), 1, + FFTW_ALGO ); +#endif //USE_FFTW3_THREADS + +#elif FFT_NOLIB // USE_FFTW3 /* no library */ +#else +#error "Must define USE_FFTW2, USE_FFTW3, USE_ESSL_FFT or FFT_NOLIB" #endif } @@ -1365,7 +1723,7 @@ void FourierTransform::zvec_to_doublevector(complex *c1, } -#if NO_FFT_LIB +#if defined(FFT_NOLIB) //////////////////////////////////////////////////////////////////////////////// // @@ -1472,7 +1830,7 @@ void cfftm ( complex *ain, complex *aout, double scale, * Transcription from 'FFT as Nested Multiplication, with a twist' * C. de Boor, SIAM Sci. Stat. Comput., Vol 1, No 1, March 1980 * - * Adapted to C by F.Gygi, 17 Feb 1993, 9 Dec 1993 + * Adapted to C 17 Feb 1993, 9 Dec 1993 * ******************************************************************************/ diff --git a/src/FourierTransform.h b/src/FourierTransform.h index 39db0e6..19954d5 100644 --- a/src/FourierTransform.h +++ b/src/FourierTransform.h @@ -22,7 +22,15 @@ #include #include -#if USE_FFTW +#if !( defined(USE_FFTW2) || defined(USE_FFTW3) || defined(USE_ESSL_FFT) || defined(FFT_NOLIB) ) +#error "Must define USE_FFTW2, USE_FFTW3, USE_ESSL_FFT or FFT_NOLIB" +#endif + +#if defined(USE_FFTW2) && defined(USE_FFTW3) +#error "Cannot define USE_FFTW2 and USE_FFTW3" +#endif + +#if USE_FFTW2 #if USE_DFFTW #include "dfftw.h" #else @@ -30,6 +38,13 @@ #endif #endif +#if USE_FFTW3 +#include "fftw3.h" +#if USE_FFTW3MKL +#include "fftw3_mkl.h" +#endif +#endif + #if USE_MPI #include #endif @@ -63,21 +78,32 @@ class FourierTransform void init_lib(void); -#if USE_ESSL +#if USE_ESSL_FFT #if USE_ESSL_2DFFT - std::vector aux1xyf; - std::vector aux1xyb; - int naux1xy; + std::vector aux1xyf,aux1zf; + std::vector aux1xyb,aux1zb; + std::vector aux2; + int naux1xy,naux1z,naux2; #else std::vector aux1xf, aux1yf, aux1zf; std::vector aux1xb, aux1yb, aux1zb; std::vector aux2; int naux1x,naux1y,naux1z,naux2; #endif -#elif USE_FFTW || USE_FFTW3 +#elif USE_FFTW2 fftw_plan fwplan0,fwplan1,fwplan2,bwplan0,bwplan1,bwplan2; +#elif USE_FFTW3 + //plans for np2_ + fftw_plan fwplan, bwplan; +#if defined(USE_FFTW3_2D) || defined(USE_FFTW3_THREADS) + fftw_plan fwplan2d, bwplan2d; #else + fftw_plan fwplanx, fwplany, bwplanx, bwplany; +#endif +#elif defined(FFT_NOLIB) // no library +#else +#error "Must define USE_FFTW2, USE_FFTW3, USE_ESSL_FFT or FFT_NOLIB" #endif void vector_to_zvec(const std::complex* c); @@ -116,14 +142,21 @@ class FourierTransform int np2_loc(int iproc) const { return np2_loc_[iproc]; } int np2_first() const { return np2_first_[myproc_]; } int np2_first(int iproc) const { return np2_first_[iproc]; } - int np012() const { return np0_ * np1_ * np2_; } + long int np012() const { return ((long int)np0_) * np1_ * np2_; } int np012loc(int iproc) const { return np0_ * np1_ * np2_loc_[iproc]; } int np012loc() const { return np0_ * np1_ * np2_loc_[myproc_]; } int index(int i, int j, int k) const { return i + np0_ * ( j + np1_ * k ); } + int i(int ind) const { return ind % np0_; } + int j(int ind) const { return (ind / np0_) % np1_; } + int k(int ind) const { return (ind / np0_) / np1_ + np2_first(); } void reset_timers(void); Timer tm_f_map, tm_f_fft, tm_f_pack, tm_f_mpi, tm_f_zero, tm_f_unpack, - tm_b_map, tm_b_fft, tm_b_pack, tm_b_mpi, tm_b_zero, tm_b_unpack; + tm_b_map, tm_b_fft, tm_b_pack, tm_b_mpi, tm_b_zero, tm_b_unpack, + tm_f_xy, tm_f_z, tm_f_x, tm_f_y, + tm_b_xy, tm_b_z, tm_b_x, tm_b_y, + tm_init, tm_b_com, tm_f_com; + }; #endif diff --git a/src/GlobalExtForce.C b/src/GlobalExtForce.C index 404be5e..325835a 100644 --- a/src/GlobalExtForce.C +++ b/src/GlobalExtForce.C @@ -12,10 +12,9 @@ // //////////////////////////////////////////////////////////////////////////////// // -// GlobalExtForce.C +// GlobalExtForce.C // //////////////////////////////////////////////////////////////////////////////// -// $Id: GlobalExtForce.C,v 1.1 2010-02-20 23:13:02 fgygi Exp $ #include "GlobalExtForce.h" #include "AtomSet.h" diff --git a/src/GlobalExtForce.h b/src/GlobalExtForce.h index 5c741b9..3c2ce1a 100644 --- a/src/GlobalExtForce.h +++ b/src/GlobalExtForce.h @@ -12,10 +12,9 @@ // //////////////////////////////////////////////////////////////////////////////// // -// GlobalExtForce.h +// GlobalExtForce.h // //////////////////////////////////////////////////////////////////////////////// -// $Id: GlobalExtForce.h,v 1.1 2010-02-20 23:13:02 fgygi Exp $ #ifndef GLOBALEXTFORCE_H #define GLOBALEXTFORCE_H diff --git a/src/HelpCmd.h b/src/HelpCmd.h index af4edde..04b0753 100644 --- a/src/HelpCmd.h +++ b/src/HelpCmd.h @@ -15,7 +15,6 @@ // HelpCmd.h: // //////////////////////////////////////////////////////////////////////////////// -// $Id: HelpCmd.h,v 1.4 2008-09-08 15:56:18 fgygi Exp $ #ifndef HELPCMD_H #define HELPCMD_H diff --git a/src/InteractionPotential.h b/src/InteractionPotential.h index 38fc4bd..ed8e86a 100644 --- a/src/InteractionPotential.h +++ b/src/InteractionPotential.h @@ -19,9 +19,6 @@ // Implements InteractionPotential class that evaluates the potential for given // norm of G vectors and the derivative w.r.t. this argument // -// Author: Martin Schlipf (2013) -// Contact: martin.schlipf@gmail.com -// //////////////////////////////////////////////////////////////////////////////// #ifndef INTERACTIONPOTENTIAL_H @@ -34,17 +31,12 @@ class InteractionPotential public: // default constructor = Coulomb potential - InteractionPotential() : - coulomb_(true) - { - } + InteractionPotential() : coulomb_(true) {} // constructor - define function and derivative InteractionPotential(double(*V)(const double&), double(*dV)(const double&), double(*div_scale)(const double&)) : - V_(V), dV_(dV), div_scale_(div_scale), coulomb_(false) - { - } + V_(V), dV_(dV), div_scale_(div_scale), coulomb_(false) {} // is the interaction potential a coulomb potential? inline bool coulomb() const @@ -55,7 +47,8 @@ class InteractionPotential // evaluate the interaction potential for given norm of G vector inline double operator()(const double G2) const { - // the current implementation expects that coulomb potential is treated externaly + // the current implementation expects that the coulomb potential + // is treated externaly assert(not coulomb_); return V_(G2); } @@ -63,14 +56,16 @@ class InteractionPotential // evaluate the derivative of the interaction potential w.r.t. G^2 inline double derivative(const double G2) const { - // the current implementation expects that coulomb potential is treated externaly + // the current implementation expects that the coulomb potential + // is treated externaly assert(not coulomb_); return dV_(G2); } inline double divergence_scaling(const double rcut) const { - // the current implementation expects that coulomb potential is treated externaly + // the current implementation expects that the coulomb potential + // is treated externaly assert(not coulomb_); return div_scale_(rcut); } diff --git a/src/IonicStepper.h b/src/IonicStepper.h index ab975df..c6dd1ff 100644 --- a/src/IonicStepper.h +++ b/src/IonicStepper.h @@ -15,7 +15,6 @@ // IonicStepper.h: // //////////////////////////////////////////////////////////////////////////////// -// $Id: IonicStepper.h,v 1.15 2010-04-16 21:43:11 fgygi Exp $ #ifndef IONICSTEPPER_H #define IONICSTEPPER_H @@ -68,8 +67,9 @@ class IonicStepper pmass_[is] = atoms_.species_list[is]->mass() * 1822.89; } natoms_ = atoms_.size(); - atoms_.get_positions(r0_); - atoms_.get_velocities(v0_); + // get positions and velocities from atoms_ + get_positions(); + get_velocities(); } double r0(int is, int i) const { return r0_[is][i]; } @@ -77,6 +77,10 @@ class IonicStepper const std::vector >& r0(void) const { return r0_; } const std::vector >& v0(void) const { return v0_; } const std::vector& pmass(void) const { return pmass_; } + void get_positions(void) { atoms_.get_positions(r0_); } + void get_velocities(void) { atoms_.get_velocities(v0_); } + void set_positions(void) { atoms_.set_positions(r0_); } + void set_velocities(void) { atoms_.set_velocities(v0_); } void setup_constraints(void) { diff --git a/src/IterCmd.h b/src/IterCmd.h new file mode 100644 index 0000000..16c6dfa --- /dev/null +++ b/src/IterCmd.h @@ -0,0 +1,72 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2015 The Regents of the University of California +// +// This file is part of Qbox +// +// Qbox is distributed under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 2 of +// the License, or (at your option) any later version. +// See the file COPYING in the root directory of this distribution +// or . +// +//////////////////////////////////////////////////////////////////////////////// +// +// IterCmd.h +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ITERCMD_H +#define ITERCMD_H + +#include +#include +#include +#include + +#include "Sample.h" + +class IterCmd : public Var +{ + Sample *s; + + public: + + const char *name ( void ) const { return "iter_cmd"; }; + + int set ( int argc, char **argv ) + { + if ( !strcmp(argv[1],"NULL") ) + { + // reset iter_cmd to empty string + s->ctrl.iter_cmd.clear(); + return 0; + } + + // include all arguments until \n + string v; + for ( int i = 1; i < argc; i++ ) + { + if ( i > 1 ) + v += " "; + v += argv[i]; + } + + s->ctrl.iter_cmd = v; + + return 0; + } + + string print (void) const + { + ostringstream st; + st.setf(ios::left,ios::adjustfield); + st << setw(10) << name() << " = "; + st.setf(ios::right,ios::adjustfield); + st << s->ctrl.iter_cmd; + return st.str(); + } + + IterCmd(Sample *sample) : s(sample) { s->ctrl.iter_cmd = ""; }; +}; +#endif diff --git a/src/IterCmdPeriod.h b/src/IterCmdPeriod.h new file mode 100644 index 0000000..c83fffd --- /dev/null +++ b/src/IterCmdPeriod.h @@ -0,0 +1,69 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2015 The Regents of the University of California +// +// This file is part of Qbox +// +// Qbox is distributed under the terms of the GNU General Public License +// as published by the Free Software Foundation, either version 2 of +// the License, or (at your option) any later version. +// See the file COPYING in the root directory of this distribution +// or . +// +//////////////////////////////////////////////////////////////////////////////// +// +// IterCmdPeriod.h +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef ITERCMDPERIOD_H +#define ITERCMDPERIOD_H + +#include +#include +#include +#include + +#include "Sample.h" + +class IterCmdPeriod : public Var +{ + Sample *s; + + public: + + const char *name ( void ) const { return "iter_cmd_period"; }; + + int set ( int argc, char **argv ) + { + if ( argc != 2 ) + { + if ( ui->onpe0() ) + cout << " iter_cmd_period must be a positive integer" << endl; + return 1; + } + + int v = atoi(argv[1]); + if ( v <= 0 ) + { + if ( ui->onpe0() ) + cout << " iter_cmd_period must be a positive integer" << endl; + return 1; + } + s->ctrl.iter_cmd_period = v; + return 0; + } + + string print (void) const + { + ostringstream st; + st.setf(ios::left,ios::adjustfield); + st << setw(10) << name() << " = "; + st.setf(ios::right,ios::adjustfield); + st << setw(10) << s->ctrl.iter_cmd_period; + return st.str(); + } + + IterCmdPeriod(Sample *sample) : s(sample) { s->ctrl.iter_cmd_period = 1; }; +}; +#endif diff --git a/src/JDWavefunctionStepper.C b/src/JDWavefunctionStepper.C index 10745ba..292248a 100644 --- a/src/JDWavefunctionStepper.C +++ b/src/JDWavefunctionStepper.C @@ -81,6 +81,8 @@ void JDWavefunctionStepper::update(Wavefunction& dwf) tmap_["jd_residual"].stop(); // dwf.sd->c() now contains the descent direction (HV-VA) + + // update preconditioner prec_.update(wf_); tmap_["jd_compute_z"].start(); @@ -215,45 +217,67 @@ void JDWavefunctionStepper::update(Wavefunction& dwf) // (Y,HY) // factor 2.0 in next line: G and -G + tmap_["jd_gemm"].start(); a.gemm('t','n',2.0,c_proxy_t,cp_proxy_t,0.0); + tmap_["jd_gemm"].stop(); // rank-1 update correction a.ger(-1.0,c_proxy_t,0,cp_proxy_t,0); // a contains (Y,HY), copy to h11 block + tmap_["jd_getsub"].start(); h.getsub(a,a.m(),a.n(),0,0,0,0); + tmap_["jd_getsub"].stop(); // (Z,HY) // factor 2.0 in next line: G and -G + tmap_["jd_gemm"].start(); a.gemm('t','n',2.0,c_proxy,cp_proxy_t,0.0); + tmap_["jd_gemm"].stop(); // rank-1 update correction a.ger(-1.0,c_proxy,0,cp_proxy_t,0); // a contains (Z,HY), copy to h21 block + tmap_["jd_getsub"].start(); h.getsub(a,a.m(),a.n(),0,0,a.m(),0); + tmap_["jd_getsub"].stop(); // (Z,HZ) // factor 2.0 in next line: G and -G + tmap_["jd_gemm"].start(); a.gemm('t','n',2.0,c_proxy,cp_proxy,0.0); + tmap_["jd_gemm"].stop(); // rank-1 update correction a.ger(-1.0,c_proxy,0,cp_proxy,0); // a contains (Z,HZ), copy to h22 block + tmap_["jd_getsub"].start(); h.getsub(a,a.m(),a.n(),0,0,a.m(),a.n()); + tmap_["jd_getsub"].stop(); // diagonalize h // Note: we only need the first n eigenvectors of the (2n x 2n) matrix valarray w(h.m()); // q is (2n,2n) DoubleMatrix q(h.context(),h.n(),h.n(),h.nb(),h.nb()); + tmap_["jd_syev"].start(); h.syev('l',w,q); + tmap_["jd_syev"].stop(); // compute the first n eigenvectors and store in wf // Y = Z Q21 (store result in dwf) // get Q21 in a + tmap_["jd_getsub"].start(); a.getsub(q,a.n(),a.n(),a.n(),0); + tmap_["jd_getsub"].stop(); + tmap_["jd_gemm"].start(); cp_proxy.gemm('n','n',1.0,c_proxy,a,0.0); + tmap_["jd_gemm"].stop(); // Y = Y Q11 (store result in wf) // get Q11 in a + tmap_["jd_getsub"].start(); a.getsub(q,a.n(),a.n(),0,0); + tmap_["jd_getsub"].stop(); + tmap_["jd_gemm"].start(); c_proxy.gemm('n','n',1.0,c_proxy_t,a,0.0); + tmap_["jd_gemm"].stop(); // add two contributions c_proxy += cp_proxy; @@ -276,37 +300,59 @@ void JDWavefunctionStepper::update(Wavefunction& dwf) // (Y,HY) // factor 2.0 in next line: G and -G + tmap_["jd_gemm"].start(); a.gemm('c','n',1.0,ct,cpt,0.0); + tmap_["jd_gemm"].stop(); // a contains (Y,HY), copy to h11 block + tmap_["jd_getsub"].start(); h.getsub(a,a.m(),a.n(),0,0,0,0); + tmap_["jd_getsub"].stop(); // (Z,HY) + tmap_["jd_gemm"].start(); a.gemm('c','n',1.0,c,cpt,0.0); + tmap_["jd_gemm"].stop(); // a contains (Z,HY), copy to h21 block + tmap_["jd_getsub"].start(); h.getsub(a,a.m(),a.n(),0,0,a.m(),0); + tmap_["jd_getsub"].stop(); // (Z,HZ) + tmap_["jd_gemm"].start(); a.gemm('c','n',1.0,c,cp,0.0); + tmap_["jd_gemm"].stop(); // a contains (Z,HZ), copy to h22 block + tmap_["jd_getsub"].start(); h.getsub(a,a.m(),a.n(),0,0,a.m(),a.n()); + tmap_["jd_getsub"].stop(); // diagonalize h // Note: we only need the first n eigenvectors of the (2n x 2n) matrix valarray w(h.m()); // q is (2n,2n) ComplexMatrix q(h.context(),h.n(),h.n(),h.nb(),h.nb()); + tmap_["jd_heev"].start(); h.heev('l',w,q); + tmap_["jd_heev"].stop(); // compute the first n eigenvectors and store in wf // Y = Z Q21 (store result in dwf) // get Q21 in a + tmap_["jd_getsub"].start(); a.getsub(q,a.n(),a.n(),a.n(),0); + tmap_["jd_getsub"].stop(); + tmap_["jd_gemm"].start(); cp.gemm('n','n',1.0,c,a,0.0); + tmap_["jd_gemm"].stop(); // Y = Y Q11 (store result in wf) // get Q11 in a + tmap_["jd_getsub"].start(); a.getsub(q,a.n(),a.n(),0,0); + tmap_["jd_getsub"].stop(); + tmap_["jd_gemm"].start(); c.gemm('n','n',1.0,ct,a,0.0); + tmap_["jd_gemm"].stop(); // add two contributions c += cp; diff --git a/src/KPGridConnectivity.C b/src/KPGridConnectivity.C deleted file mode 100644 index cab6cb8..0000000 --- a/src/KPGridConnectivity.C +++ /dev/null @@ -1,1002 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// Copyright (c) 2008 The Regents of the University of California -// -// This file is part of Qbox -// -// Qbox is distributed under the terms of the GNU General Public License -// as published by the Free Software Foundation, either version 2 of -// the License, or (at your option) any later version. -// See the file COPYING in the root directory of this distribution -// or . -// -//////////////////////////////////////////////////////////////////////////////// -// -// KPGridConnectivity.C -// -//////////////////////////////////////////////////////////////////////////////// - -#include "KPGridConnectivity.h" -using namespace std; - -#define TAG_Overlaps_first_kx 6 -#define TAG_Overlaps_first_ky 7 -#define TAG_Overlaps_first_kz 8 -#define TAG_Overlaps_second_kx 9 -#define TAG_Overlaps_second_ky 10 -#define TAG_Overlaps_second_kz 11 -#define TAG_Overlaps_local 12 - -//////////////////////////////////////////////////////////////////////////////// -KPConnectivity::KPConnectivity(const Sample& s) :s_(s), kdist_tol_(1.e-5) -{ -#ifdef DEBUG - int onpe0 = ( ( s_.ctxt_.myrow()==0 ) && ( s_.ctxt_.mycol()==0 ) ); - if ( onpe0 ) - cout << "Building kp grid connection for quad HF exchange convergence\n"; -#endif - - // get number of kpoints - nkpoints_ = s_.wf.nkp(); - comm_ = s_.wf.sd(0,0)->context().comm(); - - // get maximum number of local states - int nStateLoc_= s_.wf.sd(0,0)->nstloc(); - MPI_Allreduce(&nStateLoc_,&nStateMax_,1,MPI_INT,MPI_MAX,comm_); - - // allocate memory for kpoint weights - weight_.resize(nkpoints_); - - // allocate memory for the list of integrals - integral_kx_.resize(nkpoints_); - integral_ky_.resize(nkpoints_); - integral_kz_.resize(nkpoints_); - - // allocate memory for the list of neighbours - first_neighbour_kx_.resize(nkpoints_); - first_neighbour_ky_.resize(nkpoints_); - first_neighbour_kz_.resize(nkpoints_); - second_neighbour_kx_.resize(nkpoints_); - second_neighbour_ky_.resize(nkpoints_); - second_neighbour_kz_.resize(nkpoints_); - - // allocate memory for the symmetry tags - first_symmetric_kx_.resize(nkpoints_); - first_symmetric_ky_.resize(nkpoints_); - first_symmetric_kz_.resize(nkpoints_); - second_symmetric_kx_.resize(nkpoints_); - second_symmetric_ky_.resize(nkpoints_); - second_symmetric_kz_.resize(nkpoints_); - - // allocate memory for the index of overlaps - local_ig_overlap_.resize(nkpoints_); - first_ig_overlap_kx_.resize(nkpoints_); - first_ig_overlap_ky_.resize(nkpoints_); - first_ig_overlap_kz_.resize(nkpoints_); - second_ig_overlap_kx_.resize(nkpoints_); - second_ig_overlap_ky_.resize(nkpoints_); - second_ig_overlap_kz_.resize(nkpoints_); - - // allocate memory for the index of overlaps - first_T_overlap_kx_.resize(nkpoints_); - first_T_overlap_ky_.resize(nkpoints_); - first_T_overlap_kz_.resize(nkpoints_); - second_T_overlap_kx_.resize(nkpoints_); - second_T_overlap_ky_.resize(nkpoints_); - second_T_overlap_kz_.resize(nkpoints_); - - // allocate memory for the distances - first_distance_kx_.resize(nkpoints_); - first_distance_ky_.resize(nkpoints_); - first_distance_kz_.resize(nkpoints_); - second_distance_kx_.resize(nkpoints_); - second_distance_ky_.resize(nkpoints_); - second_distance_kz_.resize(nkpoints_); - - // allocate memory for the overlaps - local_overlap_.resize(nkpoints_*nStateMax_); - first_overlap_kx_.resize(nkpoints_*nStateMax_); - first_overlap_ky_.resize(nkpoints_*nStateMax_); - first_overlap_kz_.resize(nkpoints_*nStateMax_); - second_overlap_kx_.resize(nkpoints_*nStateMax_); - second_overlap_ky_.resize(nkpoints_*nStateMax_); - second_overlap_kz_.resize(nkpoints_*nStateMax_); - - // allocate memory for the communications - local_send_buff_.resize(nStateMax_); - first_send_buff_kx_.resize(nStateMax_); - first_send_buff_ky_.resize(nStateMax_); - first_send_buff_kz_.resize(nStateMax_); - second_send_buff_kx_.resize(nStateMax_); - second_send_buff_ky_.resize(nStateMax_); - second_send_buff_kz_.resize(nStateMax_); - - // get the dimensionality of the kpoint grid - DimX_=0; - DimY_=0; - DimZ_=0; - for ( int iKp=0; iKpkdist_tol_ ) DimX_=1; - if ( abs(dk.y)>kdist_tol_ ) DimY_=1; - if ( abs(dk.z)>kdist_tol_ ) DimZ_=1; - - // test diference with symmetric - dk = s_.wf.kpoint(iKp) + s_.wf.kpoint(0); - if ( abs(dk.x)>kdist_tol_ ) DimX_=1; - if ( abs(dk.y)>kdist_tol_ ) DimY_=1; - if ( abs(dk.z)>kdist_tol_ ) DimZ_=1; - } - - cell_moved(); -} - -//////////////////////////////////////////////////////////////////////////////// -void KPConnectivity::cell_moved(void) -{ - // recompute all quantities that depend on the cell dimensions - - int onpe0 = ( ( s_.ctxt_.myrow()==0 ) && ( s_.ctxt_.mycol()==0 ) ); - // get the length of each vector of the reciprocal cell - double length_kx=length(s_.wf.cell().b(0)); - double length_ky=length(s_.wf.cell().b(1)); - double length_kz=length(s_.wf.cell().b(2)); - - // get the connectivity of the grid - for ( int iKpi=0; iKpikdist_tol_ || - fabs(second_distance_ky_[iKpi]+first_distance_ky_[iKpi])>kdist_tol_ || - fabs(second_distance_kz_[iKpi]+first_distance_kz_[iKpi])>kdist_tol_) - { - if ( onpe0 ) - { - cout << " Found an irregular grid at kpoint " - << iKpi << ": cannot use connectivity.\n"; - cout << " distances kx: " << second_distance_kx_[iKpi] - << " " << first_distance_kx_[iKpi] << " neigbour kx:" - << second_neighbour_kx_[iKpi] << " " - << first_neighbour_kx_[iKpi] << "\n"; - cout << " distances ky: " << second_distance_ky_[iKpi] - << " " << first_distance_ky_[iKpi] << " neigbour ky:" - << second_neighbour_ky_[iKpi] << " " - << first_neighbour_ky_[iKpi] << "\n"; - cout << " distances kz: " << second_distance_kz_[iKpi] - << " " << first_distance_kz_[iKpi] << " neigbour kz:" - << second_neighbour_kz_[iKpi] << " " - << first_neighbour_kz_[iKpi] << "\n"; - } - ConnectivityComplete_=0; - return; - } - } - - // compute the weight of this kpoint - // this weight should be non zero only for kpoints - // taken into account during the computation - if ( s_.wf.weight(iKpi)!=0.0 ) - { - weight_[iKpi]=fabs((first_distance_kx_[iKpi]-second_distance_kx_[iKpi]) * - (first_distance_ky_[iKpi]-second_distance_ky_[iKpi]) * - (first_distance_ky_[iKpi]-second_distance_ky_[iKpi]))/ - (8.0 * length_kx * length_ky * length_kz); - } - else - { - weight_[iKpi]=0.0; - } - -#ifdef DEBUG - if ( onpe0 ) - cout << "Weight kpoint " << iKpi << " = " << weight_[iKpi] << "\n"; -#endif - } - - // compute the value of the integral of v^2/(x^2+y^2+z^2) for v = {x,y,z}, - // normed to a unitary volume of integration (deltaX*deltaY*deltaZ = 1). - double deltaX=fabs(first_distance_kx_[0]-second_distance_kx_[0]); - double deltaY=fabs(first_distance_ky_[0]-second_distance_ky_[0]); - double deltaZ=fabs(first_distance_kz_[0]-second_distance_kz_[0]); - double volume=deltaX*deltaY*deltaZ; - deltaX=deltaX/pow(volume,3.0); - deltaY=deltaY/pow(volume,3.0); - deltaZ=deltaZ/pow(volume,3.0); - - double xmin=-deltaX/2.0; - double xmax= deltaX/2.0; - double ymin=-deltaY/2.0; - double ymax= deltaY/2.0; - double zmin=-deltaZ/2.0; - double zmax= deltaZ/2.0; - - // estimate numerically the integral - // start by creating a grid - int n=1000; - vector x(n); - vector y(n); - vector z(n); - - for ( int i=0; ilocalsize(); - for ( int ig = 0; ig < ngloc; ig++ ) - { - // get the corresponding index - int ix = vbasis_->idx(3*ig+0); - int iy = vbasis_->idx(3*ig+1); - int iz = vbasis_->idx(3*ig+2); - if ( -1 <= ix && ix <= 1 && - -1 <= iy && iy <= 1 && - -1 <= iz && iz <= 1 ) - indices[1+ix][1+iy][1+iz]=ig; - } - // set the indices of the overlap for each kpoint - for ( int iKpi=0; iKpifirst_neighbour_kx_[iKpi] || first_symmetric_kx_[iKpi] ) ) - { - int ix = -(int)first_T_overlap_kx_[iKpi].x; - int iy = -(int)first_T_overlap_kx_[iKpi].y; - int iz = -(int)first_T_overlap_kx_[iKpi].z; - assert(ix*ix<2 && iy*iy<2 && iz*iz<2); - first_ig_overlap_kx_[iKpi] = indices[ix+1][iy+1][iz+1]; - } - else - { - int ix = (int)first_T_overlap_kx_[iKpi].x; - int iy = (int)first_T_overlap_kx_[iKpi].y; - int iz = (int)first_T_overlap_kx_[iKpi].z; - assert(ix*ix<2 && iy*iy<2 && iz*iz<2); - first_ig_overlap_kx_[iKpi] = indices[ix+1][iy+1][iz+1]; - } - - if ( !( iKpi>first_neighbour_ky_[iKpi] || first_symmetric_ky_[iKpi] ) ) - { - int ix = -(int)first_T_overlap_ky_[iKpi].x; - int iy = -(int)first_T_overlap_ky_[iKpi].y; - int iz = -(int)first_T_overlap_ky_[iKpi].z; - assert(ix*ix<2 && iy*iy<2 && iz*iz<2); - first_ig_overlap_ky_[iKpi] = indices[ix+1][iy+1][iz+1]; - } - else - { - int ix = (int)first_T_overlap_ky_[iKpi].x; - int iy = (int)first_T_overlap_ky_[iKpi].y; - int iz = (int)first_T_overlap_ky_[iKpi].z; - assert(ix*ix<2 && iy*iy<2 && iz*iz<2); - first_ig_overlap_ky_[iKpi] = indices[ix+1][iy+1][iz+1]; - } - - if ( !( iKpi>first_neighbour_kz_[iKpi] || first_symmetric_kz_[iKpi] ) ) - { - int ix = -(int)first_T_overlap_kz_[iKpi].x; - int iy = -(int)first_T_overlap_kz_[iKpi].y; - int iz = -(int)first_T_overlap_kz_[iKpi].z; - assert(ix*ix<2 && iy*iy<2 && iz*iz<2); - first_ig_overlap_kz_[iKpi] = indices[ix+1][iy+1][iz+1]; - } - else - { - int ix = (int)first_T_overlap_kz_[iKpi].x; - int iy = (int)first_T_overlap_kz_[iKpi].y; - int iz = (int)first_T_overlap_kz_[iKpi].z; - assert(ix*ix<2 && iy*iy<2 && iz*iz<2); - first_ig_overlap_kz_[iKpi] = indices[ix+1][iy+1][iz+1]; - } - - // second neighbors - if ( !( iKpi>second_neighbour_kx_[iKpi] || second_symmetric_kx_[iKpi] ) ) - { - int ix = -(int)second_T_overlap_kx_[iKpi].x; - int iy = -(int)second_T_overlap_kx_[iKpi].y; - int iz = -(int)second_T_overlap_kx_[iKpi].z; - assert(ix*ix<2 && iy*iy<2 && iz*iz<2); - second_ig_overlap_kx_[iKpi] = indices[ix+1][iy+1][iz+1]; - } - else - { - int ix = (int)second_T_overlap_kx_[iKpi].x; - int iy = (int)second_T_overlap_kx_[iKpi].y; - int iz = (int)second_T_overlap_kx_[iKpi].z; - assert(ix*ix<2 && iy*iy<2 && iz*iz<2); - second_ig_overlap_kx_[iKpi] = indices[ix+1][iy+1][iz+1]; - } - - if ( !( iKpi>second_neighbour_ky_[iKpi] || second_symmetric_ky_[iKpi] ) ) - { - int ix = -(int)second_T_overlap_ky_[iKpi].x; - int iy = -(int)second_T_overlap_ky_[iKpi].y; - int iz = -(int)second_T_overlap_ky_[iKpi].z; - assert(ix*ix<2 && iy*iy<2 && iz*iz<2); - second_ig_overlap_ky_[iKpi] = indices[ix+1][iy+1][iz+1]; - } - else - { - int ix = (int)second_T_overlap_ky_[iKpi].x; - int iy = (int)second_T_overlap_ky_[iKpi].y; - int iz = (int)second_T_overlap_ky_[iKpi].z; - assert(ix*ix<2 && iy*iy<2 && iz*iz<2); - second_ig_overlap_ky_[iKpi] = indices[ix+1][iy+1][iz+1]; - } - - if ( !( iKpi>second_neighbour_kz_[iKpi] || second_symmetric_kz_[iKpi] ) ) - { - int ix = -(int)second_T_overlap_kz_[iKpi].x; - int iy = -(int)second_T_overlap_kz_[iKpi].y; - int iz = -(int)second_T_overlap_kz_[iKpi].z; - assert(ix*ix<2 && iy*iy<2 && iz*iz<2); - second_ig_overlap_kz_[iKpi] = indices[ix+1][iy+1][iz+1]; - } - else - { - int ix = (int)second_T_overlap_kz_[iKpi].x; - int iy = (int)second_T_overlap_kz_[iKpi].y; - int iz = (int)second_T_overlap_kz_[iKpi].z; - assert(ix*ix<2 && iy*iy<2 && iz*iz<2); - second_ig_overlap_kz_[iKpi] = indices[ix+1][iy+1][iz+1]; - } - } -} - -//////////////////////////////////////////////////////////////////////////////// -void KPConnectivity::InitOverlaps() -{ - for ( int iKpi=0; iKpi *valueDirect, complex *valueSymmetric, - double occupation) -{ - // fast return - if ( occupation==0.0 ) return; - - // local overlap - if ( iKpi==iKpj && local_ig_overlap_[iKpi] >= 0 ) - local_overlap_[iKpi*nStateMax_+iLocStatei] += - norm(valueDirect[ local_ig_overlap_[iKpi] ]) * occupation; - - // case of kpx neighbour - if ( first_neighbour_kx_[iKpi]==iKpj && first_ig_overlap_kx_[iKpi]>=0 ) - { - // store overlap value for both states - first_overlap_kx_[iKpi*nStateMax_+iLocStatei] += - ( first_symmetric_kx_[iKpi] ? - norm(valueSymmetric[ first_ig_overlap_kx_[iKpi] ]) : - norm(valueDirect[ first_ig_overlap_kx_[iKpi] ]) ) * occupation; - } - - if ( second_neighbour_kx_[iKpi]==iKpj && second_ig_overlap_kx_[iKpi]>=0 ) - { - // store overlap value for both states - second_overlap_kx_[iKpi*nStateMax_+iLocStatei] += - ( second_symmetric_kx_[iKpi] ? - norm(valueSymmetric[ second_ig_overlap_kx_[iKpi] ]) : - norm(valueDirect[ second_ig_overlap_kx_[iKpi] ]) ) * occupation; - } - - // case of kpy neighbour - if ( first_neighbour_ky_[iKpi]==iKpj && first_ig_overlap_ky_[iKpi]>=0 ) - { - // store overlap value for both states - first_overlap_ky_[iKpi*nStateMax_+iLocStatei] += - ( first_symmetric_ky_[iKpi] ? - norm(valueSymmetric[ first_ig_overlap_ky_[iKpi] ]) : - norm(valueDirect[ first_ig_overlap_ky_[iKpi] ]) ) * occupation; - } - - if ( second_neighbour_ky_[iKpi]==iKpj && second_ig_overlap_ky_[iKpi]>=0 ) - { - // store overlap value for both states - second_overlap_ky_[iKpi*nStateMax_+iLocStatei] += - ( second_symmetric_ky_[iKpi] ? - norm(valueSymmetric[ second_ig_overlap_ky_[iKpi] ]) : - norm(valueDirect[ second_ig_overlap_ky_[iKpi] ]) ) * occupation; - } - - // case of kpz neighbour - if ( first_neighbour_kz_[iKpi]==iKpj && first_ig_overlap_kz_[iKpi]>=0 ) - { - // store overlap value for both states - first_overlap_kz_[iKpi*nStateMax_+iLocStatei] += - ( first_symmetric_kz_[iKpi] ? - norm(valueSymmetric[ first_ig_overlap_kz_[iKpi] ]) : - norm(valueDirect[ first_ig_overlap_kz_[iKpi] ]) ) * occupation; - } - - if ( second_neighbour_kz_[iKpi]==iKpj && second_ig_overlap_kz_[iKpi]>=0 ) - { - // store overlap value for both states - second_overlap_kz_[iKpi*nStateMax_+iLocStatei] += - ( second_symmetric_kz_[iKpi] ? - norm(valueSymmetric[ second_ig_overlap_kz_[iKpi] ]) : - norm(valueDirect[ second_ig_overlap_kz_[iKpi] ]) ) * occupation; - } -} -#undef norm - -// array permutation -//////////////////////////////////////////////////////////////////////////////// -void KPConnectivity::StartPermutation(int iKp, int iSendTo, int iRecvFr) -{ - // local overlaps - // copy overlaps into send buffer - for ( int i=0; i. -// -//////////////////////////////////////////////////////////////////////////////// -// -// KPGridConnectivity.h -// -//////////////////////////////////////////////////////////////////////////////// - -#include -#include -#include "Sample.h" -#include "SlaterDet.h" - -using namespace std; - -#ifndef KPGRIDCONNECTIVITY_H -#define KPGRIDCONNECTIVITY_H - -class KPConnectivity -{ - private: - - const Sample &s_; - int DimX_; - int DimY_; - int DimZ_; - int nkpoints_; - int nStateMax_; - int ConnectivityComplete_; - double kdist_tol_; - - std::vector first_neighbour_kx_; - std::vector first_neighbour_ky_; - std::vector first_neighbour_kz_; - std::vector second_neighbour_kx_; - std::vector second_neighbour_ky_; - std::vector second_neighbour_kz_; - - std::vector first_symmetric_kx_; - std::vector first_symmetric_ky_; - std::vector first_symmetric_kz_; - std::vector second_symmetric_kx_; - std::vector second_symmetric_ky_; - std::vector second_symmetric_kz_; - - std::vector local_ig_overlap_; - std::vector first_ig_overlap_kx_; - std::vector first_ig_overlap_ky_; - std::vector first_ig_overlap_kz_; - std::vector second_ig_overlap_kx_; - std::vector second_ig_overlap_ky_; - std::vector second_ig_overlap_kz_; - - std::vector first_distance_kx_; - std::vector first_distance_ky_; - std::vector first_distance_kz_; - std::vector second_distance_kx_; - std::vector second_distance_ky_; - std::vector second_distance_kz_; - - std::vector local_overlap_; - std::vector first_overlap_kx_; - std::vector first_overlap_ky_; - std::vector first_overlap_kz_; - std::vector second_overlap_kx_; - std::vector second_overlap_ky_; - std::vector second_overlap_kz_; - - std::vector local_send_buff_; - std::vector first_send_buff_kx_; - std::vector first_send_buff_ky_; - std::vector first_send_buff_kz_; - std::vector second_send_buff_kx_; - std::vector second_send_buff_ky_; - std::vector second_send_buff_kz_; - - std::vector integral_kx_; - std::vector integral_ky_; - std::vector integral_kz_; - - std::vector weight_; - - std::vector first_T_overlap_kx_; - std::vector first_T_overlap_ky_; - std::vector first_T_overlap_kz_; - std::vector second_T_overlap_kx_; - std::vector second_T_overlap_ky_; - std::vector second_T_overlap_kz_; - - MPI_Comm comm_; - - MPI_Request send_request_local_; - MPI_Request send_request_first_kx_; - MPI_Request send_request_first_ky_; - MPI_Request send_request_first_kz_; - MPI_Request send_request_second_kx_; - MPI_Request send_request_second_ky_; - MPI_Request send_request_second_kz_; - - MPI_Request recv_request_local_; - MPI_Request recv_request_first_kx_; - MPI_Request recv_request_first_ky_; - MPI_Request recv_request_first_kz_; - MPI_Request recv_request_second_kx_; - MPI_Request recv_request_second_ky_; - MPI_Request recv_request_second_kz_; - - public: - - // constructor - KPConnectivity(const Sample& s_); - - // destructor - ~KPConnectivity(); - - // methods - int Connection() { return ConnectivityComplete_; } - void InitOverlaps(); - void SetOverlapIndices(Basis *vbasis_); - void AddOverlap(int iKpi, int iKpj, int iLocStatei, - complex *valueDirect, complex *valueSymmetric, - double occupation); - void cell_moved(void); - void StartPermutation(int iKp, int iSendTo, int iRecvFr); - void EndPermutation(); - - double overlaps_local(int iKp, int iLocState); - double overlaps_first_kx(int iKp, int iLocState); - double overlaps_first_ky(int iKp, int iLocState); - double overlaps_first_kz(int iKp, int iLocState); - double overlaps_second_kx(int iKp, int iLocState); - double overlaps_second_ky(int iKp, int iLocState); - double overlaps_second_kz(int iKp, int iLocState); - - double distance_first_kx(int iKp); - double distance_first_ky(int iKp); - double distance_first_kz(int iKp); - double distance_second_kx(int iKp); - double distance_second_ky(int iKp); - double distance_second_kz(int iKp); - - double integral_kx(int iKp); - double integral_ky(int iKp); - double integral_kz(int iKp); - - double weight(int iKp); -}; -#endif diff --git a/src/KpointCmd.h b/src/KpointCmd.h index 0abe08e..8f6c149 100644 --- a/src/KpointCmd.h +++ b/src/KpointCmd.h @@ -15,7 +15,6 @@ // KpointCmd.h: // //////////////////////////////////////////////////////////////////////////////// -// $Id: KpointCmd.h,v 1.3 2008-09-08 15:56:18 fgygi Exp $ #ifndef KPOINTCMD_H #define KPOINTCMD_H @@ -40,6 +39,7 @@ class KpointCmd : public Cmd " syntax:\n\n" " kpoint add kx ky kz weight\n" " kpoint delete kx ky kz \n" + " kpoint move kx ky kz new_kx new_ky new_kz\n" " kpoint list\n\n"; } @@ -80,6 +80,22 @@ class KpointCmd : public Cmd double kz = atof(argv[4]); s->wf.del_kpoint(D3vector(kx,ky,kz)); } + else if ( subcmd == "move" ) + { + if ( argc != 8 ) + { + if ( onpe0 ) + cout << help_msg(); + return 1; + } + double kx = atof(argv[2]); + double ky = atof(argv[3]); + double kz = atof(argv[4]); + double newkx = atof(argv[5]); + double newky = atof(argv[6]); + double newkz = atof(argv[7]); + s->wf.move_kpoint(D3vector(kx,ky,kz),D3vector(newkx,newky,newkz)); + } else if ( subcmd == "list" ) { if ( argc != 2 ) @@ -90,7 +106,7 @@ class KpointCmd : public Cmd } if ( onpe0 ) { - cout << " <-- kpoint list: reciprocal lattice coordinates" << endl; + cout << "