Commit a734fb1a by Francois Gygi

Added APC counters


git-svn-id: http://qboxcode.org/svn/qb/trunk@384 cba15fb0-1239-40c8-b417-11db7ca47a34
parent 56f893f6
......@@ -3,7 +3,7 @@
// BOSampleStepper.C
//
////////////////////////////////////////////////////////////////////////////////
// $Id: BOSampleStepper.C,v 1.21 2005-02-04 21:59:55 fgygi Exp $
// $Id: BOSampleStepper.C,v 1.22 2005-04-26 19:06:46 fgygi Exp $
#include "BOSampleStepper.h"
#include "EnergyFunctional.h"
......@@ -20,6 +20,10 @@
#include "Preconditioner.h"
#include "AndersonMixer.h"
#ifdef USE_APC
#include "apc.h"
#endif
#include <iostream>
#include <iomanip>
using namespace std;
......@@ -145,6 +149,9 @@ void BOSampleStepper::step(int niter)
// ionic iteration
tm_iter.start();
#ifdef USE_APC
ApcStart(1);
#endif
if ( s_.ctxt_.onpe0() )
cout << " <iteration count=\"" << iter+1 << "\">\n";
......@@ -549,9 +556,7 @@ void BOSampleStepper::step(int niter)
if ( compute_eigvec || s_.ctrl.wf_diag == "EIGVAL" )
{
energy = ef_.energy(true,dwf,false,fion,false,sigma_eks);
tmap["diag"].start();
s_.wf.diag(dwf,compute_eigvec);
tmap["diag"].stop();
}
// update occupation numbers
......@@ -596,6 +601,9 @@ void BOSampleStepper::step(int niter)
}
}
#ifdef USE_APC
ApcStop(1);
#endif
// print iteration time
double time = tm_iter.real();
double tmin = time;
......
......@@ -3,11 +3,12 @@
# bgl.mk
#
#-------------------------------------------------------------------------------
# $Id: bgl.mk,v 1.1 2005-03-17 23:14:41 fgygi Exp $
# $Id: bgl.mk,v 1.2 2005-04-26 19:08:12 fgygi Exp $
#
PLT=BGL
#-------------------------------------------------------------------------------
BGL_ROOT=/bgl/BlueLight/ppcfloor
BGL_SYS=$(BGL_ROOT)/bglsys
LIBS_MPI += -L $(BGL_ROOT)/bglsys/lib -lmpich.rts \
-lmsglayer.rts -lrts.rts -ldevices.rts
......@@ -20,12 +21,18 @@
PLTFLAGS += -DUSE_FFTW \
-DUSE_MPI -DSCALAPACK \
-D__linux__ -DPLT_BIG_ENDIAN -DUSE_XERCES \
-DUSE_CSTDIO_LFS -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
-DUSE_CSTDIO_LFS -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 \
-DUSE_APC
FFTWDIR=$(HOME)/software/fftw/bgl/bglfftwgel-2.1.5.pre5
FFTWINCLUDEDIR=$(FFTWDIR)/fftw
FFTWLIBDIR=$(FFTWDIR)/fftw/.libs
APCDIR=$(HOME)/Ctools/counters/bglusr
APCINCLUDEDIR=$(APCDIR)/include
APCLIBDIR=$(APCDIR)/lib
APCLIB=-L$(APCLIBDIR) -lapc.rts -L$(BGL_SYS)/lib -lbgl_perfctr.rts
XERCESCDIR=$(HOME)/software/xml/xerces-c-src_2_6_0
XERCESCLIBDIR=$(XERCESCDIR)/lib
......@@ -34,16 +41,18 @@
BLASDIR=$(HOME)/software/blas/lib
INCLUDE = -I$(XERCESCDIR)/include \
-I$(FFTWINCLUDEDIR) -I$(BGL_ROOT)/bglsys/include
-I$(FFTWINCLUDEDIR) -I$(BGL_ROOT)/bglsys/include \
-I$(APCINCLUDEDIR)
CXXFLAGS= -g -O3 -qarch=440 -D$(PLT) $(INCLUDE) $(PLTFLAGS) $(DFLAGS)
LIBPATH = -L$(FFTWLIBDIR) \
-L$(BLASDIR) -L$(XERCESCLIBDIR) \
-L/opt/ibmcmp/xlf/9.1/blrts_lib
-L/opt/ibmcmp/xlf/9.1/blrts_lib -L$(APCLIBDIR)
LIBS = $(PLIBS) -lfftw $(BLASLIB) -lg2c \
-lxlf90 -lxlopt -lxlomp_ser -lxl -lxlfmath -lmassv -lxerces-c
-lxlf90 -lxlopt -lxlomp_ser -lxl -lxlfmath -lmassv -lxerces-c \
$(APCLIB)
LDFLAGS = $(LIBPATH) $(LIBS) $(LIBS_MPI)
......
......@@ -3,7 +3,7 @@
// qb.C
//
////////////////////////////////////////////////////////////////////////////////
// $Id: qb.C,v 1.43 2005-03-17 17:16:54 fgygi Exp $
// $Id: qb.C,v 1.44 2005-04-26 19:08:46 fgygi Exp $
#include <iostream>
#include <string>
......@@ -16,6 +16,9 @@ using namespace std;
#if AIX
#include<filehdr.h>
#endif
#ifdef USE_APC
#include "apc.h"
#endif
#include "isodate.h"
#include "release.h"
......@@ -79,6 +82,9 @@ int main(int argc, char **argv, char **envp)
#if USE_MPI
MPI_Init(&argc,&argv);
#endif
#if USE_APC
ApcInit();
#endif
#if BGLDEBUG
{
......@@ -126,7 +132,7 @@ int main(int argc, char **argv, char **envp)
// Identify executable name, checksum, size and link date
if ( getlogin() != 0 )
cout << "<user> " << getlogin() << " </user>" << endl;
#if AIX
#if AIX || OSF1
// read filehdr for link time
filehdr hdr;
FILE *fx = fopen(argv[0],"r");
......@@ -261,6 +267,9 @@ int main(int argc, char **argv, char **envp)
}
} // end of Context scope
#if USE_APC
ApcFinalize();
#endif
#if USE_MPI
MPI_Finalize();
#endif
......
......@@ -11,6 +11,10 @@ using namespace std;
#include "FourierTransform.h"
#include "Timer.h"
#if USE_APC
#include "apc.h"
#endif
int fft_flops(int n)
{
return 5.0 * n * log((double) n) / log(2.0);
......@@ -22,6 +26,9 @@ int main(int argc, char **argv)
#if USE_MPI
MPI_Init(&argc,&argv);
#endif
#if USE_APC
ApcInit();
#endif
// extra scope to ensure that Context objects get destructed before
// the MPI_Finalize call
{
......@@ -119,7 +126,13 @@ int main(int argc, char **argv)
tm.reset();
ft2.reset_timers();
tm.start();
#if USE_APC
ApcStart(1);
#endif
ft2.forward(&f2[0],&x[0]);
#if USE_APC
ApcStop(1);
#endif
tm.stop();
cout << " fwd1: vgrid->wf" << endl;
cout << " fwd1: tm_f_fft: " << ft2.tm_f_fft.real() << endl;
......@@ -140,7 +153,13 @@ int main(int argc, char **argv)
tm.reset();
ft2.reset_timers();
tm.start();
#if USE_APC
ApcStart(2);
#endif
ft2.backward(&x[0],&f2[0]);
#if USE_APC
ApcStop(2);
#endif
tm.stop();
cout << " bwd1: wf->vgrid" << endl;
cout << " bwd1: tm_b_fft: " << ft2.tm_b_fft.real() << endl;
......@@ -161,7 +180,13 @@ int main(int argc, char **argv)
tm.reset();
ft2.reset_timers();
tm.start();
#if USE_APC
ApcStart(3);
#endif
ft2.forward(&f2[0],&x[0]);
#if USE_APC
ApcStop(3);
#endif
tm.stop();
cout << " fwd2: vgrid->wf" << endl;
cout << " fwd2: tm_f_fft: " << ft2.tm_f_fft.real() << endl;
......@@ -185,7 +210,13 @@ int main(int argc, char **argv)
tm.reset();
ft2.reset_timers();
tm.start();
#if USE_APC
ApcStart(4);
#endif
ft2.backward(&x[0],&f2[0]);
#if USE_APC
ApcStop(4);
#endif
tm.stop();
cout << " bwd2: wf->vgrid" << endl;
cout << " bwd2: tm_b_fft: " << ft2.tm_b_fft.real() << endl;
......@@ -210,7 +241,13 @@ int main(int argc, char **argv)
tm.reset();
ft2.reset_timers();
tm.start();
#if USE_APC
ApcStart(5);
#endif
ft2.forward(&f2[0],&x1[0],&x2[0]);
#if USE_APC
ApcStop(5);
#endif
tm.stop();
cout << " fwd3: vgrid->wf double transform" << endl;
cout << " fwd3: tm_f_fft: " << ft2.tm_f_fft.real() << endl;
......@@ -231,7 +268,13 @@ int main(int argc, char **argv)
tm.reset();
ft2.reset_timers();
tm.start();
#if USE_APC
ApcStart(6);
#endif
ft2.backward(&x1[0],&x2[0],&f2[0]);
#if USE_APC
ApcStop(6);
#endif
tm.stop();
cout << " bwd3: wf->vgrid double transform" << endl;
cout << " bwd3: tm_b_fft: " << ft2.tm_b_fft.real() << endl;
......@@ -267,7 +310,13 @@ int main(int argc, char **argv)
tm.reset();
vft.reset_timers();
tm.start();
#if USE_APC
ApcStart(7);
#endif
vft.forward(&vf[0],&vg[0]);
#if USE_APC
ApcStop(7);
#endif
tm.stop();
cout << " fwd4: vgrid->v(g)" << endl;
cout << " fwd4: tm_b_fft: " << vft.tm_b_fft.real() << endl;
......@@ -288,7 +337,13 @@ int main(int argc, char **argv)
tm.reset();
vft.reset_timers();
tm.start();
#if USE_APC
ApcStart(8);
#endif
vft.backward(&vg[0],&vf[0]);
#if USE_APC
ApcStop(8);
#endif
tm.stop();
cout << " bwd4: v(g)->vgrid" << endl;
cout << " bwd4: tm_b_fft: " << vft.tm_b_fft.real() << endl;
......@@ -384,6 +439,9 @@ int main(int argc, char **argv)
} // Context scope
#if USE_APC
ApcFinalize();
#endif
#if USE_MPI
MPI_Finalize();
#endif
......
......@@ -6,13 +6,6 @@
#include "Timer.h"
#ifdef IA32
#include "readTSC.h"
#else
long long readTSC(void) { return 0; }
#endif
long long clk, clk_bwd, clk_fwd;
#include <iostream>
#include <complex>
#include <valarray>
......@@ -21,17 +14,31 @@ using namespace std;
#include "fftw.h"
#ifdef IA32
#include "readTSC.h"
long long clk, clk_bwd, clk_fwd;
#endif
#if USE_APC
#include "apc.h"
#endif
int main(int argc, char**argv)
{
#if USE_APC
ApcInit();
#endif
const int niter = 10;
const int np = atoi(argv[1]);
const int nvec = atoi(argv[2]);
const int ldz = np + 4;
const int ldz = np + 1;
fftw_plan fwplan, bwplan;
// resize array zvec holding columns
valarray<complex<double> > zvec(nvec * ldz);
//cout << "zvec ptr: " << &zvec[0] << endl;
// initialization of FFT libs
......@@ -48,12 +55,14 @@ int main(int argc, char**argv)
Timer t_fwd,t_bwd;
#ifdef IA32
clk_bwd = 0;
clk_fwd = 0;
#endif
for ( int iter = 0; iter < niter; iter++ )
{
t_bwd.start();
/*
* void fftw(fftw_plan plan, int howmany,
* FFTW_COMPLEX *in, int istride, int idist,
......@@ -62,17 +71,36 @@ int main(int argc, char**argv)
int ntrans = nvec;
int inc1 = 1;
int inc2 = ldz;
#ifdef IA32
clk = readTSC();
#endif
#if USE_APC
ApcStart(1);
#endif
fftw(bwplan,ntrans,(FFTW_COMPLEX*)&zvec[0],inc1,inc2,
(FFTW_COMPLEX*)0,0,0);
#if USE_APC
ApcStop(1);
#endif
#ifdef IA32
clk_bwd += readTSC() - clk;
#endif
t_bwd.stop();
t_fwd.start();
#ifdef IA32
clk = readTSC();
#endif
#if USE_APC
ApcStart(2);
#endif
fftw(fwplan,ntrans,(FFTW_COMPLEX*)&zvec[0],inc1,inc2,
(FFTW_COMPLEX*)0,0,0);
#if USE_APC
ApcStop(2);
#endif
#ifdef IA32
clk_fwd += readTSC() - clk;
#endif
t_fwd.stop();
}
......@@ -83,15 +111,21 @@ int main(int argc, char**argv)
#if FFTWMEASURE
<< "(fftw-measure)"
#endif
<< ": " << 1.e6*t_fwd.real()/(niter*nvec) << " microseconds"
<< " " << clk_fwd/(niter*nvec) << " cycles" << endl;
<< ": " << 1.e6*t_fwd.real()/(niter*nvec) << " microseconds"
#ifdef IA32
<< " " << clk_fwd/(niter*nvec) << " cycles"
#endif
<< endl;
cout << " bwd: time per transform (in-place,generic)"
#if FFTWMEASURE
<< "(fftw-measure)"
#endif
<< ": " << 1.e6*t_bwd.real()/(niter*nvec) << " microseconds"
<< " " << clk_bwd/(niter*nvec) << " cycles" << endl;
<< ": " << 1.e6*t_bwd.real()/(niter*nvec) << " microseconds"
#ifdef IA32
<< " " << clk_bwd/(niter*nvec) << " cycles"
#endif
<< endl;
#if 1
// Use out-of-place, specific plan
......@@ -107,8 +141,10 @@ int main(int argc, char**argv)
FFTW_BACKWARD,FFTW_ESTIMATE|FFTW_OUT_OF_PLACE,
(FFTW_COMPLEX*)&zvec[0],1,(FFTW_COMPLEX*)&zvec_out[0],1);
#ifdef IA32
clk_bwd = 0;
clk_fwd = 0;
#endif
for ( int iter = 0; iter < niter; iter++ )
{
......@@ -116,17 +152,37 @@ int main(int argc, char**argv)
int inc1 = 1;
int inc2 = ldz;
t_bwd.start();
clk = readTSC();
#ifdef IA32
clk = readTSC();
#endif
#if USE_APC
ApcStart(3);
#endif
fftw(bwplan,ntrans,(FFTW_COMPLEX*)&zvec[0],inc1,inc2,
(FFTW_COMPLEX*)&zvec_out[0],inc1,inc2);
clk_bwd += readTSC() - clk;
#if USE_APC
ApcStop(3);
#endif
#ifdef IA32
clk_bwd += readTSC() - clk;
#endif
t_bwd.stop();
t_fwd.start();
clk = readTSC();
#ifdef IA32
clk = readTSC();
#endif
#if USE_APC
ApcStart(4);
#endif
fftw(fwplan,ntrans,(FFTW_COMPLEX*)&zvec[0],inc1,inc2,
(FFTW_COMPLEX*)&zvec_out[0],inc1,inc2);
clk_fwd += readTSC() - clk;
#if USE_APC
ApcStop(4);
#endif
#ifdef IA32
clk_fwd += readTSC() - clk;
#endif
t_fwd.stop();
}
......@@ -138,15 +194,24 @@ int main(int argc, char**argv)
#if FFTWMEASURE
<< "(fftw-measure)"
#endif
<< ": " << 1.e6*t_fwd.real()/(niter*nvec) << " microseconds"
<< " " << clk_bwd/(niter*nvec) << " cycles" << endl;
<< ": " << 1.e6*t_fwd.real()/(niter*nvec) << " microseconds"
#ifdef IA32
<< " " << clk_fwd/(niter*nvec) << " cycles"
#endif
<< endl;
cout << " bwd: time per transform (out-of-place,specific)"
#if FFTWMEASURE
<< "(fftw-measure)"
#endif
<< ": " << 1.e6*t_bwd.real()/(niter*nvec) << " microseconds"
<< " " << clk_bwd/(niter*nvec) << " cycles" << endl;
<< ": " << 1.e6*t_bwd.real()/(niter*nvec) << " microseconds"
#ifdef IA32
<< " " << clk_bwd/(niter*nvec) << " cycles"
#endif
<< endl;
#endif
#if USE_APC
ApcFinalize();
#endif
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment