acou3d.cpp   acou3d.cpp 
#define BZ_DISABLE_RESTRICT
#include <blitz/array.h> #include <blitz/array.h>
#include <blitz/traversal.h> #include <blitz/traversal.h>
#include <blitz/timer.h> #include <blitz/timer.h>
#include <fstream>
#ifdef BZ_HAVE_STD
#include <fstream>
#else
#include <fstream.h>
#endif
BZ_USING_NAMESPACE(blitz) BZ_USING_NAMESPACE(blitz)
#ifdef BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES #if defined(BZ_FORTRAN_SYMBOLS_WITH_TRAILING_UNDERSCORES)
#define acoustic3d_f90 acoustic3d_f90_ #define acoustic3d_f90 acoustic3d_f90_
#define acoustic3d_f77 acoustic3d_f77_ #define acoustic3d_f77 acoustic3d_f77_
#define acoustic3d_f90tuned acoustic3d_f90tuned_ #define acoustic3d_f90tuned acoustic3d_f90tuned_
#define acoustic3d_f77tuned acoustic3d_f77tuned_ #define acoustic3d_f77tuned acoustic3d_f77tuned_
#endif #elif defined(BZ_FORTRAN_SYMBOLS_WITH_DOUBLE_TRAILING_UNDERSCORES)
#define acoustic3d_f90 acoustic3d_f90__
#ifdef BZ_FORTRAN_SYMBOLS_CAPS #define acoustic3d_f77 acoustic3d_f77__
#define acoustic3d_f90tuned acoustic3d_f90tuned__
#define acoustic3d_f77tuned acoustic3d_f77tuned__
#elif defined(BZ_FORTRAN_SYMBOLS_CAPS)
#define acoustic3d_f90 ACOUSTIC3D_F90 #define acoustic3d_f90 ACOUSTIC3D_F90
#define acoustic3d_f77 ACOUSTIC3D_F77 #define acoustic3d_f77 ACOUSTIC3D_F77
#define acoustic3d_f90tuned ACOUSTIC3D_F90TUNED #define acoustic3d_f90tuned ACOUSTIC3D_F90TUNED
#define acoustic3d_f77tuned ACOUSTIC3D_F77TUNED #define acoustic3d_f77tuned ACOUSTIC3D_F77TUNED
#endif #endif
extern "C" { extern "C" {
void acoustic3d_f90(int& N, int& niters, float& check); void acoustic3d_f90(int& N, int& niters, float& check);
void acoustic3d_f77(int& N, int& niters, float& check); void acoustic3d_f77(int& N, int& niters, float& check);
void acoustic3d_f90tuned(int& N, int& niters, float& check); void acoustic3d_f90tuned(int& N, int& niters, float& check);
skipping to change at line 41 skipping to change at line 51
float acoustic3D_BlitzCycled(int N, int niters); float acoustic3D_BlitzCycled(int N, int niters);
float acoustic3D_BlitzStencil(int N, int niters); float acoustic3D_BlitzStencil(int N, int niters);
int main() int main()
{ {
Timer timer; Timer timer;
int N = 112; int N = 112;
int niters = 210; // Must be divisible by 3 for tuned Fortran versions int niters = 210; // Must be divisible by 3 for tuned Fortran versions
float check; float check;
generateFastTraversalOrder(TinyVector<int,2>(N-2,N-2)); cout << "Acoustic 3D Benchmark" << endl << endl;
timer.start(); double Mflops = (N-2)*(N-2)*(N-2) * 11.0 * niters / 1.0e+6;
acoustic3d_f90(N, niters, check);
timer.stop();
cout << "Fortran 90: " << timer.elapsedSeconds() << " s check = "
<< check << endl;
timer.start(); generateFastTraversalOrder(TinyVector<int,2>(N-2,N-2));
acoustic3d_f77(N, niters, check);
timer.stop();
cout << "Fortran 77: " << timer.elapsedSeconds() << " s check = "
<< check << endl;
timer.start();
acoustic3d_f90tuned(N, niters, check);
timer.stop();
cout << "Fortran 90 (tuned): " << timer.elapsedSeconds() << " s check = "
<< check << endl;
timer.start(); timer.start();
acoustic3d_f77tuned(N, niters, check); check = acoustic3D_BlitzRaw(N, niters);
timer.stop(); timer.stop();
cout << "Fortran 77 (tuned): " << timer.elapsedSeconds() << " s check = " cout << "Blitz++ (raw): " << timer.elapsedSeconds() << " s check = "
<< check << endl; << check << " Mflops = " << (Mflops/timer.elapsedSeconds())
<< endl << endl;
timer.start(); timer.start();
check = acoustic3D_BlitzRaw(N, niters); check = acoustic3D_BlitzStencil(N, niters);
timer.stop(); timer.stop();
cout << "Blitz++ (raw): " << timer.elapsedSeconds() << " s check = " cout << "Blitz++ (stencil): " << timer.elapsedSeconds()
<< check << endl; << " s check = " << check
<< " Mflops = " << (Mflops/timer.elapsedSeconds())
<< endl << endl;
#if 0 #if 0
timer.start(); timer.start();
check = acoustic3D_BlitzInterlaced(N, niters, c); check = acoustic3D_BlitzInterlaced(N, niters, c);
timer.stop(); timer.stop();
cout << "Blitz++ (interlaced): " << timer.elapsedSeconds() << " s check = " cout << "Blitz++ (interlaced): " << timer.elapsedSeconds() << " s check = "
<< check << endl; << check << endl;
#endif #endif
timer.start(); timer.start();
check = acoustic3D_BlitzCycled(N, niters); check = acoustic3D_BlitzCycled(N, niters);
timer.stop(); timer.stop();
cout << "Blitz++ (cycled): " << timer.elapsedSeconds() << " s check = " cout << "Blitz++ (cycled): " << timer.elapsedSeconds() << " s check = "
<< check << endl; << check << " Mflops = " << (Mflops/timer.elapsedSeconds())
<< endl << endl;
timer.start(); timer.start();
check = acoustic3D_BlitzInterlacedCycled(N, niters); check = acoustic3D_BlitzInterlacedCycled(N, niters);
timer.stop(); timer.stop();
cout << "Blitz++ (interlaced & cycled): " << timer.elapsedSeconds() cout << "Blitz++ (interlaced & cycled): " << timer.elapsedSeconds()
<< " s check = " << check << endl; << " s check = " << check
<< " Mflops = " << (Mflops/timer.elapsedSeconds())
<< endl << endl;
#ifdef FORTRAN_90
timer.start(); timer.start();
check = acoustic3D_BlitzStencil(N, niters); acoustic3d_f90(N, niters, check);
timer.stop(); timer.stop();
cout << "Blitz++ (stencil): " << timer.elapsedSeconds() cout << "Fortran 90: " << timer.elapsedSeconds() << " s check = "
<< " s check = " << check << endl; << check << " Mflops = " << (Mflops/timer.elapsedSeconds())
<< endl << endl;
timer.start();
acoustic3d_f90tuned(N, niters, check);
timer.stop();
cout << "Fortran 90 (tuned): " << timer.elapsedSeconds() << " s check = "
<< check << " Mflops = " << (Mflops/timer.elapsedSeconds())
<< endl << endl;
#endif
timer.start();
acoustic3d_f77(N, niters, check);
timer.stop();
cout << "Fortran 77: " << timer.elapsedSeconds() << " s check = "
<< check << " Mflops = " << (Mflops/timer.elapsedSeconds())
<< endl << endl;
timer.start();
acoustic3d_f77tuned(N, niters, check);
timer.stop();
cout << "Fortran 77 (tuned): " << timer.elapsedSeconds() << " s check = "
<< check << " Mflops = " << (Mflops/timer.elapsedSeconds())
<< endl << endl;
return 0; return 0;
} }
void setupInitialConditions(Array<float,3>& P1, Array<float,3>& P2, void setupInitialConditions(Array<float,3>& P1, Array<float,3>& P2,
Array<float,3>& P3, Array<float,3>& c, int N); Array<float,3>& P3, Array<float,3>& c, int N);
void snapshot(const Array<float,3>& P, const Array<float,3>& c); void snapshot(const Array<float,3>& P, const Array<float,3>& c);
void checkArray(const Array<float,3>& A, int N); void checkArray(const Array<float,3>& A, int N);
void setupInitialConditions(Array<float,3>& P1, Array<float,3>& P2, void setupInitialConditions(Array<float,3>& P1, Array<float,3>& P2,
Array<float,3>& P3, Array<float,3>& c, int N) Array<float,3>& P3, Array<float,3>& c, int N)
{ {
// Set the velocity field // Set the velocity field
c(Range(0,N/2-1), Range::all(), Range::all()) = 0.05; c(Range(0,N/2-1), Range::all(), Range::all()) = 0.05;
c(Range(N/2,N-1), Range::all(), Range::all()) = 0.3; c(Range(N/2,N-1), Range::all(), Range::all()) = 0.3;
int cavityLeft = 3*N/7.0-1; double Nfp = static_cast<double>(N);
int cavityRight = 4*N/7.0-1; int cavityLeft = static_cast<int>(3*Nfp/7-1);
int cavityFront = 3*N/7.0-1; int cavityRight = static_cast<int>(4*Nfp/7-1);
int cavityBack = 4*N/7.0-1; int cavityFront = static_cast<int>(3*Nfp/7-1);
int cavityTop = 5*N/7.0-1; int cavityBack = static_cast<int>(4*Nfp/7-1);
int cavityBottom = 6*N/7.0-1; int cavityTop = static_cast<int>(5*Nfp/7-1);
int cavityBottom = static_cast<int>(6*Nfp/7-1);
c(Range(cavityTop,cavityBottom),Range(cavityLeft,cavityRight), c(Range(cavityTop,cavityBottom),Range(cavityLeft,cavityRight),
Range(cavityFront,cavityBack)) = 0.02; Range(cavityFront,cavityBack)) = 0.02;
int cavityTop2 = 1*N/7.0-1; int cavityTop2 = static_cast<int>(1*Nfp/7-1);
int cavityBottom2 = 2*N/7.0-1; int cavityBottom2 = static_cast<int>(2*Nfp/7-1);
c(Range(cavityTop2,cavityBottom2),Range(cavityLeft,cavityRight), c(Range(cavityTop2,cavityBottom2),Range(cavityLeft,cavityRight),
Range(cavityFront,cavityBack)) = 0.001; Range(cavityFront,cavityBack)) = 0.001;
// Initial pressure distribution // Initial pressure distribution
BZ_USING_NAMESPACE(blitz::tensor) BZ_USING_NAMESPACE(blitz::tensor)
float ci = N/2-1; float ci = N/2-1;
float cj = N/2-1; float cj = N/2-1;
float ck = N/2-1; float ck = N/2-1;
float s2 = 64.0 * 9.0 / pow2(N/2.0); float s2 = 64.0 * 9.0 / pow2(N/2.0);
P1 = 0.0; P1 = 0.0;
skipping to change at line 182 skipping to change at line 209
float Pmin = -0.2; float Pmin = -0.2;
float PScale = 1.0/0.4; float PScale = 1.0/0.4;
float VScale = 0.5; float VScale = 0.5;
ofs << "P" << snapshotNum << " = [ "; ofs << "P" << snapshotNum << " = [ ";
for (int i=0; i < N; ++i) for (int i=0; i < N; ++i)
{ {
for (int j=0; j < N; ++j) for (int j=0; j < N; ++j)
{ {
float value = (P(i,j,k)-Pmin)*PScale + c(i,j,k)*VScale; float value = (P(i,j,k)-Pmin)*PScale + c(i,j,k)*VScale;
int r = value * 4096; int r = static_cast<int>(value * 4096);
ofs << r << " "; ofs << r << " ";
} }
if (i < N-1) if (i < N-1)
ofs << ";" << endl; ofs << ";" << endl;
} }
ofs << "];" << endl; ofs << "];" << endl;
} }
 End of changes. 19 change blocks. 
45 lines changed or deleted 74 lines changed or added

This html diff was produced by rfcdiff 1.41. The latest version is available from http://tools.ietf.org/tools/rfcdiff/