Test codes
einspline : each thread creates its own einspline engine
einspline_smp: one einspline engine is shared among threads
model name : Intel(R) Xeon(R) CPU E5450 @ 3.00GHz
cpu MHz : 1998.000
cache size : 6144 KB
#einspline benchmark grid = 48 48 48 num_splines = 128 num_samples = 512 iterations = 10 number of operations in millions
#MPI = 1 OMP_NUM_THREADS = 8
# mpi openmp datatype value_op vgl_op vgh_op value_time vgl_time vgh_time
einspline 1 8 double 1.7260872571e+00 1.2830702944e+00 9.0044690421e-01 3.7967953086e-02 5.1077482104e-02 7.2781637311e-02
einspline 1 8 single 3.4774082427e+00 2.5799094819e+00 2.1541640811e+00 1.8846219778e-02 2.5402441621e-02 3.0422937870e-02
einspline 1 8 d-complex 8.8070116029e-01 6.6905884027e-01 5.0425915747e-01 7.4413436651e-02 9.7952520847e-02 1.2996491790e-01
einspline 1 8 s-complex 1.7699565770e+00 1.3577521490e+00 1.1522053788e+00 3.7026897073e-02 4.8268014193e-02 5.6878748536e-02
Initialization = 1.7413839102e+01
Total time = 7.1226139069e+00
real 0m24.744s
user 1m12.078s
sys 0m0.645s
[jnkim@simxeon icc]$ time bin/einspline -i 10
#einspline benchmark grid = 48 48 48 num_splines = 128 num_samples = 512 iterations = 10 number of operations in millions
#MPI = 1 OMP_NUM_THREADS = 8
# mpi openmp datatype value_op vgl_op vgh_op value_time vgl_time vgh_time
einspline 1 8 double 1.6264771222e+00 1.4034680775e+00 1.3961064948e+00 4.0293219686e-02 4.6695753932e-02 4.6941977739e-02
einspline 1 8 single 3.4518989569e+00 2.7460278927e+00 2.6929439071e+00 1.8985491991e-02 2.3865744472e-02 2.4336192012e-02
einspline 1 8 d-complex 9.1103207389e-01 7.6430576730e-01 7.3123864183e-01 7.1935996413e-02 8.5745787621e-02 8.9623272419e-02
einspline 1 8 s-complex 1.7490727233e+00 1.3974193550e+00 1.3865715188e+00 3.7468996644e-02 4.6897876263e-02 4.7264781594e-02
real 1m30.848s
user 11m39.168s
sys 0m12.785s
[jnkim@simxeon icc]$ sth4
[jnkim@simxeon icc]$ time bin/einspline_smp -i 10
#einspline benchmark grid = 48 48 48 num_splines = 128 num_samples = 512 iterations = 10 number of operations in millions
#MPI = 1 OMP_NUM_THREADS = 4
# mpi openmp datatype value_op vgl_op vgh_op value_time vgl_time vgh_time
einspline 1 4 double 3.0769573391e+00 2.2520140469e+00 1.5271970539e+00 2.1298962831e-02 2.9101061821e-02 4.2912602425e-02
einspline 1 4 single 5.8639775735e+00 4.4393466408e+00 3.6404873128e+00 1.1176031828e-02 1.4762532711e-02 1.8001985550e-02
einspline 1 4 d-complex 1.6493272298e+00 1.2284598659e+00 7.9961312644e-01 3.9734989405e-02 5.3348100185e-02 8.1959635019e-02
einspline 1 4 s-complex 3.1637499602e+00 2.4234459864e+00 2.0247456992e+00 2.0714658499e-02 2.7042484283e-02 3.2367521524e-02
Initialization = 1.7408624887e+01
Total time = 4.1715650558e+00
real 0m21.739s
user 0m32.937s
sys 0m0.574s
[jnkim@simxeon icc]$ time bin/einspline -i 10
#einspline benchmark grid = 48 48 48 num_splines = 128 num_samples = 512 iterations = 10 number of operations in millions
#MPI = 1 OMP_NUM_THREADS = 4
# mpi openmp datatype value_op vgl_op vgh_op value_time vgl_time vgh_time
einspline 1 4 double 3.0950414773e+00 2.5525078101e+00 2.4183752073e+00 2.1174514294e-02 2.5675141811e-02 2.7099186182e-02
einspline 1 4 single 6.4742658912e+00 4.9731899624e+00 4.5755161405e+00 1.0122537613e-02 1.3177859783e-02 1.4323192835e-02
einspline 1 4 d-complex 1.7043845978e+00 1.3788397400e+00 1.1306969275e+00 3.8451415300e-02 4.7529816628e-02 5.7960712910e-02
einspline 1 4 s-complex 3.0175624056e+00 2.5897466145e+00 2.4621232880e+00 2.1718192101e-02 2.5305950642e-02 2.6617676020e-02
real 0m46.649s
user 2m59.389s
sys 0m3.634s
[jnkim@simxeon icc]$ sth1
[jnkim@simxeon icc]$ time bin/einspline_smp -i 10
#einspline benchmark grid = 48 48 48 num_splines = 128 num_samples = 512 iterations = 10 number of operations in millions
#MPI = 1 OMP_NUM_THREADS = 1
# mpi openmp datatype value_op vgl_op vgh_op value_time vgl_time vgh_time
einspline 1 1 double 5.6900408404e+00 3.9057973134e+00 3.0634608293e+00 1.1517667770e-02 1.6779160500e-02 2.1392798424e-02
einspline 1 1 single 1.0555379181e+01 6.9701925374e+00 5.6394813259e+00 6.2087774277e-03 9.4023227692e-03 1.1620926857e-02
einspline 1 1 d-complex 3.0464710844e+00 2.0829812058e+00 1.3647943481e+00 2.1512103081e-02 3.1462597847e-02 4.8018956184e-02
einspline 1 1 s-complex 5.6986994341e+00 3.8918451143e+00 3.0759390195e+00 1.1500167847e-02 1.6839313507e-02 2.1306014061e-02
Initialization = 1.7252362967e+01
Total time = 2.2776720524e+00
real 0m19.782s
user 0m19.124s
sys 0m0.530s
[jnkim@simxeon icc]$ time bin/einspline -i 10
#einspline benchmark grid = 48 48 48 num_splines = 128 num_samples = 512 iterations = 10 number of operations in millions
#MPI = 1 OMP_NUM_THREADS = 1
# mpi openmp datatype value_op vgl_op vgh_op value_time vgl_time vgh_time
einspline 1 1 double 5.0518904658e+00 3.8952037522e+00 3.0648100194e+00 1.2972569466e-02 1.6824793816e-02 2.1383380890e-02
einspline 1 1 single 1.0546024376e+01 6.9927829632e+00 5.6323284951e+00 6.2142848969e-03 9.3719482422e-03 1.1635684967e-02
einspline 1 1 d-complex 2.9347356598e+00 2.0848122376e+00 1.3684879305e+00 2.2331142426e-02 3.1434965134e-02 4.7889351845e-02
einspline 1 1 s-complex 5.5835560695e+00 3.8830047598e+00 3.0682207348e+00 1.1737322807e-02 1.6877651215e-02 2.1359610558e-02
real 0m19.935s
user 0m19.329s
sys 0m0.529s