mirror of
https://github.com/boostorg/odeint.git
synced 2025-05-09 23:24:01 +00:00
simd performance now uses aligned memory
This commit is contained in:
parent
da5388eb87
commit
430fe8183b
@ -10,6 +10,9 @@
|
||||
# you also need NT2's SIMD library available; set the include path here:
|
||||
# SIMD_INCLUDE = /path/to/simd/include
|
||||
|
||||
INCLUDES += -I$(BOOST_ROOT) -I${SIMD_INCLUDE}
|
||||
|
||||
# INTEL COMPILER
|
||||
# change this if you want to cross-compile
|
||||
ARCH = Host
|
||||
# ARCH = AVX
|
||||
@ -17,6 +20,14 @@ ARCH = Host
|
||||
|
||||
CXX = icpc
|
||||
CC = icpc
|
||||
|
||||
INCLUDES += -I../../include/ -I$(BOOST_ROOT) -I${SIMD_INCLUDE}
|
||||
CXXFLAGS = -Ofast -x${ARCH} -fno-alias -ip -inline-forceinline -std=c++0x -DNDEBUG ${INCLUDES}
|
||||
|
||||
|
||||
# GCC COMPILER
|
||||
# change this if you want to cross-compile
|
||||
# ARCH = native
|
||||
# # ARCH = core-avx-i
|
||||
|
||||
# CXX = g++
|
||||
# CC = g++
|
||||
# CXXFLAGS = -O3 -ffast-math -mtune=${ARCH} -march=${ARCH} -std=c++0x ${INCLUDES}
|
||||
|
@ -21,19 +21,23 @@
|
||||
#include <boost/numeric/odeint.hpp>
|
||||
#include <boost/simd/sdk/simd/pack.hpp>
|
||||
#include <boost/simd/sdk/simd/io.hpp>
|
||||
#include <boost/simd/memory/allocator.hpp>
|
||||
#include <boost/simd/include/functions/splat.hpp>
|
||||
#include <boost/simd/include/functions/plus.hpp>
|
||||
#include <boost/simd/include/functions/multiplies.hpp>
|
||||
|
||||
|
||||
namespace odeint = boost::numeric::odeint;
|
||||
namespace simd = boost::simd;
|
||||
|
||||
typedef boost::timer timer_type;
|
||||
|
||||
static const size_t dim = 3; // roessler is 3D
|
||||
|
||||
typedef boost::simd::pack<double> simd_pack;
|
||||
typedef simd::pack<double> simd_pack;
|
||||
typedef boost::array<simd_pack, dim> state_type;
|
||||
typedef std::vector<state_type> state_vec;
|
||||
// use the simd allocator to get properly aligned memory
|
||||
typedef std::vector< state_type, simd::allocator< state_type > > state_vec;
|
||||
|
||||
static const size_t pack_size = simd_pack::static_size;
|
||||
|
||||
|
@ -28,20 +28,28 @@ t_intel = [get_runtime_from_file("perf_workbook/odeint_rk4_array_intel.perf"),
|
||||
get_runtime_from_file("perf_ariel/odeint_rk4_array_intel.perf"),
|
||||
get_runtime_from_file("perf_lyra/odeint_rk4_array_intel.perf")]
|
||||
|
||||
t_gfort = [get_runtime_from_file("perf_workbook/odeint_rk4_array_gfort.perf"),
|
||||
get_runtime_from_file("perf_ariel/odeint_rk4_array_gfort.perf"),
|
||||
get_runtime_from_file("perf_lyra/odeint_rk4_array_gfort.perf")]
|
||||
t_gfort = [get_runtime_from_file("perf_workbook/rk4_gfort.perf"),
|
||||
get_runtime_from_file("perf_ariel/rk4_gfort.perf"),
|
||||
get_runtime_from_file("perf_lyra/rk4_gfort.perf")]
|
||||
|
||||
t_c_intel = [get_runtime_from_file("perf_workbook/rk4_c_intel.perf"),
|
||||
get_runtime_from_file("perf_ariel/rk4_c_intel.perf"),
|
||||
get_runtime_from_file("perf_lyra/rk4_c_intel.perf")]
|
||||
|
||||
print t_c_intel
|
||||
|
||||
|
||||
ind = np.arange(3) # the x locations for the groups
|
||||
width = 0.2 # the width of the bars
|
||||
width = 0.15 # the width of the bars
|
||||
|
||||
fig = plt.figure()
|
||||
ax = fig.add_subplot(111)
|
||||
rects1 = ax.bar(ind, t_gcc, width, color='b', label="odeint gcc")
|
||||
rects2 = ax.bar(ind+width, t_intel, width, color='g', label="odeint intel")
|
||||
rects3 = ax.bar(ind+2*width, t_gfort, width, color='c', label="gfort")
|
||||
rects3 = ax.bar(ind+2*width, t_c_intel, width, color='y', label="C intel")
|
||||
rects4 = ax.bar(ind+3*width, t_gfort, width, color='c', label="gfort")
|
||||
|
||||
ax.axis([-width, 2.0+4*width, 0.0, 0.85])
|
||||
ax.axis([-width, 2.0+5*width, 0.0, 0.85])
|
||||
ax.set_ylabel('Runtime (s)')
|
||||
ax.set_title('Performance for integrating the Lorenz system')
|
||||
ax.set_xticks(ind + 1.5*width)
|
||||
@ -51,6 +59,6 @@ ax.set_xticklabels(('Core i5-3210M\n3.1 GHz',
|
||||
ax.legend(loc='upper left', prop={'size': 16})
|
||||
|
||||
plt.savefig("perf.pdf")
|
||||
plt.savefig("perf.png")
|
||||
plt.savefig("perf.png", dpi=50)
|
||||
|
||||
plt.show()
|
||||
|
Loading…
x
Reference in New Issue
Block a user