Re: [eigen] Performance regression with Matrix4f multiplication? |
[ Thread Index |
Date Index
| More lists.tuxfamily.org/eigen Archives
]
- To: eigen <eigen@xxxxxxxxxxxxxxxxxxx>
- Subject: Re: [eigen] Performance regression with Matrix4f multiplication?
- From: Gael Guennebaud <gael.guennebaud@xxxxxxxxx>
- Date: Sun, 26 Nov 2017 21:35:02 +0100
- Dkim-signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=mime-version:in-reply-to:references:from:date:message-id:subject:to; bh=YPG7ZcCb9x+3i8bKWJdGcTyx8YOLR95eOKhiuQKhBK8=; b=uBmKlONmRHhmZPmDkWQH99ZdvAwS5PItZeyC0iC9dEqTx05jNLagr/IlaKBaUxNjN2 hVWzhXB0xd98UWQ7AO091xsfCl+kU0jkf1ZOBT7BL7aTfmR049mD4nTYTHY065rLGaJn HqivopanReSWGG6lBOxTW6Bt4N+5Kp4jdscnVYmMQK9pRrJhYi53gXrb8a1mLCe4LvEp naCm4jzoxJ5eoW3+Hcpk/wl2yE9j1lg3h0XIeM29goCT5ORlHYWzfc8HXPUh98RVcYwE zeZ/1dzUNGGVqt99orodJDEUEn/oGrMl5bIl9M7KU1TxCHFnT/vtDQPBrd7u7eWRCplb teCw==
In some cases your loop gets over-optimized by the compiler, leading to inconsistent results depending on the compiler version and flags. See the attached file for a more correct version. Also, it is better to use 3.3.4 than 3.3.0.
gael
#include <Eigen/Core>
#include <benchmark/benchmark.h>
// Number of back-to-back prod() calls issued inside every timed benchmark
// iteration.
static constexpr int num_iterations{1000};
// Accumulates the product of `a` and `b` into `c`, i.e. c += a * b.
//
// EIGEN_DONT_INLINE keeps this function out of line so that the caller's loop
// cannot be folded around the product by the optimizer. noalias() states that
// `c` does not overlap the operands, which lets Eigen write the result
// directly instead of going through a temporary.
template <class Lhs, class Rhs, class Dest>
EIGEN_DONT_INLINE void prod(const Lhs& a, const Rhs& b, Dest& c) {
  c.noalias() += a * b;
}
// Benchmarks the fixed-size 4x4 matrix product for scalar type T.
//
// Two random 4x4 operands are built once, outside the timed region. Each
// benchmark iteration then calls prod() num_iterations times, accumulating
// the product into a third matrix.
//
// @tparam T     Scalar type of the matrices (instantiated below for float and
//               double).
// @param state  Google Benchmark state object driving the timed loop.
template<class T>
static inline void BM_EigenMatrix4(benchmark::State& state) {
  using Mat4 = Eigen::Matrix<T, 4, 4>;

  const Mat4 mat1 = Mat4::Random();
  const Mat4 mat2 = Mat4::Random();
  // prod() accumulates with `+=`, so the destination must start from a
  // defined value. Leaving it uninitialized reads indeterminate memory, and
  // NaN/denormal garbage there can distort the measured timings.
  Mat4 mat3 = Mat4::Zero();

  for (auto _ : state) {
    for (int i = 0; i < num_iterations; ++i) {
      prod(mat1, mat2, mat3);
    }
  }

  // Mark the accumulated result as observed so the work feeding it cannot be
  // treated as dead by the optimizer.
  benchmark::DoNotOptimize(mat3);
}
// Register the benchmark once per scalar type under test.
BENCHMARK_TEMPLATE(BM_EigenMatrix4, float);
BENCHMARK_TEMPLATE(BM_EigenMatrix4, double);
// Google Benchmark macro that provides the program's main() entry point.
BENCHMARK_MAIN();