Re: [eigen] Optimization advice for a specific expression |
[ Thread Index |
Date Index
| More lists.tuxfamily.org/eigen Archives
]
--=-=-=
Content-Type: text/plain; charset=utf-8
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable
Christoph Hertzberg writes:
> On 2016-02-04 17:02, Alberto Luaces wrote:
>>> Other than that: * What exactly do you mean by homogeneous?
>>
>> G =3D [ v3x3 | 0]
>> [ 0=E1=B5=80 | 1]
>>
>> where "v3x3" is a 3x3 matrix, and "0" a vector of 3 zero-valued elements.
>>
>>> Can you make any assumptions on the determinant of G? (Calculating the
>>> det didn't not seem to be the bottleneck, though)
>>
>> No, I cannot, but as you point, removing the determinant computation for
>> testing purposes does not yield any improvement.
>
> Given the above, you have:
> G.determinant() =3D=3D v3x3.determinant(),
> which should be significantly faster than the full determinant.
> If v3x3 happens to be a rotation matrix, G.determinant()=3D=3D1;
>
>>> Does it have any special form (e.g. low rank, or Identity+lowRank)
>>
>> It is just symmetric:
>>
>> Eplus << 2 , 1 , 1 , 5,
>> 1 , 2 , 1 , 5,
>> 1 , 1 , 2 , 5,
>> 5 , 5 , 5 , 20;
>
> The upper left is Identity + [1,1,1]^T*[1,1,1]. I'll abbreviate V=3Dv3x3.
> If you then define w=3DV*[1,1,1]^T =3D=3D V.rowwise().sum(), the resulting
> summand is (without the determinant factor):
> [ V*V^T + w*w^T, 5*w ]
> [ 5*w^T , 20 ]
>
> Again, if V happens to be a rotation, then V*V^T=3DIdentity
>
> So with a bit of hand-coding you may gain a lot efficiency here. It
> may be tricky to get it right that vectorization can still be
> used. E.g., instead of V.rowwise().sum() you can calculate Vector4d
> w=3DG.leftCols<3>().rowwise().sum(), which should be near optimal
> [again, check the generated assembly]
> Multiplication by 5 can be done at the end (if I4x4 initially is 0),
> and the lower right element is just 20*{sum of determinants}.
Thank you, Christoph: it certainly works better now, although it seems
that there exist some differences between Eigen versions. I have
summarized them in the following table:
| Eigen version | General algorithm | Hand-coded algorithm |
| 3.2.7 | 0.10s | 0.04s |
| 3.3-beta1 | 0.21s | 0.15s |
I am attaching a minimal test case for reference. The bottleneck lies
on the function InertiaTensor::addFace(). The data from the table were
computed with the compilation flags "-O3 -DNDEBUG". Eigen 3.3-beta1
reports its version as "3.2.92"
--=-=-=
Content-Type: text/x-c++src; charset=utf-8
Content-Disposition: attachment; filename=main.cpp
Content-Transfer-Encoding: quoted-printable
#include <iostream>
#include <ctime>
#include <chrono>
#include <Eigen/Core>
#include <Eigen/Geometry>
// @InProceedings{Euler2006,
// Title =3D {Fast Computation Of Inertia Through
// Affinely Extended Euler And Tensor},
// Author =3D {DiCarlo, Antonio and Paoluzzi, Alberto},
// Year =3D {2006}}
template<class REAL, class INT>
class InertiaTensor
{
public:
// --------------------------------------------------------------------=
--------------------- Constructor
/// Constructor without parameters.
InertiaTensor()
: dens_(0.0)
{
Eplus << 2 , 1 , 1 , 5,
1 , 2 , 1 , 5,
1 , 1 , 2 , 5,
5 , 5 , 5 , 20; // 120 * E+
}
// --------------------------------------------------------------------=
----------------------- Accessors
/// Set the density of the volume.
/// \param dens Density of the volume
inline void setDensity(const REAL dens){ dens_ =3D dens; };
/// Returns the mass of the volume
/// \return The volume mass, a scalar
inline REAL getMass() { return mass_; };
/// Returns the position of the centre of mass of the volume (defined i=
n the same coordinates as the mesh)
/// \return A 3x1 array
inline REAL* getrG() { return rG_; };
/// Returns the inertia tensor of the volume about its centre of mass (=
defined in the same coordinates as the mesh)
/// \return A 3x3 array
inline REAL* getInertiaTensor() { return J_; };
// --------------------------------------------------------------------=
----------------------- Begin Vol
/// Starts the evaluation of the volumetric properties of a new volume.
/// Clears the integrals, mass, centre of mass position, and inertia te=
nsor.
void beginVolume()
{
InertiaTensor4x4.setZero();
}
// --------------------------------------------------------------------=
------------------------- End Vol
/// Finalizes the volume integration and evaluates the physical propert=
ies.
void endVolume()
{
computeMassProperties();
}
// --------------------------------------------------------------------=
------------------------- AddFace
/// Adds a new face to the integration process.
/// \param vertex_data A 3xnum_vertices array with the x, y, and z mes=
h coordinates of the face vertices.
/// \param normal_data A 3x1 array with the x, y, and z mesh coordinat=
es of the face normal. May be set to NULL if the face is a triangle.
/// \param num_vertices Number of vertices in the face.
void addFace(const REAL* vertex_data, const REAL* normal_data, const IN=
T num_vertices)
{
// Check that we have at least 2 vertices per face
assert(num_vertices =3D=3D 3);
Eigen::Map<const Eigen::Matrix<REAL, 3, 3>> v(vertex_data);
Eigen::Matrix<REAL, 4, 4> G;
// G =3D [v0 v1 v2 0]
// [ 0 0 0 1]
G.col(0).head(3) =3D v.col(0);
G.col(1).head(3) =3D v.col(1);
G.col(2).head(3) =3D v.col(2);
G.col(3).setZero();
G.row(3).setZero();
G(3, 3) =3D 1.0;
Eigen::Matrix<REAL, 4, 4> temp;
Eigen::Vector4d w =3D G.template leftCols<3>().rowwise().sum();
temp << G.template block<3, 3>(0, 0) * G.template block<3, 3>(0, 0).transp=
ose() + w.template head<3>() * w.transpose().template head<3>(), 5 * w.temp=
late head<3>(),
5*w.transpose().template head<3>(), 20;
// I4x4 =3D det(G) G E+ G=E1=B5=80
InertiaTensor4x4.noalias() +=3D temp * (G.template block<3,3>(0,0).determ=
inant() / 120.0);
// Initial implementation
// InertiaTensor4x4.noalias() +=3D (G.template block<3,3>(0,0).determinant=
() / 120.0) * G * Eplus * G.transpose();
}
private:
REAL dens_; // Density
REAL mass_; // Mass of the volume
REAL rG_[3]; // Position of the centre of mass
REAL J_[9]; // Inertia tensor
Eigen::Matrix<REAL, 4, 4> Eplus, InertiaTensor4x4;
// --------------------------------------------------------------------=
--------- Compute mass properties
void computeMassProperties()
{
// Evaluate mass
mass_ =3D InertiaTensor4x4(3, 3) * dens_;
}
};
template<class F>
void massProps(const char *object, const char* method, unsigned int niters,=
double dens, double *mass1, double *J1, double *rg1, int nf, const F &feva=
l) {
std::cout << "--------------------------------------------------" << std:=
:endl;
std::cout << object << ": " << nf << " faces." << std::endl;
std::cout << "--------------------------------------------------" << std:=
:endl;
std::chrono::high_resolution_clock::time_point t1 =3D std::chrono::high_r=
esolution_clock::now();
for (unsigned int iters =3D 0; iters < niters; iters++)
{
feval();
}
std::chrono::high_resolution_clock::time_point t2 =3D std::chrono::high_r=
esolution_clock::now();
std::chrono::duration<double> time_span1 =3D std::chrono::duration_cast<s=
td::chrono::duration<double>>(t2 - t1);
std::cout << object << " - " << method << std::endl;
std::cout << "Elapsed time: " << time_span1.count() << " seconds." << std=
::endl;
std::cout << "Total mass: " << *mass1 << std::endl;
std::cout << std::endl;
}
template <class DT>
int computeMassPropertiesIT(DT *v, int *i, int nf, const double dens,=20
double *mass, double *r, double *J)
{
InertiaTensor<double, int> vp;
vp.setDensity(dens);
double faceverts[9];
Eigen::Map<Eigen::Matrix<DT, 3 , Eigen::Dynamic>> verts(v, 3, *std::max=
_element(i, i + nf) + 1);
vp.beginVolume();
for (int face =3D 0; face < nf; face++)
{
Eigen::Map<Eigen::Matrix3d> points(faceverts);
points.col(0) =3D verts.col(i[face * 3 + 0]).template cast<double>();
points.col(1) =3D verts.col(i[face * 3 + 1]).template cast<double>();
points.col(2) =3D verts.col(i[face * 3 + 2]).template cast<double>();
vp.addFace(faceverts, nullptr, 3);
}
vp.endVolume();
*mass =3D vp.getMass();
Eigen::Map<Eigen::Vector3d>(r, 3, 1) =3D Eigen::Map<Eigen::Vector3d>(vp=
..getrG(), 3, 1);
Eigen::Map<Eigen::Matrix3d>(J, 3, 3) =3D Eigen::Map<Eigen::Matrix3d>(vp=
..getInertiaTensor(), 3, 3);
}
int main(int argc, char *argv[])
{
// Definition of a tetrahedron ___________________________________________=
_________________
// Number of faces
int nf =3D 4;
// Array of normals
double n[12] =3D {
1.0 / sqrt(3.0), 1.0 / sqrt(3.0), 1.0 / sqrt(3.0),
0.0, -1.0, 0.0,
0.0, 0.0, -1.0,
-1.0, 0.0, 0.0,
};
// Array of vertices (x, y, z coordinates of each vertex)
double v[12] =3D {
0.0, 0.0, 0.0,
1.0, 0.0, 0.0,
0.0, 1.0, 0.0,
0.0, 0.0, 1.0
};
// Array of vertex indices (vertices in each face)
int i[12] =3D {
1, 2, 3,
0, 1, 3,=20
0, 2, 1,=20
0, 3, 2
};
// Evaluation of mass properties _________________________________________=
_________________
double rg1[3], J1[9];
double mass1;
const double dens =3D 7780.0;
const unsigned int niters_pyramid =3D 1000000;
std::cout << "Using Eigen version: " << EIGEN_WORLD_VERSION << "." << EIGE=
N_MAJOR_VERSION << "." << EIGEN_MINOR_VERSION << "\n";
massProps("PYRAMID", "DICARLO", niters_pyramid, dens, &mass1, J1, rg1, nf,
[&](){ computeMassPropertiesIT(v, i, nf, dens, &mass1, rg1, J1);});
return 0;
}
--=-=-=
Content-Type: text/plain
Content-Disposition: inline
Regards,
Alberto
--=-=-=--