[eigen] Re: Add EIGEN_HAS_INTRINSIC_INT128 macro |
[ Thread Index |
Date Index
| More lists.tuxfamily.org/eigen Archives
]
- To: eigen@xxxxxxxxxxxxxxxxxxx
- Subject: [eigen] Re: Add EIGEN_HAS_INTRINSIC_INT128 macro
- From: Sam Hasinoff <hasinoff@xxxxxxxxxx>
- Date: Wed, 6 Nov 2019 14:10:22 -0800
- Dkim-signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20161025; h=mime-version:references:in-reply-to:from:date:message-id:subject:to; bh=7gzzbkfEnI4IiZYthk/7uSlvPIn5n6hs8HmRpsYocno=; b=fJ8gHz8V0Hn62oN82VjxF7N+9PTxn47UWwkZJBw9yAYc94OWXDmTpmENBAOwsI1IoL dPVJMtlx8fmveHo3AHO7oyeyYdZ/Ogzuyy3ZEuvjNKpwx8TtK4MeNBBzKfgn2pHoXYpZ uvLEXV5hBvnqMAe3Zi2p3Yv/4DD22vP68v18+o1fdoDoB50fbjxKTDB+faJIYYJrLNlQ FFYlOdj3RobAUmRamJsUc+b7LVdVmxwYCaY5tELZl2vJgFJRvQ/dwU9EJviYnvZEsekO aD3zoJmk0M1bt7h0JMTJe6B8aEpzjdI1hL62twAAFawp7ITa/OU4ByQMMD/DiRSjm3qL Lclw==
Attaching an updated patch, incorporating comments from the PR.
I still can't figure out how to merge and upload from hg/SourceTree; maybe there is some issue with my personal clone of the repository.
Attaching a small patch:
Add a new EIGEN_HAS_INTRINSIC_INT128 macro, and use this instead of
__SIZEOF_INT128__. This fixes related issues with TensorIntDiv.h when
building with Clang for Windows, where support for 128-bit integer
arithmetic is advertised but broken in practice.
I'm new to Bitbucket and hg, and I couldn't figure out how to resolve the merge conflict in my related pull request manually,
or how to push the attached patch using hg CLI tools.
Thanks in advance,
Sam
# HG changeset patch
# User Sam Hasinoff <hasinoff@xxxxxxxxxx>
# Date 1573077159 28800
# Wed Nov 06 13:52:39 2019 -0800
# Node ID 36e3e863e06d5fc805cace990b86acfd1959e3d4
# Parent afc120bc03bdc4265858d6f86218eb1fed51b1b9
Add EIGEN_HAS_BUILTIN_INT128 macro
Add a new EIGEN_HAS_BUILTIN_INT128 macro, and use this instead of
__SIZEOF_INT128__. This fixes related issues with TensorIntDiv.h
when building with Clang for Windows, where support for 128-bit
integer arithmetic is advertised but broken in practice.
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -752,16 +752,32 @@
#define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC
#endif
#elif defined(__clang__) && defined(__CUDA__) && __has_feature(cxx_relaxed_constexpr)
// clang++ always considers constexpr functions as implicitly __host__ __device__
#define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC
#endif
#endif
+// Does the compiler support the __int128 and __uint128_t extensions for 128-bit
+// integer arithmetic?
+//
+// Clang and GCC define __SIZEOF_INT128__ when these extensions are supported,
+// but we avoid using them in certain cases:
+//
+// * Building using Clang for Windows, where the Clang runtime library has
+// 128-bit support only on LP64 architectures, but Windows is LLP64.
+#ifndef EIGEN_HAS_BUILTIN_INT128
+#if defined(__SIZEOF_INT128__) && !(EIGEN_OS_WIN && EIGEN_COMP_CLANG)
+#define EIGEN_HAS_BUILTIN_INT128 1
+#else
+#define EIGEN_HAS_BUILTIN_INT128 0
+#endif
+#endif
+
//------------------------------------------------------------------------------------------
// Preprocessor programming helpers
//------------------------------------------------------------------------------------------
// This macro can be used to prevent from macro expansion, e.g.:
// std::max EIGEN_NOT_A_MACRO(a,b)
#define EIGEN_NOT_A_MACRO
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
@@ -100,17 +100,17 @@ namespace {
}
template <typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) {
#if defined(EIGEN_GPU_COMPILE_PHASE)
return __umul64hi(a, b);
#elif defined(SYCL_DEVICE_ONLY)
return cl::sycl::mul_hi(a, static_cast<uint64_t>(b));
-#elif defined(__SIZEOF_INT128__)
+#elif EIGEN_HAS_BUILTIN_INT128
__uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b);
return static_cast<uint64_t>(v >> 64);
#else
return (TensorUInt128<static_val<0>, uint64_t>(a) * TensorUInt128<static_val<0>, uint64_t>(b)).upper();
#endif
}
template <int N, typename T>
@@ -119,17 +119,17 @@ namespace {
EIGEN_STATIC_ASSERT(N == 32, YOU_MADE_A_PROGRAMMING_MISTAKE);
return static_cast<uint32_t>((static_cast<uint64_t>(1) << (N+log_div)) / divider - (static_cast<uint64_t>(1) << N) + 1);
}
};
template <typename T>
struct DividerHelper<64, T> {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) {
-#if defined(__SIZEOF_INT128__) && !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(SYCL_DEVICE_ONLY)
+#if EIGEN_HAS_BUILTIN_INT128 && !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(SYCL_DEVICE_ONLY)
return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1);
#else
const uint64_t shift = 1ULL << log_div;
TensorUInt128<uint64_t, uint64_t> result = TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider)
- TensorUInt128<static_val<1>, static_val<0> >(1, 0)
+ TensorUInt128<static_val<0>, static_val<1> >(1);
return static_cast<uint64_t>(result);
#endif