Merge pull request #1578 from Idclip/float_to_half_ax

Idclip · web-flow · commit 2188e2829d9c · 2023-02-03T15:45:08.000Z
Fixed a crash in AX when target does not have f16c support
diff --git a/openvdb_ax/openvdb_ax/CMakeLists.txt b/openvdb_ax/openvdb_ax/CMakeLists.txt
@@ -158,6 +158,7 @@ set(OPENVDB_AX_LIBRARY_SOURCE_FILES
   compiler/PointExecutable.cc
   compiler/VolumeExecutable.cc
   math/OpenSimplexNoise.cc
+  util/x86.cc
 )
 
 if(OPENVDB_BUILD_AX_GRAMMAR)
@@ -194,6 +195,7 @@ set(OPENVDB_AX_CODEGEN_INCLUDE_FILES
   codegen/Utils.h
   codegen/VolumeComputeGenerator.h
   math/OpenSimplexNoise.h
+  util/x86.h
 )
 
 set(OPENVDB_AX_COMPILER_INCLUDE_FILES
diff --git a/openvdb_ax/openvdb_ax/codegen/Codecs.cc b/openvdb_ax/openvdb_ax/codegen/Codecs.cc
@@ -9,6 +9,7 @@
 #include "openvdb_ax/codegen/FunctionTypes.h"
 #include "openvdb_ax/codegen/Types.h"
 #include "openvdb_ax/codegen/Utils.h"
+#include "openvdb_ax/util/x86.h"
 
 namespace openvdb {
 OPENVDB_USE_VERSION_NAMESPACE
@@ -324,24 +325,55 @@ const CodecTypeMap& getCodecTypeMap()
         std::make_unique<Codec>(axprfxpt16encode(), axprfxpt16decode(), 1<<4),
     };
 
-    static CodecTypeMap map {
-        {
-            ast::tokens::FLOAT,
-            {
+    // If on X86, see if the hardware supports f16c. For other platforms we
+    // currently assume hardware support for half/float conversion. This only
+    // applies to the truncate codec.
+    // @todo  Add software support. Will be simpler with AX function support.
+    static bool HasF16C =
+        ax::x86::CheckX86Feature("f16c") != ax::x86::CpuFlagStatus::Unsupported;
+
+    static auto GetFloatCodecs = []() -> CodecNameMap {
+        if (HasF16C) {
+            return {
                 { points::TruncateCodec::name(),                             codecs[0].get() },
                 { points::FixedPointCodec<true, points::UnitRange>::name(),  codecs[1].get() },
                 { points::FixedPointCodec<false, points::UnitRange>::name(), codecs[2].get() }
-            }
-        },
-        {
-            ast::tokens::VEC3F,
-            {
+            };
+        }
+        else {
+            return {
+                { points::FixedPointCodec<true, points::UnitRange>::name(),  codecs[1].get() },
+                { points::FixedPointCodec<false, points::UnitRange>::name(), codecs[2].get() }
+            };
+        }
+    };
+
+    static auto GetVectorCodecs = []() -> CodecNameMap {
+        if (HasF16C) {
+            return {
                 { points::TruncateCodec::name(),                                 codecs[0].get() },
                 { points::FixedPointCodec<true, points::UnitRange>::name(),      codecs[1].get() },
                 { points::FixedPointCodec<false, points::UnitRange>::name(),     codecs[2].get() },
                 { points::FixedPointCodec<true, points::PositionRange>::name(),  codecs[3].get() },
                 { points::FixedPointCodec<false, points::PositionRange>::name(), codecs[4].get() }
-            }
+            };
+        }
+        else {
+            return {
+                { points::FixedPointCodec<true, points::UnitRange>::name(),      codecs[1].get() },
+                { points::FixedPointCodec<false, points::UnitRange>::name(),     codecs[2].get() },
+                { points::FixedPointCodec<true, points::PositionRange>::name(),  codecs[3].get() },
+                { points::FixedPointCodec<false, points::PositionRange>::name(), codecs[4].get() }
+            };
+        }
+    };
+
+    static CodecTypeMap map {
+        {
+            ast::tokens::FLOAT, GetFloatCodecs()
+        },
+        {
+            ast::tokens::VEC3F, GetVectorCodecs()
         },
     };
 
diff --git a/openvdb_ax/openvdb_ax/test/backend/TestCodecs.cc b/openvdb_ax/openvdb_ax/test/backend/TestCodecs.cc
@@ -5,6 +5,7 @@
 
 #include <openvdb_ax/codegen/Types.h>
 #include <openvdb_ax/codegen/Codecs.h>
+#include <openvdb_ax/util/x86.h>
 
 #include <openvdb/points/AttributeArray.h> // for native codec types
 
@@ -75,8 +76,14 @@ void TestCodecs::testRegisteredCodecs()
         }
     }
 
+    size_t count = 5;
+#if defined(__i386__) || defined(_M_IX86) || \
+    defined(__x86_64__) || defined(_M_X64)
+    if (x86::CheckX86Feature("f16c") == x86::CpuFlagStatus::Unsupported) count = 4;
+#endif
+
     // currently only 5 codecs are registered by default
-    CPPUNIT_ASSERT_EQUAL(codecs.size(), size_t(5));
+    CPPUNIT_ASSERT_EQUAL(codecs.size(), count);
 
     //  for each codec, check:
     //    make sure the codecs flags are unique
@@ -176,6 +183,11 @@ void TestCodecs::testRegisteredCodecs()
 
 void TestCodecs::testTruncateCodec()
 {
+#if defined(__i386__) || defined(_M_IX86) || \
+    defined(__x86_64__) || defined(_M_X64)
+    if (x86::CheckX86Feature("f16c") == x86::CpuFlagStatus::Unsupported) return;
+#endif
+
     unittest_util::LLVMState state;
     llvm::LLVMContext& C = state.context();
     llvm::Module& M = state.module();
diff --git a/openvdb_ax/openvdb_ax/test/compiler/TestPointExecutable.cc b/openvdb_ax/openvdb_ax/test/compiler/TestPointExecutable.cc
@@ -3,6 +3,7 @@
 
 #include <openvdb_ax/compiler/Compiler.h>
 #include <openvdb_ax/compiler/PointExecutable.h>
+#include <openvdb_ax/util/x86.h>
 
 #include <openvdb/points/PointDataGrid.h>
 #include <openvdb/points/PointConversion.h>
@@ -586,7 +587,20 @@ TestPointExecutable::testAttributeCodecs()
                  "if (v@P.x > 0.5) { v@vnu[0] = 7.135e-7f; v@vnu[1] = 200000.0f; v@vnu[2] = -5e-3f; }"
                  "else             { v@vnu[0] = -1.0f;     v@vnu[1] = 80123.14f; v@vnu[2] = 9019.53123f; }");
 
+#if defined(__i386__) || defined(_M_IX86) || \
+    defined(__x86_64__) || defined(_M_X64)
+    if (openvdb::ax::x86::CheckX86Feature("f16c") ==
+        openvdb::ax::x86::CpuFlagStatus::Unsupported)
+    {
+        CPPUNIT_ASSERT(!executable->usesAcceleratedKernel(points->tree()));
+    }
+    else {
         CPPUNIT_ASSERT(executable->usesAcceleratedKernel(points->tree()));
+    }
+#else
+        CPPUNIT_ASSERT(executable->usesAcceleratedKernel(points->tree()));
+#endif
+
         CPPUNIT_ASSERT_NO_THROW(executable->execute(*points));
 
         CPPUNIT_ASSERT_EQUAL(3.245e-7f, handle0.get(0));
@@ -727,7 +741,20 @@ TestPointExecutable::testAttributeCodecs()
                  "v@P.y -= 1.0f;"
                  "v@P.z += 2.0f;");
 
+#if defined(__i386__) || defined(_M_IX86) || \
+    defined(__x86_64__) || defined(_M_X64)
+    if (openvdb::ax::x86::CheckX86Feature("f16c") ==
+        openvdb::ax::x86::CpuFlagStatus::Unsupported)
+    {
+        CPPUNIT_ASSERT(!executable->usesAcceleratedKernel(points->tree()));
+    }
+    else {
         CPPUNIT_ASSERT(executable->usesAcceleratedKernel(points->tree()));
+    }
+#else
+        CPPUNIT_ASSERT(executable->usesAcceleratedKernel(points->tree()));
+#endif
+
         CPPUNIT_ASSERT_NO_THROW(executable->execute(*points));
 
         const auto leafIter = points->tree().cbeginLeaf();
diff --git a/openvdb_ax/openvdb_ax/util/x86.cc b/openvdb_ax/openvdb_ax/util/x86.cc
@@ -0,0 +1,38 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+/// @file util/x86.cc
+
+#include "x86.h"
+
+#include <llvm/Support/Host.h>
+#include <llvm/ADT/StringMap.h>
+
+namespace openvdb {
+OPENVDB_USE_VERSION_NAMESPACE
+namespace OPENVDB_VERSION_NAME {
+namespace ax {
+namespace x86 {
+
+/// @brief  Look up a single feature flag in LLVM's host CPU feature table.
+CpuFlagStatus CheckX86Feature(const std::string& flag)
+{
+    // Unknown if LLVM cannot report the host's CPU features at all.
+    llvm::StringMap<bool> HostFeatures;
+    if (!llvm::sys::getHostCPUFeatures(HostFeatures)) {
+        return CpuFlagStatus::Unknown;
+    }
+    // Keyed lookup; flags missing from the table are also Unknown.
+    const auto iter = HostFeatures.find(flag);
+    if (iter == HostFeatures.end()) {
+        return CpuFlagStatus::Unknown;
+    }
+    return iter->second ?
+        CpuFlagStatus::Supported :
+        CpuFlagStatus::Unsupported;
+}
+
+}
+}
+}
+}
diff --git a/openvdb_ax/openvdb_ax/util/x86.h b/openvdb_ax/openvdb_ax/util/x86.h
@@ -0,0 +1,34 @@
+// Copyright Contributors to the OpenVDB Project
+// SPDX-License-Identifier: MPL-2.0
+
+/// @file util/x86.h
+
+#ifndef OPENVDB_AX_UTIL_X86_HAS_BEEN_INCLUDED
+#define OPENVDB_AX_UTIL_X86_HAS_BEEN_INCLUDED
+
+#include <openvdb/version.h>
+#include <string>
+
+namespace openvdb {
+OPENVDB_USE_VERSION_NAMESPACE
+namespace OPENVDB_VERSION_NAME {
+namespace ax {
+namespace x86 {
+
+enum class CpuFlagStatus { // outcome of a CheckX86Feature() query
+    Unknown, Unsupported, Supported // Unknown: flag not reported for this host
+};
+
+/// @brief  On X86, get the status of a particular CPU feature flag
+/// @param flag  The flag to check. e.g. avx, bmi, f16c, etc
+/// @note   Returns Unknown if the flag was not found. This could either be
+///   because the platform is not X86, because the flag is not a valid X86
+///   feature or because the feature is too new for this version of AX/LLVM.
+OPENVDB_AX_API CpuFlagStatus CheckX86Feature(const std::string& flag);
+
+}
+}
+}
+}
+
+#endif // OPENVDB_AX_UTIL_X86_HAS_BEEN_INCLUDED
diff --git a/pendingchanges/ax_f16c.txt b/pendingchanges/ax_f16c.txt
@@ -0,0 +1,4 @@
+AX:
+- Bug Fix:
+    Fixed a bug in AX on older X86 hardware which could cause a crash when
+    accessing point attributes with half compression (bug introduced in 9.1.0).

Original file line number	Diff line number	Diff line change
`@@ -158,6 +158,7 @@ set(OPENVDB_AX_LIBRARY_SOURCE_FILES`
`158`	`158`	`compiler/PointExecutable.cc`
`159`	`159`	`compiler/VolumeExecutable.cc`
`160`	`160`	`math/OpenSimplexNoise.cc`
	`161`	`+ util/x86.cc`
`161`	`162`	`)`
`162`	`163`
`163`	`164`	`if(OPENVDB_BUILD_AX_GRAMMAR)`
`@@ -194,6 +195,7 @@ set(OPENVDB_AX_CODEGEN_INCLUDE_FILES`
`194`	`195`	`codegen/Utils.h`
`195`	`196`	`codegen/VolumeComputeGenerator.h`
`196`	`197`	`math/OpenSimplexNoise.h`
	`198`	`+ util/x86.h`
`197`	`199`	`)`
`198`	`200`
`199`	`201`	`set(OPENVDB_AX_COMPILER_INCLUDE_FILES`