Skip to content

Commit 2188e28

Browse files
authored
Merge pull request #1578 from Idclip/float_to_half_ax
Fixed a crash in AX when target does not have f16c support
2 parents b865fbd + c9c2528 commit 2188e28

File tree

7 files changed

+160
-11
lines changed

7 files changed

+160
-11
lines changed

openvdb_ax/openvdb_ax/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ set(OPENVDB_AX_LIBRARY_SOURCE_FILES
158158
compiler/PointExecutable.cc
159159
compiler/VolumeExecutable.cc
160160
math/OpenSimplexNoise.cc
161+
util/x86.cc
161162
)
162163

163164
if(OPENVDB_BUILD_AX_GRAMMAR)
@@ -194,6 +195,7 @@ set(OPENVDB_AX_CODEGEN_INCLUDE_FILES
194195
codegen/Utils.h
195196
codegen/VolumeComputeGenerator.h
196197
math/OpenSimplexNoise.h
198+
util/x86.h
197199
)
198200

199201
set(OPENVDB_AX_COMPILER_INCLUDE_FILES

openvdb_ax/openvdb_ax/codegen/Codecs.cc

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "openvdb_ax/codegen/FunctionTypes.h"
1010
#include "openvdb_ax/codegen/Types.h"
1111
#include "openvdb_ax/codegen/Utils.h"
12+
#include "openvdb_ax/util/x86.h"
1213

1314
namespace openvdb {
1415
OPENVDB_USE_VERSION_NAMESPACE
@@ -324,24 +325,55 @@ const CodecTypeMap& getCodecTypeMap()
324325
std::make_unique<Codec>(axprfxpt16encode(), axprfxpt16decode(), 1<<4),
325326
};
326327

327-
static CodecTypeMap map {
328-
{
329-
ast::tokens::FLOAT,
330-
{
328+
// If on X86, see if the hardware supports f16c. For other platforms we
329+
// currently assume hardware support for half/float conversion. This only
330+
// applies to the truncate codec.
331+
// @todo Add software support. Will be simpler with AX function support.
332+
static bool HasF16C =
333+
ax::x86::CheckX86Feature("f16c") != ax::x86::CpuFlagStatus::Unsupported;
334+
335+
static auto GetFloatCodecs = []() -> CodecNameMap {
336+
if (HasF16C) {
337+
return {
331338
{ points::TruncateCodec::name(), codecs[0].get() },
332339
{ points::FixedPointCodec<true, points::UnitRange>::name(), codecs[1].get() },
333340
{ points::FixedPointCodec<false, points::UnitRange>::name(), codecs[2].get() }
334-
}
335-
},
336-
{
337-
ast::tokens::VEC3F,
338-
{
341+
};
342+
}
343+
else {
344+
return {
345+
{ points::FixedPointCodec<true, points::UnitRange>::name(), codecs[1].get() },
346+
{ points::FixedPointCodec<false, points::UnitRange>::name(), codecs[2].get() }
347+
};
348+
}
349+
};
350+
351+
static auto GetVectorCodecs = []() -> CodecNameMap {
352+
if (HasF16C) {
353+
return {
339354
{ points::TruncateCodec::name(), codecs[0].get() },
340355
{ points::FixedPointCodec<true, points::UnitRange>::name(), codecs[1].get() },
341356
{ points::FixedPointCodec<false, points::UnitRange>::name(), codecs[2].get() },
342357
{ points::FixedPointCodec<true, points::PositionRange>::name(), codecs[3].get() },
343358
{ points::FixedPointCodec<false, points::PositionRange>::name(), codecs[4].get() }
344-
}
359+
};
360+
}
361+
else {
362+
return {
363+
{ points::FixedPointCodec<true, points::UnitRange>::name(), codecs[1].get() },
364+
{ points::FixedPointCodec<false, points::UnitRange>::name(), codecs[2].get() },
365+
{ points::FixedPointCodec<true, points::PositionRange>::name(), codecs[3].get() },
366+
{ points::FixedPointCodec<false, points::PositionRange>::name(), codecs[4].get() }
367+
};
368+
}
369+
};
370+
371+
static CodecTypeMap map {
372+
{
373+
ast::tokens::FLOAT, GetFloatCodecs()
374+
},
375+
{
376+
ast::tokens::VEC3F, GetVectorCodecs()
345377
},
346378
};
347379

openvdb_ax/openvdb_ax/test/backend/TestCodecs.cc

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
#include <openvdb_ax/codegen/Types.h>
77
#include <openvdb_ax/codegen/Codecs.h>
8+
#include <openvdb_ax/util/x86.h>
89

910
#include <openvdb/points/AttributeArray.h> // for native codec types
1011

@@ -75,8 +76,14 @@ void TestCodecs::testRegisteredCodecs()
7576
}
7677
}
7778

79+
size_t count = 5;
80+
#if defined(__i386__) || defined(_M_IX86) || \
81+
defined(__x86_64__) || defined(_M_X64)
82+
if (x86::CheckX86Feature("f16c") == x86::CpuFlagStatus::Unsupported) count = 4;
83+
#endif
84+
7885
// currently only 5 codecs are registered by default
79-
CPPUNIT_ASSERT_EQUAL(codecs.size(), size_t(5));
86+
CPPUNIT_ASSERT_EQUAL(codecs.size(), count);
8087

8188
// for each codec, check:
8289
// make sure the codecs flags are unique
@@ -176,6 +183,11 @@ void TestCodecs::testRegisteredCodecs()
176183

177184
void TestCodecs::testTruncateCodec()
178185
{
186+
#if defined(__i386__) || defined(_M_IX86) || \
187+
defined(__x86_64__) || defined(_M_X64)
188+
if (x86::CheckX86Feature("f16c") == x86::CpuFlagStatus::Unsupported) return;
189+
#endif
190+
179191
unittest_util::LLVMState state;
180192
llvm::LLVMContext& C = state.context();
181193
llvm::Module& M = state.module();

openvdb_ax/openvdb_ax/test/compiler/TestPointExecutable.cc

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include <openvdb_ax/compiler/Compiler.h>
55
#include <openvdb_ax/compiler/PointExecutable.h>
6+
#include <openvdb_ax/util/x86.h>
67

78
#include <openvdb/points/PointDataGrid.h>
89
#include <openvdb/points/PointConversion.h>
@@ -586,7 +587,20 @@ TestPointExecutable::testAttributeCodecs()
586587
"if (v@P.x > 0.5) { v@vnu[0] = 7.135e-7f; v@vnu[1] = 200000.0f; v@vnu[2] = -5e-3f; }"
587588
"else { v@vnu[0] = -1.0f; v@vnu[1] = 80123.14f; v@vnu[2] = 9019.53123f; }");
588589

590+
#if defined(__i386__) || defined(_M_IX86) || \
591+
defined(__x86_64__) || defined(_M_X64)
592+
if (openvdb::ax::x86::CheckX86Feature("f16c") ==
593+
openvdb::ax::x86::CpuFlagStatus::Unsupported)
594+
{
595+
CPPUNIT_ASSERT(!executable->usesAcceleratedKernel(points->tree()));
596+
}
597+
else {
589598
CPPUNIT_ASSERT(executable->usesAcceleratedKernel(points->tree()));
599+
}
600+
#else
601+
CPPUNIT_ASSERT(executable->usesAcceleratedKernel(points->tree()));
602+
#endif
603+
590604
CPPUNIT_ASSERT_NO_THROW(executable->execute(*points));
591605

592606
CPPUNIT_ASSERT_EQUAL(3.245e-7f, handle0.get(0));
@@ -727,7 +741,20 @@ TestPointExecutable::testAttributeCodecs()
727741
"v@P.y -= 1.0f;"
728742
"v@P.z += 2.0f;");
729743

744+
#if defined(__i386__) || defined(_M_IX86) || \
745+
defined(__x86_64__) || defined(_M_X64)
746+
if (openvdb::ax::x86::CheckX86Feature("f16c") ==
747+
openvdb::ax::x86::CpuFlagStatus::Unsupported)
748+
{
749+
CPPUNIT_ASSERT(!executable->usesAcceleratedKernel(points->tree()));
750+
}
751+
else {
730752
CPPUNIT_ASSERT(executable->usesAcceleratedKernel(points->tree()));
753+
}
754+
#else
755+
CPPUNIT_ASSERT(executable->usesAcceleratedKernel(points->tree()));
756+
#endif
757+
731758
CPPUNIT_ASSERT_NO_THROW(executable->execute(*points));
732759

733760
const auto leafIter = points->tree().cbeginLeaf();

openvdb_ax/openvdb_ax/util/x86.cc

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// Copyright Contributors to the OpenVDB Project
2+
// SPDX-License-Identifier: MPL-2.0
3+
4+
/// @file util/x86.cc
5+
6+
#include "x86.h"
7+
8+
#include <llvm/Support/Host.h>
9+
#include <llvm/ADT/StringMap.h>
10+
11+
namespace openvdb {
12+
OPENVDB_USE_VERSION_NAMESPACE
13+
namespace OPENVDB_VERSION_NAME {
14+
namespace ax {
15+
namespace x86 {
16+
17+
CpuFlagStatus CheckX86Feature(const std::string& flag)
18+
{
19+
llvm::StringMap<bool> HostFeatures;
20+
if (!llvm::sys::getHostCPUFeatures(HostFeatures)) {
21+
return CpuFlagStatus::Unknown;
22+
}
23+
if (!HostFeatures.empty()) {
24+
for (auto& feature : HostFeatures) {
25+
if (feature.first() == flag) {
26+
return feature.second ?
27+
CpuFlagStatus::Supported :
28+
CpuFlagStatus::Unsupported;
29+
}
30+
}
31+
}
32+
return CpuFlagStatus::Unknown;
33+
}
34+
35+
}
36+
}
37+
}
38+
}

openvdb_ax/openvdb_ax/util/x86.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Copyright Contributors to the OpenVDB Project
2+
// SPDX-License-Identifier: MPL-2.0
3+
4+
/// @file util/x86.h
5+
6+
#ifndef OPENVDB_AX_UTIL_X86_HAS_BEEN_INCLUDED
7+
#define OPENVDB_AX_UTIL_X86_HAS_BEEN_INCLUDED
8+
9+
#include <openvdb/version.h>
10+
#include <string>
11+
12+
namespace openvdb {
13+
OPENVDB_USE_VERSION_NAMESPACE
14+
namespace OPENVDB_VERSION_NAME {
15+
namespace ax {
16+
namespace x86 {
17+
18+
enum class CpuFlagStatus {
19+
Unknown, Unsupported, Supported
20+
};
21+
22+
/// @brief On X86, get the status if a particular CPU instruction
23+
/// @param flag The flag to check. e.g. avx, bmi, f16c, etc
24+
/// @note Returns Unknown if the flag was not found. This could either be
25+
/// because the platform is not X86, because the flag is not a valid X86
26+
/// feature or because the feature is too new for this version of AX/LLVM.
27+
OPENVDB_AX_API CpuFlagStatus CheckX86Feature(const std::string& flag);
28+
29+
}
30+
}
31+
}
32+
}
33+
34+
#endif // OPENVDB_AX_UTIL_X86_HAS_BEEN_INCLUDED

pendingchanges/ax_f16c.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
AX:
2+
- Bug Fix:
3+
Fixed a bug in AX on older X86 hardware which could cause a crash when
4+
accessing point attributes with half compression (bug introduced in 9.1.0).

0 commit comments

Comments
 (0)