diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 00efbd515..9e7646da8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -79,7 +79,7 @@ jobs: - name: Create Build Directory run: cmake -E make_directory ${{ github.workspace }}/build ${{ github.workspace }}/packages - name: Generating Build Scripts - run: cmake -DCMAKE_OSX_ARCHITECTURES=${{ matrix.arch }} -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=${{ github.workspace }} -DCPACK_PACKAGE_DIRECTORY=${{ github.workspace }}/packages ${{ github.workspace }} + run: cmake -DCMAKE_OSX_ARCHITECTURES=${{ matrix.arch }} -DCMAKE_BUILD_TYPE=RelWithDebInfo -DLIBDDWAF_ENABLE_LTO=ON -DCMAKE_INSTALL_PREFIX=${{ github.workspace }} -DCPACK_PACKAGE_DIRECTORY=${{ github.workspace }}/packages ${{ github.workspace }} working-directory: ${{ github.workspace }}/build - name: Build Binaries run: cmake --build . --config RelWithDebInfo --verbose --target all --target waf_test -j $(getconf _NPROCESSORS_ONLN) diff --git a/cmake/objects.cmake b/cmake/objects.cmake index 421ed063d..38ec40915 100644 --- a/cmake/objects.cmake +++ b/cmake/objects.cmake @@ -21,6 +21,8 @@ set(LIBDDWAF_SOURCE ${libddwaf_SOURCE_DIR}/src/dynamic_string.cpp ${libddwaf_SOURCE_DIR}/src/attribute_collector.cpp ${libddwaf_SOURCE_DIR}/src/rule.cpp + ${libddwaf_SOURCE_DIR}/src/semver.cpp + ${libddwaf_SOURCE_DIR}/src/utils.cpp ${libddwaf_SOURCE_DIR}/src/utf8.cpp ${libddwaf_SOURCE_DIR}/src/builder/action_mapper_builder.cpp ${libddwaf_SOURCE_DIR}/src/builder/matcher_builder.cpp @@ -91,6 +93,7 @@ set(LIBDDWAF_SOURCE ${libddwaf_SOURCE_DIR}/src/transformer/css_decode.cpp ${libddwaf_SOURCE_DIR}/src/transformer/html_entity_decode.cpp ${libddwaf_SOURCE_DIR}/src/transformer/js_decode.cpp + ${libddwaf_SOURCE_DIR}/src/vendor/fmt/format.cc ${libddwaf_SOURCE_DIR}/src/vendor/radixlib/radixlib.c ${libddwaf_SOURCE_DIR}/src/vendor/lua-aho-corasick/ac_fast.cxx ${libddwaf_SOURCE_DIR}/src/vendor/lua-aho-corasick/ac_slow.cxx @@ -147,7 +150,7 @@ function(gen_objects target_name) target_include_directories(${target_name} PUBLIC ${LIBDDWAF_PUBLIC_INCLUDES}) target_include_directories(${target_name} PRIVATE ${LIBDDWAF_PRIVATE_INCLUDES}) - target_compile_definitions(${target_name} PRIVATE UTF8PROC_STATIC=1) + target_compile_definitions(${target_name} PRIVATE UTF8PROC_STATIC=1 FMT_OPTIMIZE_SIZE=2) if (MSVC) target_compile_definitions(${target_name} PRIVATE NOMINMAX) endif() diff --git a/cmake/shared.cmake b/cmake/shared.cmake index cf86991e3..42964059a 100644 --- a/cmake/shared.cmake +++ b/cmake/shared.cmake @@ -70,14 +70,4 @@ elseif (MSVC) PUBLIC ${LIBDDWAF_INTERFACE_LIBRARIES}) install(FILES $ DESTINATION lib OPTIONAL) -elseif (MINGW) - target_link_libraries(libddwaf_shared PUBLIC ${LIBDDWAF_INTERFACE_LIBRARIES}) - target_link_libraries(libddwaf_shared PRIVATE - $<$:-flto> - -Wl,--no-undefined - -Wl,-version-script=${libddwaf_SOURCE_DIR}/libddwaf.version - -Wl,--build-id=0x${BUILD_ID} - ${LIBDDWAF_PRIVATE_LIBRARIES} - -static-libstdc++ - glibc_compat_time64 glibc_compat_math) endif() diff --git a/src/object.hpp b/src/object.hpp index dbbcd6038..9895829c0 100644 --- a/src/object.hpp +++ b/src/object.hpp @@ -1136,13 +1136,13 @@ template <> struct object_converter { case object_type::small_string: return view.as(); case object_type::boolean: - return ddwaf::to_string(view.as()); + return ddwaf::to_string(view.as()); case object_type::uint64: - return ddwaf::to_string(view.as()); + return ddwaf::to_string(view.as()); case object_type::int64: - return ddwaf::to_string(view.as()); + return ddwaf::to_string(view.as()); case object_type::float64: - return ddwaf::to_string(view.as()); + return ddwaf::to_string(view.as()); default: break; } diff --git a/src/processor/fingerprint.cpp b/src/processor/fingerprint.cpp index 2407b0b8c..6169149a9 100644 --- a/src/processor/fingerprint.cpp +++ b/src/processor/fingerprint.cpp @@ -178,7 +178,7 @@ template struct unsigned_field : field_generator> { explicit unsigned_field(T input) : value(input) {} - [[nodiscard]] dynamic_string generate() { return ddwaf::to_string(value); } + [[nodiscard]] dynamic_string generate() { return ddwaf::to_string(value); } T value; }; @@ -564,7 +564,7 @@ owned_object http_header_fingerprint::eval_impl(const unary_argument & } std::sort(unknown_headers.begin(), unknown_headers.end()); - auto unknown_header_size = unknown_headers.size(); + const uint64_t unknown_header_size = unknown_headers.size(); owned_object res; try { res = generate_fragment("hdr", alloc, string_field{known_header_bitset}, @@ -611,7 +611,7 @@ owned_object http_network_fingerprint::eval_impl(const unary_argument } } - unsigned ip_count = 0; + uint64_t ip_count = 0; if (!chosen_header_value.empty()) { // For now, count commas ++ip_count; diff --git a/src/ruleset_info.hpp b/src/ruleset_info.hpp index 13e801b58..91df86757 100644 --- a/src/ruleset_info.hpp +++ b/src/ruleset_info.hpp @@ -19,7 +19,7 @@ namespace ddwaf { -inline std::string index_to_id(unsigned idx) { return "index:" + to_string(idx); } +inline std::string index_to_id(unsigned idx) { return "index:" + to_string(idx); } enum class ruleset_info_state : uint8_t { empty, invalid, valid }; diff --git a/src/semver.cpp b/src/semver.cpp new file mode 100644 index 000000000..f05d925a2 --- /dev/null +++ b/src/semver.cpp @@ -0,0 +1,67 @@ +// Unless explicitly stated otherwise all files in this repository are +// dual-licensed under the Apache-2.0 License or BSD-3-Clause License. +// +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +#include +#include +#include +#include +#include + +#include "semver.hpp" +#include "utils.hpp" + +namespace ddwaf { + +semantic_version::semantic_version(std::string_view version) : str_(version) +{ + // The expected version string is: xxx.yyy.zzz[-label] + // We only try to extract xxx, yyy and zzz, while discarding the label. + // Each element can be 1 to 3 digits long, but no longer. + // Any deviation from this will be rejected. + + // Major + std::size_t start = 0; + auto end = version.find('.'); + if (end == std::string_view::npos) { + throw std::invalid_argument("invalid version syntax"); + } + auto major_str = version.substr(start, end - start); + if (major_str.empty() || major_str.size() > 3 || !parse_number(major_str, major_)) { + throw std::invalid_argument("invalid major version: " + std::string{major_str}); + } + + // Minor + start = end + 1; + end = version.find('.', start); + if (end == std::string_view::npos) { + throw std::invalid_argument("invalid version syntax"); + } + auto minor_str = version.substr(start, end - start); + if (minor_str.empty() || minor_str.size() > 3 || !parse_number(minor_str, minor_)) { + throw std::invalid_argument("invalid minor version: " + std::string{minor_str}); + } + + // Patch + start = end + 1; + end = version.find('-', start); + auto patch_str = version.substr(start, end - start); + if (patch_str.empty() || patch_str.size() > 3 || !parse_number(patch_str, patch_)) { + throw std::invalid_argument("invalid patch version: " + std::string{patch_str}); + } + + number_ = major_ * 1000000 + minor_ * 1000 + patch_; +} + +bool semantic_version::parse_number(std::string_view str, uint16_t &output) +{ + if (auto [res, value] = from_string(str); res) { + output = value; + return true; + } + return false; +} + +} // namespace ddwaf diff --git a/src/semver.hpp b/src/semver.hpp index 405197954..7224a1b53 100644 --- a/src/semver.hpp +++ b/src/semver.hpp @@ -6,58 +6,16 @@ #pragma once -#include #include #include -#include #include #include -#include "fmt/core.h" -#include "utils.hpp" - namespace ddwaf { class semantic_version { public: - explicit semantic_version(std::string_view version) : str_(version) - { - // The expected version string is: xxx.yyy.zzz[-label] - // We only try to extract xxx, yyy and zzz, while discarding the label. - // Each element can be 1 to 3 digits long, but no longer. - // Any deviation from this will be rejected. - - // Major - std::size_t start = 0; - auto end = version.find('.'); - if (end == std::string_view::npos) { - throw std::invalid_argument("invalid version syntax"); - } - auto major_str = version.substr(start, end - start); - if (major_str.empty() || major_str.size() > 3 || !parse_number(major_str, major_)) { - throw std::invalid_argument("invalid major version: " + std::string{major_str}); - } - - // Minor - start = end + 1; - end = version.find('.', start); - if (end == std::string_view::npos) { - throw std::invalid_argument("invalid version syntax"); - } - auto minor_str = version.substr(start, end - start); - if (minor_str.empty() || minor_str.size() > 3 || !parse_number(minor_str, minor_)) { - throw std::invalid_argument("invalid minor version: " + std::string{minor_str}); - } + explicit semantic_version(std::string_view version); - // Patch - start = end + 1; - end = version.find('-', start); - auto patch_str = version.substr(start, end - start); - if (patch_str.empty() || patch_str.size() > 3 || !parse_number(patch_str, patch_)) { - throw std::invalid_argument("invalid patch version: " + std::string{patch_str}); - } - - number_ = major_ * 1000000 + minor_ * 1000 + patch_; - } semantic_version(semantic_version &&other) = default; semantic_version &operator=(semantic_version &&other) noexcept = default; semantic_version(const semantic_version &other) = default; @@ -99,14 +57,7 @@ class semantic_version { : str_(str), major_(major), minor_(minor), patch_(patch), number_(number) {} - static bool parse_number(std::string_view str, uint16_t &output) - { - if (auto [res, value] = from_string(str); res) { - output = value; - return true; - } - return false; - } + static bool parse_number(std::string_view str, uint16_t &output); std::string str_; uint16_t major_{0}; diff --git a/src/utils.cpp b/src/utils.cpp new file mode 100644 index 000000000..5658de685 --- /dev/null +++ b/src/utils.cpp @@ -0,0 +1,113 @@ +// Unless explicitly stated otherwise all files in this repository are +// dual-licensed under the Apache-2.0 License or BSD-3-Clause License. +// +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025 Datadog, Inc. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "utils.hpp" + +namespace ddwaf { +namespace { + +template +concept has_from_chars = requires(T v) { std::from_chars(nullptr, nullptr, std::declval()); }; + +} // namespace + +std::vector split(std::string_view str, char sep) +{ + std::vector components; + + std::size_t start = 0; + while (start < str.size()) { + const std::size_t end = str.find(sep, start); + + if (end == start) { + // Ignore zero-sized strings + start = end + 1; + continue; + } + + if (end == std::string_view::npos) { + // Last element + components.emplace_back(str.substr(start)); + start = str.size(); + } else { + components.emplace_back(str.substr(start, end - start)); + start = end + 1; + } + } + + return components; +} + +std::vector> convert_key_path( + std::span> key_path) +{ + std::vector> result; + result.reserve(key_path.size()); + + for (const auto &key : key_path) { + std::visit([&result](auto &&k) { result.emplace_back(k); }, key); + } + return result; +} + +bool string_iequals(std::string_view left, std::string_view right) +{ + return left.size() == right.size() && + std::equal(left.begin(), left.end(), right.begin(), + [](char l, char r) { return tolower(l) == tolower(r); }); +} + +template std::string to_string(T value) { return ddwaf::fmt::format("{}", value); } + +template std::pair from_string(std::string_view str) +{ + T result; + if constexpr (has_from_chars) { + const auto *end = str.data() + str.size(); + auto [endConv, err] = std::from_chars(str.data(), end, result); + if (err == std::errc{} && endConv == end) { + return {true, result}; + } + } else { + // NOLINTNEXTLINE(misc-const-correctness) + std::istringstream iss(std::string{str}); + iss >> result; + if (!iss.fail() && iss.eof()) { + return {true, result}; + } + } + + return {false, {}}; +} + +template std::string to_string(bool value); +template std::string to_string(int64_t value); +template std::string to_string(uint64_t value); +template std::string to_string(unsigned value); +template std::string to_string(double value); + +template std::pair from_string(std::string_view str); +template std::pair from_string(std::string_view str); +template std::pair from_string(std::string_view str); +template std::pair from_string(std::string_view str); +template std::pair from_string(std::string_view str); + +} // namespace ddwaf diff --git a/src/utils.hpp b/src/utils.hpp index e838517a2..0b18e550c 100644 --- a/src/utils.hpp +++ b/src/utils.hpp @@ -8,21 +8,13 @@ #include #include -#include #include #include #include -#include -#include -#include #include -#include #include -#include #include #include -#include -#include #include #include #include @@ -74,137 +66,10 @@ template class defer { Fn fn_; }; -template -concept has_to_chars = requires(T v) { std::to_chars(nullptr, nullptr, std::declval()); }; +template std::string to_string(T value); +template std::pair from_string(std::string_view str); -template -concept has_from_chars = requires(T v) { std::from_chars(nullptr, nullptr, std::declval()); }; - -template -StringType to_string(T value) - requires std::is_integral_v && (!std::is_same_v) && - std::is_same_v> -{ - // Maximum number of characters required to represent a 64 bit integer as a string - // 20 bytes for UINT64_MAX or INT64_MIN - static constexpr size_t max_chars = 20; - - std::array str{}; - auto [ptr, ec] = std::to_chars(str.data(), str.data() + str.size(), value); - [[unlikely]] if (ec != std::errc()) { - return {}; - } - return {str.data(), ptr}; -} - -template - requires std::is_same_v || std::is_same_v -// XXX: add long double, though it's tricker, we don't know if it's quad-precision -// or x87 80-bit "extended precision" or even the same as double -inline constexpr std::size_t max_exp_digits = sizeof(T) == 4 ? 2 : 4; - -template -StringType to_string(T value) - requires(std::is_same_v || std::is_same_v) && - std::is_same_v> -{ - if constexpr (has_to_chars) { - static constexpr std::size_t max_chars = std::numeric_limits::digits10 + 1 + - 1 /* sign */ + 1 /* dot */ + 1 /* e */ + - 1 /* exp sign */ - + (sizeof(T) == 4 ? 2 : 4); - - std::array str{}; - auto [ptr, ec] = std::to_chars(str.data(), str.data() + str.size(), value); - [[unlikely]] if (ec != std::errc()) { - // This is likely unreachable if the max_chars calculation is accurate - return {}; - } - return {str.data(), ptr}; - } else { - using char_type = typename StringType::value_type; - using traits_type = typename StringType::traits_type; - using allocator_type = typename StringType::allocator_type; - std::basic_ostringstream ss; - ss << std::setprecision(std::numeric_limits::digits10) << value; - return std::move(ss).str(); - } -} - -template -StringType to_string(T value) - requires std::is_same_v && - std::is_same_v> -{ - return value ? "true" : "false"; -} - -template std::pair from_string(std::string_view str) -{ - T result; - if constexpr (has_from_chars) { - const auto *end = str.data() + str.size(); - auto [endConv, err] = std::from_chars(str.data(), end, result); - if (err == std::errc{} && endConv == end) { - return {true, result}; - } - } else { - // NOLINTNEXTLINE(misc-const-correctness) - std::istringstream iss(std::string{str}); - iss >> result; - if (!iss.fail() && iss.eof()) { - return {true, result}; - } - } - - return {false, {}}; -} - -inline std::vector split(std::string_view str, char sep) -{ - std::vector components; - - std::size_t start = 0; - while (start < str.size()) { - const std::size_t end = str.find(sep, start); - - if (end == start) { - // Ignore zero-sized strings - start = end + 1; - continue; - } - - if (end == std::string_view::npos) { - // Last element - components.emplace_back(str.substr(start)); - start = str.size(); - } else { - components.emplace_back(str.substr(start, end - start)); - start = end + 1; - } - } - - return components; -} - -// NOLINTNEXTLINE(fuchsia-multiple-inheritance) -class null_ostream : public std::ostream { -public: - null_ostream() = default; - ~null_ostream() override = default; - null_ostream(const null_ostream & /*unused*/) = delete; - null_ostream(null_ostream && /*unused*/) = delete; - null_ostream &operator=(const null_ostream & /*unused*/) = delete; - null_ostream &operator=(null_ostream && /*unused*/) = delete; -}; - -template const null_ostream &operator<<(null_ostream &os, const T & /*unused*/) -{ - return os; -} +std::vector split(std::string_view str, char sep); template // NOLINTNEXTLINE(modernize-avoid-c-arrays,readability-named-parameter) @@ -222,23 +87,9 @@ constexpr bool string_iequals_literal(std::string_view left, const char (&right) [](char l, char r) { return tolower(l) == r; }); } -inline bool string_iequals(std::string_view left, std::string_view right) -{ - return left.size() == right.size() && - std::equal(left.begin(), left.end(), right.begin(), - [](char l, char r) { return tolower(l) == tolower(r); }); -} - -inline std::vector> convert_key_path( - std::span> key_path) -{ - std::vector> result; - result.reserve(key_path.size()); +bool string_iequals(std::string_view left, std::string_view right); - for (const auto &key : key_path) { - std::visit([&result](auto &&k) { result.emplace_back(k); }, key); - } - return result; -} +std::vector> convert_key_path( + std::span> key_path); } // namespace ddwaf diff --git a/src/vendor/fmt/base.h b/src/vendor/fmt/base.h index 3c1200058..f9dfbd101 100644 --- a/src/vendor/fmt/base.h +++ b/src/vendor/fmt/base.h @@ -8,8 +8,6 @@ #ifndef FMT_BASE_H_ #define FMT_BASE_H_ -#define FMT_HEADER_ONLY - #if defined(FMT_IMPORT_STD) && !defined(FMT_MODULE) # define FMT_MODULE #endif @@ -354,7 +352,7 @@ template constexpr auto max_of(T a, T b) -> T { return a > b ? a : b; } -FMT_NORETURN FMT_API void assert_fail(const char* file, int line, +FMT_NORETURN void assert_fail(const char* file, int line, const char* message); namespace detail { @@ -384,7 +382,7 @@ template FMT_ALWAYS_INLINE constexpr auto const_check(T val) -> T { return val; } -FMT_NORETURN FMT_API void assert_fail(const char* file, int line, +FMT_NORETURN void assert_fail(const char* file, int line, const char* message); #if defined(FMT_ASSERT) @@ -663,7 +661,7 @@ struct formatter { /// Reports a format error at compile time or, via a `format_error` exception, /// at runtime. // This function is intentionally not constexpr to give a compile-time error. -FMT_NORETURN FMT_API void report_error(const char* message); +FMT_NORETURN void report_error(const char* message); enum class presentation_type : unsigned char { // Common specifiers: @@ -2451,11 +2449,11 @@ FMT_CONSTEXPR inline auto is_locking() -> bool { return locking::value || is_locking(); } -FMT_API void vformat_to(buffer& buf, string_view fmt, format_args args, +void vformat_to(buffer& buf, string_view fmt, format_args args, locale_ref loc = {}); #if FMT_WIN32 -FMT_API void vprint_mojibake(FILE*, string_view, format_args, bool); +void vprint_mojibake(FILE*, string_view, format_args, bool); #else // format_args is passed by reference since it is defined later. inline void vprint_mojibake(FILE*, string_view, const format_args&, bool) {} #endif @@ -2947,10 +2945,10 @@ FMT_NODISCARD FMT_INLINE auto formatted_size(format_string fmt, return buf.count(); } -FMT_API void vprint(string_view fmt, format_args args); -FMT_API void vprint(FILE* f, string_view fmt, format_args args); -FMT_API void vprintln(FILE* f, string_view fmt, format_args args); -FMT_API void vprint_buffered(FILE* f, string_view fmt, format_args args); +void vprint(string_view fmt, format_args args); +void vprint(FILE* f, string_view fmt, format_args args); +void vprintln(FILE* f, string_view fmt, format_args args); +void vprint_buffered(FILE* f, string_view fmt, format_args args); /** * Formats `args` according to specifications in `fmt` and writes the output diff --git a/src/vendor/fmt/format.cc b/src/vendor/fmt/format.cc new file mode 100644 index 000000000..526082e34 --- /dev/null +++ b/src/vendor/fmt/format.cc @@ -0,0 +1,43 @@ +// Formatting library for C++ +// +// Copyright (c) 2012 - 2016, Victor Zverovich +// All rights reserved. +// +// For the license information refer to format.h. + +#include "fmt/format-inl.h" + +FMT_BEGIN_NAMESPACE + +#if FMT_USE_LOCALE +template FMT_API locale_ref::locale_ref(const std::locale& loc); // DEPRECATED! +template FMT_API auto locale_ref::get() const -> std::locale; +#endif + +namespace detail { + +template FMT_API auto dragonbox::to_decimal(float x) noexcept + -> dragonbox::decimal_fp; +template FMT_API auto dragonbox::to_decimal(double x) noexcept + -> dragonbox::decimal_fp; + +// Explicit instantiations for char. + +template FMT_API auto thousands_sep_impl(locale_ref) + -> thousands_sep_result; +template FMT_API auto decimal_point_impl(locale_ref) -> char; + +// DEPRECATED! +template FMT_API void buffer::append(const char*, const char*); + +// Explicit instantiations for wchar_t. + +template FMT_API auto thousands_sep_impl(locale_ref) + -> thousands_sep_result; +template FMT_API auto decimal_point_impl(locale_ref) -> wchar_t; + +// DEPRECATED! +template FMT_API void buffer::append(const wchar_t*, const wchar_t*); + +} // namespace detail +FMT_END_NAMESPACE diff --git a/tests/unit/utils_test.cpp b/tests/unit/utils_test.cpp index 625ab516d..2864c6e93 100644 --- a/tests/unit/utils_test.cpp +++ b/tests/unit/utils_test.cpp @@ -175,6 +175,12 @@ TEST(TestUtils, Split) EXPECT_VEC(ddwaf::split("a,b,c,d,e,f,g", ','), "a", "b", "c", "d", "e", "f", "g"); } +TEST(TestUtils, SplitEmpty) +{ + auto result = ddwaf::split("", '|'); + EXPECT_TRUE(result.empty()); +} + TEST(TestUtils, ConvertKeyPath) { std::vector> input{"root", "key", 0, -1, "leaf"}; @@ -183,4 +189,11 @@ TEST(TestUtils, ConvertKeyPath) EXPECT_EQ(converted, expected); } +TEST(TestUtils, ConvertKeyPathEmpty) +{ + std::vector> input; + auto converted = convert_key_path(input); + EXPECT_TRUE(converted.empty()); +} + } // namespace