From 23c21e8b1376db44d791dd6b0956da5c47841712 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Thu, 15 May 2025 19:02:12 +0200 Subject: [PATCH 1/3] Add range checking and add string function --- include/unidecode/unidecode.hpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/include/unidecode/unidecode.hpp b/include/unidecode/unidecode.hpp index 89cdf99..7f7fdb7 100644 --- a/include/unidecode/unidecode.hpp +++ b/include/unidecode/unidecode.hpp @@ -24,10 +24,11 @@ namespace unidecode { uint32_t section = codepoint >> 8; uint32_t position = codepoint & 0xff; // only ast two hex digits - // TODO: assert section < then xyz + if (section >= 256 || position >= 256) + continue; auto table = kUnidecodeData[section]; - if (table != nullptr) { // TODO: check if position < table size + if (table != nullptr) { const char* symbol = table[position]; while (*symbol != 0) { *out_it = *symbol; @@ -40,4 +41,12 @@ namespace unidecode { } + string UnidecodeString(const string &str) { + unidecode::Utf8StringIterator begin = str.c_str(); + unidecode::Utf8StringIterator end = str.c_str() + str.length(); + string output; + unidecode::Unidecode(begin, end, std::back_inserter(output)); + return output; + } + } From d8922ea3461559e3f3959df4f5199e0145a7049b Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Fri, 16 May 2025 00:31:49 +0200 Subject: [PATCH 2/3] Now actually test the string function, lol --- CMakeLists.txt | 4 ++-- include/unidecode/unidecode.hpp | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2888c13..70f2206 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ project(unidecode) set(CMAKE_CXX_STANDARD 17) -add_subdirectory(tests) +#add_subdirectory(tests) include_directories(include) @@ -11,4 +11,4 @@ file(GLOB_RECURSE sources src/*.cpp) string (REPLACE ";" "\n " FILES_MSG "${sources}") message(STATUS "source files:\n ${FILES_MSG}") -add_library(unidecode ${sources}) 
\ No newline at end of file +add_library(unidecode ${sources}) diff --git a/include/unidecode/unidecode.hpp b/include/unidecode/unidecode.hpp index 7f7fdb7..d837079 100644 --- a/include/unidecode/unidecode.hpp +++ b/include/unidecode/unidecode.hpp @@ -5,8 +5,10 @@ #pragma once +#include <string> #include "common.hpp" #include "sections.hpp" +#include "unidecode/utf8_string_iterator.hpp" namespace unidecode { @@ -41,11 +43,11 @@ namespace unidecode { } - string UnidecodeString(const string &str) { - unidecode::Utf8StringIterator begin = str.c_str(); - unidecode::Utf8StringIterator end = str.c_str() + str.length(); - string output; - unidecode::Unidecode(begin, end, std::back_inserter(output)); + std::string UnidecodeString(const std::string &str) { + unidecode::Utf8StringIterator _begin = str.c_str(); + unidecode::Utf8StringIterator _end = str.c_str() + str.length(); + std::string output; + unidecode::Unidecode(_begin, _end, std::back_inserter(output)); return output; } From de1284ad6f5e27c5e6a4b0596b55f5c2b10c72b2 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Fri, 16 May 2025 00:32:32 +0200 Subject: [PATCH 3/3] Undo --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 70f2206..6d1f628 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ project(unidecode) set(CMAKE_CXX_STANDARD 17) -#add_subdirectory(tests) +add_subdirectory(tests) include_directories(include)