diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2888c13..6d1f628 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,4 +11,4 @@ file(GLOB_RECURSE sources src/*.cpp)
 string (REPLACE ";" "\n " FILES_MSG "${sources}")
 message(STATUS "source files:\n ${FILES_MSG}")
 
-add_library(unidecode ${sources})
\ No newline at end of file
+add_library(unidecode ${sources})
diff --git a/include/unidecode/unidecode.hpp b/include/unidecode/unidecode.hpp
index 89cdf99..d837079 100644
--- a/include/unidecode/unidecode.hpp
+++ b/include/unidecode/unidecode.hpp
@@ -5,8 +5,10 @@
 
 #pragma once
 
+#include <string>
 #include "common.hpp"
 #include "sections.hpp"
+#include "unidecode/utf8_string_iterator.hpp"
 
 namespace unidecode {
 
@@ -24,10 +26,11 @@ namespace unidecode {
             uint32_t section = codepoint >> 8;
             uint32_t position = codepoint & 0xff; // only ast two hex digits
 
-            // TODO: assert section < then xyz
+            if (section >= 256)  // position is masked with 0xff above, so it is always < 256
+                continue;
 
             auto table = kUnidecodeData[section];
-            if (table != nullptr) { // TODO: check if position < table size
+            if (table != nullptr) {
                 const char* symbol = table[position];
                 while (*symbol != 0) {
                     *out_it = *symbol;
@@ -40,4 +43,15 @@ namespace unidecode {
 
     }
 
+    /// Runs Unidecode() over the bytes of `str` (via Utf8StringIterator) and returns
+    /// the characters it emits as a new std::string.
+    /// Marked inline: defined in a header, so a non-inline definition would be duplicated per TU.
+    inline std::string UnidecodeString(const std::string &str) {
+        unidecode::Utf8StringIterator _begin = str.c_str();
+        unidecode::Utf8StringIterator _end = str.c_str() + str.length();
+        std::string output;
+        unidecode::Unidecode(_begin, _end, std::back_inserter(output));
+        return output;
+    }
+
 }