diff --git a/CMakeLists.txt b/CMakeLists.txt
index e391197..86428c5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,6 +24,7 @@ set(SRC_FILES
 )
 
 include_directories(include)
+include_directories(3rd)
 if(DEFINED USE_TEST)
     message(STATUS "USE TEST")
     enable_testing()
diff --git a/include/of_util.h b/include/of_util.h
index e0b145e..8834554 100644
--- a/include/of_util.h
+++ b/include/of_util.h
@@ -245,6 +245,14 @@ public:
     static std::string u8ToGBK(const std::string& str);
     static std::string GBKTou8(const std::string& str);
 #endif
+    /// @brief Strips whitespace that touches non-ASCII text, e.g.
+    ///        "你好 啊,在 哪里 ?" => "你好啊,在哪里?".
+    ///        A run of spaces, tabs, CRs or LFs is dropped when the character
+    ///        right before or right after the run is non-ASCII (> 0x7F);
+    ///        whitespace between ASCII characters is kept as is.
+    /// @param str Text to clean up.
+    /// @return Copy of @p str without that whitespace.
+    static ofString rbs(const ofString& str);
 };
 
 typedef class CThreadSleep
diff --git a/src/of_util.cpp b/src/of_util.cpp
index 06bbbc1..88a1075 100644
--- a/src/of_util.cpp
+++ b/src/of_util.cpp
@@ -2,6 +2,7 @@
 #include
 #include
 #include
+#include
 
 #ifdef _WIN32
 #include
@@ -150,6 +151,70 @@ std::string CCodec::GBKTou8(const std::string& str)
     return utf8Str;
 }
 #endif
+
+/// Strips every run of blanks (space, tab, CR, LF) that is directly preceded
+/// or followed by a non-ASCII character; blanks between ASCII characters stay.
+/// The text is decoded to code points so the test is per character, not per
+/// UTF-8 byte.
+/// NOTE(review): the utf8:: helpers look like utfcpp's checked conversions,
+/// which throw on malformed input - confirm that callers can cope with that.
+ofString CCodec::rbs(const ofString& str)
+{
+    // Bring the input to UTF-8: UTF-16 is assumed under UNICODE_OFSTR,
+    // otherwise the bytes are taken to be UTF-8 already.
+    std::string utf8_str;
+#ifdef UNICODE_OFSTR
+    utf8::utf16to8(str.begin(), str.end(), std::back_inserter(utf8_str));
+#else
+    utf8_str = str;
+#endif
+    std::vector<char32_t> unicode_chars;
+    utf8::utf8to32(utf8_str.begin(), utf8_str.end(), std::back_inserter(unicode_chars));
+
+    const auto is_blank = [](char32_t c) {
+        return c == U' ' || c == U'\t' || c == U'\n' || c == U'\r';
+    };
+
+    const size_t count = unicode_chars.size();
+    std::vector<char32_t> processed_chars;
+    processed_chars.reserve(count);
+    size_t i = 0;
+    while (i < count) {
+        if (!is_blank(unicode_chars[i])) {
+            processed_chars.push_back(unicode_chars[i]);
+            ++i;
+            continue;
+        }
+        // [i, run_end) is a maximal run of blanks. Judge the run as a whole:
+        // testing each blank against its direct neighbours only would leave
+        // the inner blanks of a longer run behind.
+        size_t run_end = i;
+        while (run_end < count && is_blank(unicode_chars[run_end])) {
+            ++run_end;
+        }
+        const bool non_ascii_before = i > 0 && unicode_chars[i - 1] > 0x7F;
+        const bool non_ascii_after = run_end < count && unicode_chars[run_end] > 0x7F;
+        if (!non_ascii_before && !non_ascii_after) {
+            for (size_t k = i; k < run_end; ++k) {
+                processed_chars.push_back(unicode_chars[k]);
+            }
+        }
+        i = run_end;
+    }
+
+    std::string result_utf8;
+    utf8::utf32to8(processed_chars.begin(), processed_chars.end(), std::back_inserter(result_utf8));
+
+    // Convert back to whatever encoding ofString uses.
+    ofString result;
+#ifdef UNICODE_OFSTR
+    utf8::utf8to16(result_utf8.begin(), result_utf8.end(), std::back_inserter(result));
+#else
+    result = result_utf8;
+#endif
+    return result;
+}
+
 CThreadSleep::CThreadSleep()
 {
     is_stop_sleep_ = false;
diff --git a/test/main.cpp b/test/main.cpp
index 0e05048..8c8b5b6 100644
--- a/test/main.cpp
+++ b/test/main.cpp
@@ -1,6 +1,7 @@
 #include
-#include "of_str.h"
+#include
 #include
+#include
 #include
 
 using namespace ofen;
@@ -19,10 +20,28 @@ void testB()
     assert(rp == ofT("cpNiz"));
 }
 
+void testC()
+{
+    std::string source(u8"这是 一 个测试 用例。 ");
+    std::string expect(u8"这是一个测试用例。");
+    assert(CCodec::rbs(source) == expect);
+
+    // A whole run of blanks next to non-ASCII text goes away, not just the
+    // blanks that touch it directly.
+    std::string run_source(u8"你好 \t  啊");
+    std::string run_expect(u8"你好啊");
+    assert(CCodec::rbs(run_source) == run_expect);
+
+    // Blanks between ASCII characters are left alone.
+    std::string ascii_source("hello  world");
+    assert(CCodec::rbs(ascii_source) == ascii_source);
+}
+
 int main()
 {
     testA();
     testB();
+    testC();
     std::cout << "Done" << std::endl;
     return 0;
 }
\ No newline at end of file