From a1fb6ec779b2e3f6808de14c02d859241594a81e Mon Sep 17 00:00:00 2001 From: taynpg Date: Tue, 14 Jan 2025 12:06:55 +0800 Subject: [PATCH] =?UTF-8?q?fix=EF=BC=9A=E5=A4=84=E7=90=86=E8=BF=9E?= =?UTF-8?q?=E7=BB=AD=E7=A9=BA=E7=99=BD=E7=9A=84=E6=83=85=E5=86=B5=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/of_util.cpp | 20 +++++++++++--------- test/main.cpp | 2 +- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/of_util.cpp b/src/of_util.cpp index 6fc5d55..f68b5f2 100644 --- a/src/of_util.cpp +++ b/src/of_util.cpp @@ -160,23 +160,25 @@ ofString CCodec::rbs(const ofString& str) #else utf8_str = str; #endif + std::vector unicode_chars; utf8::utf8to32(utf8_str.begin(), utf8_str.end(), std::back_inserter(unicode_chars)); std::vector processed_chars; + + bool last_was_whitespace = false; for (size_t i = 0; i < unicode_chars.size(); ++i) { char32_t current = unicode_chars[i]; - if (current == U' ' || current == U'\t' || current == U'\n' || current == U'\r') { - bool near_non_ascii = false; - if (i > 0 && unicode_chars[i - 1] > 0x7F) { - near_non_ascii = true; - } - if (i + 1 < unicode_chars.size() && unicode_chars[i + 1] > 0x7F) { - near_non_ascii = true; - } - if (near_non_ascii) { + + bool is_whitespace = (current == U' ' || current == U'\t' || current == U'\n' || current == U'\r'); + if (is_whitespace) { + bool near_non_ascii = + (i > 0 && unicode_chars[i - 1] > 0x7F) || (i + 1 < unicode_chars.size() && unicode_chars[i + 1] > 0x7F); + if (near_non_ascii || last_was_whitespace) { continue; } + current = U' '; } + last_was_whitespace = is_whitespace; processed_chars.push_back(current); } diff --git a/test/main.cpp b/test/main.cpp index 8c8b5b6..bed1164 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -22,7 +22,7 @@ void testB() void testC() { - std::string source(u8"这是 一 个测试 用例。 "); + std::string source(u8"这是 一 个测试 用例。 "); std::string expect(u8"这是一个测试用例。"); assert(CCodec::rbs(source) == expect); }