fix:处理连续空白的情况。

This commit is contained in:
taynpg 2025-01-14 12:06:55 +08:00
parent 7108d1efa9
commit a1fb6ec779
2 changed files with 12 additions and 10 deletions

View File

@ -160,23 +160,25 @@ ofString CCodec::rbs(const ofString& str)
#else #else
utf8_str = str; utf8_str = str;
#endif #endif
std::vector<char32_t> unicode_chars; std::vector<char32_t> unicode_chars;
utf8::utf8to32(utf8_str.begin(), utf8_str.end(), std::back_inserter(unicode_chars)); utf8::utf8to32(utf8_str.begin(), utf8_str.end(), std::back_inserter(unicode_chars));
std::vector<char32_t> processed_chars; std::vector<char32_t> processed_chars;
bool last_was_whitespace = false;
for (size_t i = 0; i < unicode_chars.size(); ++i) { for (size_t i = 0; i < unicode_chars.size(); ++i) {
char32_t current = unicode_chars[i]; char32_t current = unicode_chars[i];
if (current == U' ' || current == U'\t' || current == U'\n' || current == U'\r') {
bool near_non_ascii = false; bool is_whitespace = (current == U' ' || current == U'\t' || current == U'\n' || current == U'\r');
if (i > 0 && unicode_chars[i - 1] > 0x7F) { if (is_whitespace) {
near_non_ascii = true; bool near_non_ascii =
} (i > 0 && unicode_chars[i - 1] > 0x7F) || (i + 1 < unicode_chars.size() && unicode_chars[i + 1] > 0x7F);
if (i + 1 < unicode_chars.size() && unicode_chars[i + 1] > 0x7F) { if (near_non_ascii || last_was_whitespace) {
near_non_ascii = true;
}
if (near_non_ascii) {
continue; continue;
} }
current = U' ';
} }
last_was_whitespace = is_whitespace;
processed_chars.push_back(current); processed_chars.push_back(current);
} }

View File

@ -22,7 +22,7 @@ void testB()
// testC: asserts that CCodec::rbs() returns the input sentence with the
// whitespace between (and after) its non-ASCII characters removed.
// NOTE(review): each line below appears twice, presumably because this is a
// side-by-side diff view (old column, then new column) flattened onto one
// line — confirm against the actual test file in the repository.
void testC() void testC()
{ {
// Input: UTF-8 text with spaces interleaved among the CJK characters and one
// more after the closing punctuation.
std::string source(u8"这是 一 个测试 用例。 "); std::string source(u8"这是 一 个测试 用例。 ");
// Expected result: the same characters with none of that whitespace left.
std::string expect(u8"这是一个测试用例。"); std::string expect(u8"这是一个测试用例。");
// Exact string comparison; any surviving whitespace fails the test.
assert(CCodec::rbs(source) == expect); assert(CCodec::rbs(source) == expect);
} }