fix:处理连续空白的情况。

This commit is contained in:
taynpg 2025-01-14 12:06:55 +08:00
parent 7108d1efa9
commit a1fb6ec779
2 changed files with 12 additions and 10 deletions

View File

@@ -160,23 +160,25 @@ ofString CCodec::rbs(const ofString& str)
#else
utf8_str = str;
#endif
std::vector<char32_t> unicode_chars;
utf8::utf8to32(utf8_str.begin(), utf8_str.end(), std::back_inserter(unicode_chars));
std::vector<char32_t> processed_chars;
bool last_was_whitespace = false;
for (size_t i = 0; i < unicode_chars.size(); ++i) {
char32_t current = unicode_chars[i];
if (current == U' ' || current == U'\t' || current == U'\n' || current == U'\r') {
bool near_non_ascii = false;
if (i > 0 && unicode_chars[i - 1] > 0x7F) {
near_non_ascii = true;
}
if (i + 1 < unicode_chars.size() && unicode_chars[i + 1] > 0x7F) {
near_non_ascii = true;
}
if (near_non_ascii) {
bool is_whitespace = (current == U' ' || current == U'\t' || current == U'\n' || current == U'\r');
if (is_whitespace) {
bool near_non_ascii =
(i > 0 && unicode_chars[i - 1] > 0x7F) || (i + 1 < unicode_chars.size() && unicode_chars[i + 1] > 0x7F);
if (near_non_ascii || last_was_whitespace) {
continue;
}
current = U' ';
}
last_was_whitespace = is_whitespace;
processed_chars.push_back(current);
}