From a1fb6ec779b2e3f6808de14c02d859241594a81e Mon Sep 17 00:00:00 2001
From: taynpg <taynpg@163.com>
Date: Tue, 14 Jan 2025 12:06:55 +0800
Subject: [PATCH] =?UTF-8?q?fix=EF=BC=9A=E5=A4=84=E7=90=86=E8=BF=9E?=
 =?UTF-8?q?=E7=BB=AD=E7=A9=BA=E7=99=BD=E7=9A=84=E6=83=85=E5=86=B5=E3=80=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

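---
Notes (not part of the commit message; ignored by git am):

CCodec::rbs now collapses a run of ASCII whitespace (space, tab, LF, CR)
into a single U' ' and still drops whitespace that is directly adjacent
to a non-ASCII code point. testC gains a double space between two CJK
characters to cover this.

Review note: last_was_whitespace is only updated when a character is
emitted, because the `continue` skips the assignment. For a run of three
or more whitespace characters between two non-ASCII characters, the first
and last are dropped as "near non-ASCII", but a middle one has ASCII
whitespace on both sides and last_was_whitespace is still false, so one
space is emitted. A test with three consecutive spaces between CJK
characters would confirm whether that is intended.
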
 src/of_util.cpp | 20 +++++++++++---------
 test/main.cpp   |  2 +-
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/of_util.cpp b/src/of_util.cpp
index 6fc5d55..f68b5f2 100644
--- a/src/of_util.cpp
+++ b/src/of_util.cpp
@@ -160,23 +160,25 @@ ofString CCodec::rbs(const ofString& str)
 #else
     utf8_str = str;
 #endif
+
     std::vector<char32_t> unicode_chars;
     utf8::utf8to32(utf8_str.begin(), utf8_str.end(), std::back_inserter(unicode_chars));
     std::vector<char32_t> processed_chars;
+
+    bool last_was_whitespace = false;
     for (size_t i = 0; i < unicode_chars.size(); ++i) {
         char32_t current = unicode_chars[i];
-        if (current == U' ' || current == U'\t' || current == U'\n' || current == U'\r') {
-            bool near_non_ascii = false;
-            if (i > 0 && unicode_chars[i - 1] > 0x7F) {
-                near_non_ascii = true;
-            }
-            if (i + 1 < unicode_chars.size() && unicode_chars[i + 1] > 0x7F) {
-                near_non_ascii = true;
-            }
-            if (near_non_ascii) {
+
+        bool is_whitespace = (current == U' ' || current == U'\t' || current == U'\n' || current == U'\r');
+        if (is_whitespace) {
+            bool near_non_ascii =
+                (i > 0 && unicode_chars[i - 1] > 0x7F) || (i + 1 < unicode_chars.size() && unicode_chars[i + 1] > 0x7F);
+            if (near_non_ascii || last_was_whitespace) {
                 continue;
             }
+            current = U' ';
         }
+        last_was_whitespace = is_whitespace;
         processed_chars.push_back(current);
     }
 
diff --git a/test/main.cpp b/test/main.cpp
index 8c8b5b6..bed1164 100644
--- a/test/main.cpp
+++ b/test/main.cpp
@@ -22,7 +22,7 @@ void testB()
 
 void testC()
 {
-    std::string source(u8"这是 一 个测试 用例。 ");
+    std::string source(u8"这是 一 个测试  用例。 ");
     std::string expect(u8"这是一个测试用例。");
     assert(CCodec::rbs(source) == expect);
 }