add:添加去除非ASCII附近空白函数。

This commit is contained in:
taynpg 2025-01-14 10:30:07 +08:00
parent 6f2ab1c560
commit 6cada82609
4 changed files with 58 additions and 1 deletions

View File

@ -24,6 +24,7 @@ set(SRC_FILES
)
include_directories(include)
include_directories(3rd)
if(DEFINED USE_TEST)
message(STATUS "USE TEST")
enable_testing()

View File

@ -245,6 +245,11 @@ public:
static std::string u8ToGBK(const std::string& str);
static std::string GBKTou8(const std::string& str);
#endif
/// @brief Removes whitespace that sits next to non-ASCII characters inside a
///        piece of text, e.g. [你好 啊,在 哪里 ?] => [你好啊,在哪里?].
///        Whitespace whose neighbours are all ASCII is left untouched.
/// @param str The text to process.
/// @return A copy of @p str with that whitespace removed.
static ofString rbs(const ofString& str);
};
typedef class CThreadSleep

View File

@ -2,6 +2,7 @@
#include <chrono>
#include <iomanip>
#include <sstream>
#include <utf8.h>
#ifdef _WIN32
#include <windows.h>
@ -150,6 +151,47 @@ std::string CCodec::GBKTou8(const std::string& str)
return utf8Str;
}
#endif
/// @brief Removes whitespace (space, tab, LF, CR) that borders a non-ASCII
///        character, e.g. [你好 啊,在 哪里 ?] => [你好啊,在哪里?].
///
/// A consecutive run of whitespace is judged as a whole: it is dropped when the
/// character right before the run or right after it is non-ASCII (> 0x7F), and
/// kept verbatim otherwise. Judging each whitespace character only by its
/// immediate neighbours would leave the inner characters of a run of three or
/// more untouched ("你   好" => "你 好") and make the result depend on how many
/// blanks happened to be typed.
///
/// @param str Text to process; UTF-16 when UNICODE_OFSTR is defined, otherwise
///            treated as UTF-8.
/// @return A copy of @p str without the whitespace described above.
/// @note NOTE(review): the utf8:: conversion routines are presumably the
///       checked variants that throw on malformed input - confirm that callers
///       are prepared for that.
ofString CCodec::rbs(const ofString& str)
{
    // Normalise the input to UTF-8 first so a single decoding path is needed.
    std::string utf8_str;
#ifdef UNICODE_OFSTR
    utf8::utf16to8(str.begin(), str.end(), std::back_inserter(utf8_str));
#else
    utf8_str = str;
#endif

    // Work on code points so multi-byte characters are never split.
    std::vector<char32_t> unicode_chars;
    utf8::utf8to32(utf8_str.begin(), utf8_str.end(), std::back_inserter(unicode_chars));

    const auto is_blank = [](char32_t c) {
        return c == U' ' || c == U'\t' || c == U'\n' || c == U'\r';
    };
    const auto is_non_ascii = [](char32_t c) { return c > 0x7F; };

    const size_t count = unicode_chars.size();
    std::vector<char32_t> processed_chars;
    processed_chars.reserve(count);

    size_t i = 0;
    while (i < count) {
        if (!is_blank(unicode_chars[i])) {
            processed_chars.push_back(unicode_chars[i]);
            ++i;
            continue;
        }

        // Find the end of the whitespace run that starts at i.
        size_t run_end = i;
        while (run_end < count && is_blank(unicode_chars[run_end])) {
            ++run_end;
        }

        // The run is "near non-ASCII" when either bounding character is non-ASCII.
        const bool follows_non_ascii = i > 0 && is_non_ascii(unicode_chars[i - 1]);
        const bool precedes_non_ascii = run_end < count && is_non_ascii(unicode_chars[run_end]);
        if (!follows_non_ascii && !precedes_non_ascii) {
            // Purely ASCII surroundings: keep the whitespace exactly as written.
            processed_chars.insert(processed_chars.end(), unicode_chars.begin() + i,
                                   unicode_chars.begin() + run_end);
        }
        i = run_end;
    }

    // Encode back to UTF-8 and, if required, to the wide string type.
    std::string result_utf8;
    utf8::utf32to8(processed_chars.begin(), processed_chars.end(), std::back_inserter(result_utf8));
    ofString result;
#ifdef UNICODE_OFSTR
    utf8::utf8to16(result_utf8.begin(), result_utf8.end(), std::back_inserter(result));
#else
    result = result_utf8;
#endif
    return result;
}
CThreadSleep::CThreadSleep()
{
is_stop_sleep_ = false;

View File

@ -1,6 +1,7 @@
#include <iostream>
#include "of_str.h"
#include <of_str.h>
#include <of_path.h>
#include <of_util.h>
#include <cassert>
using namespace ofen;
@ -19,10 +20,18 @@ void testB()
assert(rp == ofT("cpNiz"));
}
// Checks that CCodec::rbs strips the blanks scattered between (and trailing
// after) non-ASCII characters of a UTF-8 sentence.
void testC()
{
    const std::string input = u8"这是 一 个测试 用例。 ";
    const std::string wanted = u8"这是一个测试用例。";
    assert(CCodec::rbs(input) == wanted);
}
// Test driver: runs every test case in order; a failing assert aborts the
// program before the final message is printed.
int main()
{
    testA();
    testB();
    testC();

    // All cases passed.
    std::cout << "Done";
    std::cout << std::endl;
    return 0;
}