use:初步能用。

This commit is contained in:
taynpg 2025-02-12 13:36:28 +08:00
parent 9bcf36cd90
commit 71f18a8919
5 changed files with 188 additions and 22 deletions

View File

@ -18,6 +18,7 @@
} }
], ],
"args": [ "args": [
"/home/yun/Code/filecomplete", "cpp,h", "/home/yun/Code/question.txt"
] ]
}, },
"cmake.configureSettings": { "cmake.configureSettings": {
@ -91,6 +92,7 @@
"stdexcept": "cpp", "stdexcept": "cpp",
"streambuf": "cpp", "streambuf": "cpp",
"typeinfo": "cpp", "typeinfo": "cpp",
"valarray": "cpp" "valarray": "cpp",
"fstream": "cpp"
} }
} }

View File

@ -1,7 +1,7 @@
# DeepSeek访问 # DeepSeek访问
尝试实现一个可以提交大量附件,并让`DeepSeek`帮分析提取数据等的一个工具。 尝试实现一个可以提交大量附件(按照官方要求是最多50个文件,单个不超过500MB,且仅支持文本文件),并让`DeepSeek`帮分析提取数据等的一个工具。
# 说明 # 编译
如果是`windows`环境,必须使用`windows terminal`(或者其他支持读入`utf-8`字符串的终端)以支持从`cmd`命令行读入`utf-8`字符串 需要`cpp17`

View File

@ -1,7 +1,8 @@
#include "jsondata.h" #include "jsondata.h"
#include <iostream> #include <iostream>
CJsonOper::CJsonOper() CJsonOper::CJsonOper(const std::string& user_name, const std::string& model, const std::string& assistant_name)
: user_(user_name), model_(model), assistant_(assistant_name)
{ {
} }
@ -11,15 +12,53 @@ CJsonOper::~CJsonOper()
std::string CJsonOper::format_request(const std::string& content) std::string CJsonOper::format_request(const std::string& content)
{ {
// 定义变量 std::string model = model_;
std::string model = "deepseek-r1"; std::string role = user_;
std::string role = "user";
// 构造 JSON 对象
nlohmann::json json_data = {{"model", model}, {"messages", {{{"role", role}, {"content", content}}}}}; nlohmann::json json_data = {{"model", model}, {"messages", {{{"role", role}, {"content", content}}}}};
return json_data.dump(); return json_data.dump();
} }
/// Splits `input` on every occurrence of `delimiter`.
///
/// Empty fields are kept: leading, trailing and adjacent delimiters each
/// produce an empty string, and an `input` without any delimiter comes back
/// as a single-element vector.
///
/// @param input      Text to split.
/// @param delimiter  Separator; may be longer than one character. An empty
///                   delimiter performs no splitting and returns `{input}`.
/// @return The fields in their original order (never an empty vector).
std::vector<std::string> CJsonOper::split(const std::string& input, const std::string& delimiter)
{
    std::vector<std::string> result;

    // find("") matches at the search position itself, so the cursor below
    // would never advance and the loop would push empty strings forever.
    if (delimiter.empty()) {
        result.push_back(input);
        return result;
    }

    size_t prev = 0;
    for (size_t pos = input.find(delimiter, prev); pos != std::string::npos;
         pos = input.find(delimiter, prev)) {
        result.push_back(input.substr(prev, pos - prev));
        prev = pos + delimiter.size();
    }

    // Whatever follows the last delimiter (possibly nothing) is the final field.
    result.push_back(input.substr(prev));
    return result;
}
std::string CJsonOper::multi_format_reuqest(const std::string& content, size_t per_sec_size)
{
std::string model = model_;
std::string role = user_;
nlohmann::json json_data;
json_data["model"] = model;
std::vector<nlohmann::json> messages;
size_t s = 0;
while (s < content.size()) {
size_t i = 0;
size_t t = 0;
while (i < per_sec_size && s + i < content.size()) {
t = get_u8_len(content[s + i]);
if (t == 0) {
std::cerr << "invalid codec!!!" << std::endl;
exit(1);
}
i += t;
}
std::string part = content.substr(s, i);
messages.push_back({{"role", role}, {"content", "\n附加数据:\n" + part}});
s += i;
}
json_data["messages"] = messages;
return json_data.dump();
}
Message CJsonOper::parse(const std::string& data) Message CJsonOper::parse(const std::string& data)
{ {
Message re; Message re;
@ -62,3 +101,85 @@ bool CJsonOper::save_md(const std::string& data, const std::string& id)
of.close(); of.close();
return true; return true;
} }
/// Reads the entire file at `path` into `out`.
///
/// @param path  File to read.
/// @param out   Receives the file's bytes on success; left untouched when the
///              file cannot be opened.
/// @return `true` when the file was opened, `false` otherwise (a message
///         naming the path is printed in that case).
bool CJsonOper::read_txt(const std::string& path, std::string& out)
{
    // Binary mode hands the bytes over untranslated. The content is later cut
    // by byte length on UTF-8 boundaries, so it should reach the caller exactly
    // as stored on disk; text mode may rewrite line endings on some platforms.
    std::ifstream file(path, std::ios::in | std::ios::binary);
    if (!file.is_open()) {
        std::cout << "open failed: " << path << std::endl;
        return false;
    }

    // Fill `out` directly instead of building a temporary and copying it.
    out.assign(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>());
    return true;
}
/// Returns how many bytes the character starting with lead byte `ch` occupies,
/// judged by the number of leading 1-bits in `ch`:
///   none -> 1, two -> 2, three -> 3, four -> 4, five -> 5, six -> 6.
///
/// Any other pattern (exactly one leading 1-bit, or seven/eight of them) is
/// not accepted as a lead byte: a message is written to stderr and the
/// process exits with code 1.
size_t CJsonOper::get_u8_len(unsigned char ch)
{
    // Count the run of 1-bits starting at the most significant bit.
    size_t leading_ones = 0;
    for (unsigned int mask = 0x80u; mask != 0u && (ch & mask) != 0u; mask >>= 1) {
        ++leading_ones;
    }

    if (leading_ones == 0) {
        return 1;
    }
    if (leading_ones >= 2 && leading_ones <= 6) {
        return leading_ones;
    }

    std::cerr << "invalid u8 first ch." << std::endl;
    exit(1);
    return 0;  // not reached; exit() does not return
}
/// Returns a copy of `input` with leading and trailing whitespace removed.
///
/// Whitespace here means space, tab, newline, carriage return, form feed and
/// vertical tab. An empty or all-whitespace `input` yields an empty string.
std::string CJsonOper::trim(const std::string& input)
{
    static const char* const kBlanks = " \t\n\r\f\v";

    const size_t first = input.find_first_not_of(kBlanks);
    const bool nothing_left = (first == std::string::npos);

    // When a first non-blank exists, a last one exists too, so `last >= first`.
    return nothing_left ? std::string("")
                        : input.substr(first, input.find_last_not_of(kBlanks) - first + 1);
}
/// Concatenates the contents of every regular file directly inside `dir`
/// whose extension is listed in `types`.
///
/// Only the directory itself is scanned; sub-directories are not entered.
///
/// @param dir    Directory to scan.
/// @param types  Comma-separated extensions without the dot, e.g. "cpp,h".
///               Whitespace around each entry is ignored, empty entries are
///               skipped.
/// @return Each selected file's content preceded by "\n\n", in the order the
///         directory iterator yields the files. Empty when nothing matches
///         or when `dir` is not a directory.
///
/// Terminates the process with exit code 1 if a selected file cannot be read.
std::string CJsonOper::get_all_dir_content(const std::string& dir, const std::string& types)
{
    // Turn "cpp, h" into {".cpp", ".h"}. The trimmed text must be used here:
    // building the suffix from the raw field would give ". h", which no file
    // extension ever equals.
    std::vector<std::string> wanted;
    for (const auto& field : split(types, ",")) {
        const std::string ext = trim(field);
        if (ext.empty()) {
            continue;
        }
        wanted.push_back("." + ext);
        std::cout << "use type:" << ext << std::endl;
    }

    // Iterating a missing or non-directory path throws; report it instead and
    // let the caller deal with the empty result.
    if (!fs::is_directory(dir)) {
        std::cerr << "not a directory: " << dir << std::endl;
        return std::string();
    }

    // Collect the matching files first so the listing is printed up front.
    std::vector<std::string> task;
    for (const auto& entry : fs::directory_iterator(dir)) {
        if (!fs::is_regular_file(entry)) {
            continue;
        }
        const std::string exten = entry.path().filename().extension().string();
        if (std::find(wanted.begin(), wanted.end(), exten) != wanted.end()) {
            std::cout << "Parse:" << entry.path().string() << std::endl;
            task.push_back(entry.path().string());
        }
    }

    // Read and join the file contents.
    std::string content;
    for (const auto& file : task) {
        std::string one;
        if (!read_txt(file, one)) {
            std::cerr << "Can't read file: " << file << std::endl;
            exit(1);
        }
        content.append("\n\n");
        content.append(one);
    }
    return content;
}

View File

@ -1,11 +1,14 @@
#ifndef JSON_DATA #ifndef JSON_DATA
#define JSON_DATA #define JSON_DATA
#include <fstream>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include <optional> #include <optional>
#include <string> #include <string>
#include <fstream> #include <vector>
#include <filesystem>
namespace fs = std::filesystem;
struct Message { struct Message {
std::string message_content; std::string message_content;
std::string reasoning_content; std::string reasoning_content;
@ -19,13 +22,24 @@ using json = nlohmann::json;
class CJsonOper class CJsonOper
{ {
public: public:
CJsonOper(); CJsonOper(const std::string& user_name, const std::string& model, const std::string& assistant_name);
~CJsonOper(); ~CJsonOper();
public: public:
std::string format_request(const std::string& content); std::string format_request(const std::string& content);
std::string multi_format_reuqest(const std::string& content, size_t per_sec_size);
Message parse(const std::string& data); Message parse(const std::string& data);
static bool save_md(const std::string& data, const std::string& id); static bool save_md(const std::string& data, const std::string& id);
static bool read_txt(const std::string& path, std::string& out);
static std::vector<std::string> split(const std::string& input, const std::string& delimiter);
static size_t get_u8_len(unsigned char ch);
static std::string get_all_dir_content(const std::string& dir, const std::string& types);
static std::string trim(const std::string& input);
private:
std::string user_{};
std::string model_{};
std::string assistant_{};
}; };
#endif #endif

View File

@ -1,40 +1,69 @@
#include "jsondata.h"
#include "zapi.h" #include "zapi.h"
#include <iostream> #include <iostream>
#include <memory> #include <memory>
#include <string> #include <string>
#include "jsondata.h"
constexpr auto API_ENV_KEY = "DASHSCOPE_API_KEY";
constexpr auto BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions";
constexpr auto USER_NAME = "user";
constexpr auto MODEL_NAME = "deepseek-r1";
constexpr auto ASSISTANT_NAME = "assistant";
std::string get_key() std::string get_key()
{ {
char* v = getenv("DASHSCOPE_API_KEY"); char* v = getenv(API_ENV_KEY);
if (v) { if (v) {
return std::string(v); return std::string(v);
} }
return ""; return "";
} }
int main() int main(int argc, char* argv[])
{ {
if (argc < 4) {
std::cout << "note: you need set env[" << API_ENV_KEY << "] before you start." << std::endl;
std::cout << "argument: text type files dir, types(split with ,), question file." << std::endl;
std::cout << "example: deepseek-use /home/zhang/cpps cpp,xml,h /home/zhang/question.txt" << std::endl;
return 0;
}
std::string api_key = get_key(); std::string api_key = get_key();
if (api_key.empty()) { if (api_key.empty()) {
std::cerr << "api key not found." << std::endl; std::cerr << "api key not found." << std::endl;
return -1; return -1;
} }
std::string question{};
if (!CJsonOper::read_txt(argv[3], question)) {
return -1;
}
std::string all_content = CJsonOper::get_all_dir_content(argv[1], argv[2]);
if (all_content.empty()) {
std::cerr << "content is empty." << std::endl;
return -1;
}
std::string req_str = question + "\n\n请查看附加数据:\n" + all_content;
auto api = std::make_shared<COpenAI>(); auto api = std::make_shared<COpenAI>();
auto json_oper = std::make_shared<CJsonOper>(); auto json_oper = std::make_shared<CJsonOper>(USER_NAME, MODEL_NAME, ASSISTANT_NAME);
api->set_base("https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions", api_key); api->set_base(BASE_URL, api_key);
std::string q = json_oper->multi_format_reuqest(req_str, 1024 * 1);
// 请求的 JSON 数据
std::string question("对于DeepSeek API,既然对话交互使用的是json格式,那我是否可以把所有的文本内容合并到json中post给deepseek,deepseek有没有说明json格式的数据大小上限值?");
std::string q = json_oper->format_request(question);
//std::cout << q << std::endl;
std::string recv; std::string recv;
if (api->post(q, recv)) { if (api->post(q, recv)) {
auto re = json_oper->parse(recv); auto re = json_oper->parse(recv);
CJsonOper::save_md("**最终结果:**\n\n" + re.message_content + "\n\n **思考过程:** \n" + re.reasoning_content, re.id); std::string use = "本次`tokens`消耗:" + std::to_string(re.prompt_tokens) + "+" + std::to_string(re.completion_tokens) +
"=" + std::to_string(re.total_tokens);
CJsonOper::save_md(
use + "\n\n**最终结果:**\n\n" + re.message_content + "\n\n **思考过程:** \n\n" + re.reasoning_content, re.id);
std::cout << "success." << std::endl; std::cout << "success." << std::endl;
} }
else {
std::cout << "request failed." << std::endl;
}
return 0; return 0;
} }