use:初步能用。
This commit is contained in:
		
							parent
							
								
									9bcf36cd90
								
							
						
					
					
						commit
						71f18a8919
					
				
							
								
								
									
										4
									
								
								.vscode/settings.json
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.vscode/settings.json
									
									
									
									
										vendored
									
									
								
							@ -18,6 +18,7 @@
 | 
			
		||||
            }
 | 
			
		||||
        ],
 | 
			
		||||
        "args": [
 | 
			
		||||
            "/home/yun/Code/filecomplete", "cpp,h", "/home/yun/Code/question.txt"
 | 
			
		||||
        ]
 | 
			
		||||
    },
 | 
			
		||||
    "cmake.configureSettings": {
 | 
			
		||||
@ -91,6 +92,7 @@
 | 
			
		||||
        "stdexcept": "cpp",
 | 
			
		||||
        "streambuf": "cpp",
 | 
			
		||||
        "typeinfo": "cpp",
 | 
			
		||||
        "valarray": "cpp"
 | 
			
		||||
        "valarray": "cpp",
 | 
			
		||||
        "fstream": "cpp"
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
@ -1,7 +1,7 @@
 | 
			
		||||
# DeepSeek访问
 | 
			
		||||
 | 
			
		||||
尝试实现一个可以提交大量附件,并让`DeepSeek`帮分析提取数据等的一个工具。
 | 
			
		||||
尝试实现一个工具:可以一次提交大量附件(按照官方要求,最多50个文件,单个文件不超过500MB,且仅支持文本文件),并让`DeepSeek`帮忙分析内容、提取数据等。
 | 
			
		||||
 | 
			
		||||
# 说明
 | 
			
		||||
# 编译
 | 
			
		||||
 | 
			
		||||
如果是`windows`环境,必须使用`windows terminal`(或者其他支持读入`utf-8`字符串的终端)以支持从`cmd`命令行读入`utf-8`字符串。
 | 
			
		||||
需要支持`C++17`标准的编译器。
 | 
			
		||||
							
								
								
									
										133
									
								
								jsondata.cxx
									
									
									
									
									
								
							
							
						
						
									
										133
									
								
								jsondata.cxx
									
									
									
									
									
								
							@ -1,7 +1,8 @@
 | 
			
		||||
#include "jsondata.h"
 | 
			
		||||
#include <iostream>
 | 
			
		||||
 | 
			
		||||
CJsonOper::CJsonOper()
 | 
			
		||||
CJsonOper::CJsonOper(const std::string& user_name, const std::string& model, const std::string& assistant_name)
 | 
			
		||||
    : user_(user_name), model_(model), assistant_(assistant_name)
 | 
			
		||||
{
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -11,15 +12,53 @@ CJsonOper::~CJsonOper()
 | 
			
		||||
 | 
			
		||||
std::string CJsonOper::format_request(const std::string& content)
 | 
			
		||||
{
 | 
			
		||||
    // 定义变量
 | 
			
		||||
    std::string model = "deepseek-r1";
 | 
			
		||||
    std::string role = "user";
 | 
			
		||||
 | 
			
		||||
    // 构造 JSON 对象
 | 
			
		||||
    std::string model = model_;
 | 
			
		||||
    std::string role = user_;
 | 
			
		||||
    nlohmann::json json_data = {{"model", model}, {"messages", {{{"role", role}, {"content", content}}}}};
 | 
			
		||||
    return json_data.dump();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Splits `input` into the pieces found between occurrences of `delimiter`.
//
// Empty pieces are kept (leading, trailing and consecutive delimiters each
// produce an empty string), so the result always holds at least one element.
//
// input:     text to split.
// delimiter: separator text. An empty delimiter cannot separate anything, so
//            the whole input is returned as the single piece.
// returns:   the pieces, in the order they appear in `input`.
std::vector<std::string> CJsonOper::split(const std::string& input, const std::string& delimiter)
{
    std::vector<std::string> result;

    // find("") matches at `prev` without ever advancing it, which would make
    // the loop below spin forever while growing `result`.
    if (delimiter.empty()) {
        result.push_back(input);
        return result;
    }

    size_t prev = 0;
    size_t pos = input.find(delimiter, prev);
    while (pos != std::string::npos) {
        result.push_back(input.substr(prev, pos - prev));
        prev = pos + delimiter.size();
        pos = input.find(delimiter, prev);
    }
    // Whatever follows the last delimiter (possibly nothing) is the final piece.
    result.push_back(input.substr(prev));
    return result;
}
 | 
			
		||||
 | 
			
		||||
std::string CJsonOper::multi_format_reuqest(const std::string& content, size_t per_sec_size)
 | 
			
		||||
{
 | 
			
		||||
    std::string model = model_;
 | 
			
		||||
    std::string role = user_;
 | 
			
		||||
    nlohmann::json json_data;
 | 
			
		||||
    json_data["model"] = model;
 | 
			
		||||
 | 
			
		||||
    std::vector<nlohmann::json> messages;
 | 
			
		||||
    size_t s = 0;
 | 
			
		||||
    while (s < content.size()) {
 | 
			
		||||
        size_t i = 0;
 | 
			
		||||
        size_t t = 0;
 | 
			
		||||
        while (i < per_sec_size && s + i < content.size()) {
 | 
			
		||||
            t = get_u8_len(content[s + i]);
 | 
			
		||||
            if (t == 0) {
 | 
			
		||||
                std::cerr << "invalid codec!!!" << std::endl;
 | 
			
		||||
                exit(1);
 | 
			
		||||
            }
 | 
			
		||||
            i += t;
 | 
			
		||||
        }
 | 
			
		||||
        std::string part = content.substr(s, i);
 | 
			
		||||
        messages.push_back({{"role", role}, {"content", "\n附加数据:\n" + part}});
 | 
			
		||||
        s += i;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    json_data["messages"] = messages;
 | 
			
		||||
    return json_data.dump();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
Message CJsonOper::parse(const std::string& data)
 | 
			
		||||
{
 | 
			
		||||
    Message re;
 | 
			
		||||
@ -62,3 +101,85 @@ bool CJsonOper::save_md(const std::string& data, const std::string& id)
 | 
			
		||||
    of.close();
 | 
			
		||||
    return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Reads the whole file at `path` into `out`.
//
// path:    file to open (default std::ifstream mode).
// out:     receives the complete file content on success; left untouched when
//          the file cannot be opened.
// returns: true on success, false (after printing "open failed: <path>" to
//          standard output) when the file cannot be opened.
bool CJsonOper::read_txt(const std::string& path, std::string& out)
{
    std::ifstream file(path);
    if (file.is_open()) {
        // A default-constructed istreambuf_iterator is the end-of-stream marker.
        out = std::string(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>());
        return true;
    }

    std::cout << "open failed: " << path << std::endl;
    return false;
}
 | 
			
		||||
 | 
			
		||||
// Returns the total byte length of the UTF-8 sequence introduced by lead byte
// `ch`.
//
//   0xxxxxxx -> 1    110xxxxx -> 2    1110xxxx -> 3    11110xxx -> 4
//
// Any other value is not a valid lead byte: continuation bytes (10xxxxxx) and
// the obsolete 5-/6-byte forms (0xF8-0xFD), which RFC 3629 removed from UTF-8.
// For those the function reports the error on standard error and terminates
// the process with exit(1); it does not return.
size_t CJsonOper::get_u8_len(unsigned char ch)
{
    if (ch <= 0x7F) {
        return 1;
    }
    if ((ch & 0xE0) == 0xC0) {
        return 2;
    }
    if ((ch & 0xF0) == 0xE0) {
        return 3;
    }
    if ((ch & 0xF8) == 0xF0) {
        return 4;
    }

    std::cerr << "invalid u8 first ch." << std::endl;
    exit(1);
}
 | 
			
		||||
 | 
			
		||||
// Returns a copy of `input` without leading and trailing whitespace
// (space, tab, newline, carriage return, form feed, vertical tab).
// An empty or all-whitespace input yields an empty string.
std::string CJsonOper::trim(const std::string& input)
{
    const char* const blanks = " \t\n\r\f\v";

    const size_t first = input.find_first_not_of(blanks);
    if (first != std::string::npos) {
        // At least one non-blank character exists, so `last` is valid and >= first.
        const size_t last = input.find_last_not_of(blanks);
        const size_t count = last - first + 1;
        return input.substr(first, count);
    }
    return "";
}
 | 
			
		||||
 | 
			
		||||
std::string CJsonOper::get_all_dir_content(const std::string& dir, const std::string& types)
 | 
			
		||||
{
 | 
			
		||||
    auto vec = split(types, ",");
 | 
			
		||||
    std::vector<std::string> t;
 | 
			
		||||
    for (const auto& item : vec) {
 | 
			
		||||
        auto c = trim(item);
 | 
			
		||||
        if (c.empty()) {
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
        t.push_back("." + item);
 | 
			
		||||
        std::cout << "use type:" << item << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
    std::vector<std::string> task;
 | 
			
		||||
    for (const auto& entry : fs::directory_iterator(dir)) {
 | 
			
		||||
        if (!fs::is_regular_file(entry)) {
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
        auto exten = entry.path().filename().extension().string();
 | 
			
		||||
        if (std::find(t.begin(), t.end(), exten) != t.end()) {
 | 
			
		||||
            std::cout << "Parse:" << entry.path().string() << std::endl;
 | 
			
		||||
            task.push_back(entry.path().string());
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    // 提取内容
 | 
			
		||||
    std::string content;
 | 
			
		||||
    for (const auto& item : task) {
 | 
			
		||||
        std::string one;
 | 
			
		||||
        if (read_txt(item, one)) {
 | 
			
		||||
            content.append("\n\n" + one);
 | 
			
		||||
        } else {
 | 
			
		||||
            std::cerr << "Can't read file: " << item << std::endl;
 | 
			
		||||
            exit(1);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    return content;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										18
									
								
								jsondata.h
									
									
									
									
									
								
							
							
						
						
									
										18
									
								
								jsondata.h
									
									
									
									
									
								
							@ -1,11 +1,14 @@
 | 
			
		||||
#ifndef JSON_DATA
 | 
			
		||||
#define JSON_DATA
 | 
			
		||||
 | 
			
		||||
#include <fstream>
 | 
			
		||||
#include <nlohmann/json.hpp>
 | 
			
		||||
#include <optional>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <fstream>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <filesystem>
 | 
			
		||||
 | 
			
		||||
namespace fs = std::filesystem;
 | 
			
		||||
struct Message {
 | 
			
		||||
    std::string message_content;
 | 
			
		||||
    std::string reasoning_content;
 | 
			
		||||
@ -19,13 +22,24 @@ using json = nlohmann::json;
 | 
			
		||||
class CJsonOper
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
    CJsonOper();
 | 
			
		||||
    CJsonOper(const std::string& user_name, const std::string& model, const std::string& assistant_name);
 | 
			
		||||
    ~CJsonOper();
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
    std::string format_request(const std::string& content);
 | 
			
		||||
    std::string multi_format_reuqest(const std::string& content, size_t per_sec_size);
 | 
			
		||||
    Message parse(const std::string& data);
 | 
			
		||||
    static bool save_md(const std::string& data, const std::string& id);
 | 
			
		||||
    static bool read_txt(const std::string& path, std::string& out);
 | 
			
		||||
    static std::vector<std::string> split(const std::string& input, const std::string& delimiter);
 | 
			
		||||
    static size_t get_u8_len(unsigned char ch);
 | 
			
		||||
    static std::string get_all_dir_content(const std::string& dir, const std::string& types);
 | 
			
		||||
    static std::string trim(const std::string& input);
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    std::string user_{};
 | 
			
		||||
    std::string model_{};
 | 
			
		||||
    std::string assistant_{};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										49
									
								
								main.cxx
									
									
									
									
									
								
							
							
						
						
									
										49
									
								
								main.cxx
									
									
									
									
									
								
							@ -1,40 +1,69 @@
 | 
			
		||||
 | 
			
		||||
#include "jsondata.h"
 | 
			
		||||
#include "zapi.h"
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <memory>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "jsondata.h"
 | 
			
		||||
 | 
			
		||||
constexpr auto API_ENV_KEY = "DASHSCOPE_API_KEY";
 | 
			
		||||
constexpr auto BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions";
 | 
			
		||||
constexpr auto USER_NAME = "user";
 | 
			
		||||
constexpr auto MODEL_NAME = "deepseek-r1";
 | 
			
		||||
constexpr auto ASSISTANT_NAME = "assistant";
 | 
			
		||||
 | 
			
		||||
std::string get_key()
 | 
			
		||||
{
 | 
			
		||||
    char* v = getenv("DASHSCOPE_API_KEY");
 | 
			
		||||
    char* v = getenv(API_ENV_KEY);
 | 
			
		||||
    if (v) {
 | 
			
		||||
        return std::string(v);
 | 
			
		||||
    }
 | 
			
		||||
    return "";
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main()
 | 
			
		||||
int main(int argc, char* argv[])
 | 
			
		||||
{
 | 
			
		||||
    if (argc < 4) {
 | 
			
		||||
        std::cout << "note: you need set env[" << API_ENV_KEY << "] before you start." << std::endl;
 | 
			
		||||
        std::cout << "argument: text type files dir, types(split with ,), question file." << std::endl;
 | 
			
		||||
        std::cout << "example: deepseek-use /home/zhang/cpps cpp,xml,h /home/zhang/question.txt" << std::endl;
 | 
			
		||||
        return 0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string api_key = get_key();
 | 
			
		||||
    if (api_key.empty()) {
 | 
			
		||||
        std::cerr << "api key not found." << std::endl;
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string question{};
 | 
			
		||||
    if (!CJsonOper::read_txt(argv[3], question)) {
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string all_content = CJsonOper::get_all_dir_content(argv[1], argv[2]);
 | 
			
		||||
    if (all_content.empty()) {
 | 
			
		||||
        std::cerr << "content is empty." << std::endl;
 | 
			
		||||
        return -1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::string req_str = question + "\n\n请查看附加数据:\n" + all_content; 
 | 
			
		||||
    auto api = std::make_shared<COpenAI>();
 | 
			
		||||
    auto json_oper = std::make_shared<CJsonOper>();
 | 
			
		||||
    api->set_base("https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions", api_key);
 | 
			
		||||
 | 
			
		||||
    // 请求的 JSON 数据
 | 
			
		||||
    std::string question("对于DeepSeek API,既然对话交互使用的是json格式,那我是否可以把所有的文本内容合并到json中post给deepseek,deepseek有没有说明json格式的数据大小上限值?");
 | 
			
		||||
    std::string q = json_oper->format_request(question);
 | 
			
		||||
    auto json_oper = std::make_shared<CJsonOper>(USER_NAME, MODEL_NAME, ASSISTANT_NAME);
 | 
			
		||||
    api->set_base(BASE_URL, api_key);
 | 
			
		||||
    std::string q = json_oper->multi_format_reuqest(req_str, 1024 * 1);
 | 
			
		||||
 | 
			
		||||
    //std::cout << q << std::endl;
 | 
			
		||||
    std::string recv;
 | 
			
		||||
    if (api->post(q, recv)) {
 | 
			
		||||
        auto re = json_oper->parse(recv);
 | 
			
		||||
        CJsonOper::save_md("**最终结果:**\n\n" + re.message_content + "\n\n **思考过程:** \n" + re.reasoning_content, re.id);
 | 
			
		||||
        std::string use = "本次`tokens`消耗:" + std::to_string(re.prompt_tokens) + "+" + std::to_string(re.completion_tokens) +
 | 
			
		||||
                          "=" + std::to_string(re.total_tokens);
 | 
			
		||||
        CJsonOper::save_md(
 | 
			
		||||
            use + "\n\n**最终结果:**\n\n" + re.message_content + "\n\n **思考过程:** \n\n" + re.reasoning_content, re.id);
 | 
			
		||||
        std::cout << "success." << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        std::cout << "request failed." << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user