diff --git a/Dockerfile+ChatGLM b/Dockerfile+ChatGLM new file mode 100644 index 00000000..e7db211e --- /dev/null +++ b/Dockerfile+ChatGLM @@ -0,0 +1,47 @@ +# How to build | 如何构建: docker build -t gpt-academic --network=host -f Dockerfile+ChatGLM . +# How to run | 如何运行 (1) Run directly | 直接运行: docker run --rm -it --net=host gpt-academic +# How to run | 如何运行 (2) Open a shell in the container first to make adjustments | 我想运行之前进容器做一些调整: docker run --rm -it --net=host --gpus=all gpt-academic bash + +# Use the NVIDIA CUDA base image to enable GPU computation (the CUDA version shown by nvidia-smi on the host must be >=11.3) | 从NVIDIA源,从而支持显卡运算(检查宿主的nvidia-smi中的cuda版本必须>=11.3) +FROM nvidia/cuda:11.3.1-runtime-ubuntu20.04 +ARG useProxyNetwork='' +RUN apt-get update +RUN apt-get install -y curl proxychains +RUN apt-get install -y git python python3 python-dev python3-dev --fix-missing + +# Configure the proxy network (used while building the Docker image) | 配置代理网络(构建Docker镜像时使用) +# # comment out below if you do not need proxy network | 如果不需要翻墙 - 从此行向下删除 +RUN $useProxyNetwork curl cip.cc +RUN sed -i '$ d' /etc/proxychains.conf +RUN sed -i '$ d' /etc/proxychains.conf +RUN echo "socks5 127.0.0.1 10880" >> /etc/proxychains.conf +ARG useProxyNetwork=proxychains +# # comment out above if you do not need proxy network | 如果不需要翻墙 - 从此行向上删除 + + +# install pip by piping get-pip.py into python3.8 +RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.8 + +# Clone the v3.0 branch | 下载分支 +WORKDIR /gpt +RUN $useProxyNetwork git clone https://github.com/binary-husky/chatgpt_academic.git -b v3.0 +WORKDIR /gpt/chatgpt_academic +RUN $useProxyNetwork python3 -m pip install -r requirements.txt +RUN $useProxyNetwork python3 -m pip install -r request_llm/requirements_chatglm.txt +RUN $useProxyNetwork python3 -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu113 + +# Configure the proxy and API-KEY for chatgpt-academic (optional step) | 为chatgpt-academic配置代理和API-KEY (非必要 可选步骤) +RUN echo ' \n\ +API_KEY = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \n\ +USE_PROXY = True \n\ +proxies = { "http": "socks5h://localhost:10880", "https": "socks5h://localhost:10880", } ' >> config_private.py + +# Warm up the ChatGLM parameters by loading them once at build time (optional step) | 预热CHATGLM参数(非必要 可选步骤) +RUN echo ' \n\ +from transformers import AutoModel, AutoTokenizer \n\ +chatglm_tokenizer = 
AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) \n\ +chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float() ' >> warm_up_chatglm.py +RUN python3 -u warm_up_chatglm.py + +# Start the application | 启动 +CMD ["python3", "-u", "main.py"] diff --git a/crazy_functions/Latex全文润色.py b/crazy_functions/Latex全文润色.py index cee35479..77c11020 100644 --- a/crazy_functions/Latex全文润色.py +++ b/crazy_functions/Latex全文润色.py @@ -45,7 +45,7 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch pfg = PaperFileGroup() for index, fp in enumerate(file_manifest): - with open(fp, 'r', encoding='utf-8') as f: + with open(fp, 'r', encoding='utf-8', errors='replace') as f: file_content = f.read() # 定义注释的正则表达式 comment_pattern = r'%.*' diff --git a/crazy_functions/Latex全文翻译.py b/crazy_functions/Latex全文翻译.py index c1684b31..2808a8e0 100644 --- a/crazy_functions/Latex全文翻译.py +++ b/crazy_functions/Latex全文翻译.py @@ -44,7 +44,7 @@ def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch pfg = PaperFileGroup() for index, fp in enumerate(file_manifest): - with open(fp, 'r', encoding='utf-8') as f: + with open(fp, 'r', encoding='utf-8', errors='replace') as f: file_content = f.read() # 定义注释的正则表达式 comment_pattern = r'%.*' diff --git a/crazy_functions/代码重写为全英文_多线程.py b/crazy_functions/代码重写为全英文_多线程.py index eef95179..f74704ae 100644 --- a/crazy_functions/代码重写为全英文_多线程.py +++ b/crazy_functions/代码重写为全英文_多线程.py @@ -49,7 +49,7 @@ def 全项目切换英文(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_ # 第4步:随便显示点什么防止卡顿的感觉 for index, fp in enumerate(file_manifest): # if 'test_project' in fp: continue - with open(fp, 'r', encoding='utf-8') as f: + with open(fp, 'r', encoding='utf-8', errors='replace') as f: file_content = f.read() i_say_show_user =f'[{index}/{len(file_manifest)}] 接下来请将以下代码中包含的所有中文转化为英文,只输出转化后的英文代码,请用代码块输出代码: {os.path.abspath(fp)}' i_say_show_user_buffer.append(i_say_show_user) @@ -72,7 +72,7 @@ def 全项目切换英文(txt, llm_kwargs, 
plugin_kwargs, chatbot, history, sys_ if index > 10: time.sleep(60) print('Openai 限制免费用户每分钟20次请求,降低请求频率中。') - with open(fp, 'r', encoding='utf-8') as f: + with open(fp, 'r', encoding='utf-8', errors='replace') as f: file_content = f.read() i_say_template = lambda fp, file_content: f'接下来请将以下代码中包含的所有中文转化为英文,只输出代码,文件名是{fp},文件代码是 ```{file_content}```' try: diff --git a/crazy_functions/批量总结PDF文档pdfminer.py b/crazy_functions/批量总结PDF文档pdfminer.py index 3868885d..ffbb0559 100644 --- a/crazy_functions/批量总结PDF文档pdfminer.py +++ b/crazy_functions/批量总结PDF文档pdfminer.py @@ -68,7 +68,7 @@ def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbo print('begin analysis on:', file_manifest) for index, fp in enumerate(file_manifest): if ".tex" in fp: - with open(fp, 'r', encoding='utf-8') as f: + with open(fp, 'r', encoding='utf-8', errors='replace') as f: file_content = f.read() if ".pdf" in fp.lower(): file_content = readPdf(fp) diff --git a/crazy_functions/生成函数注释.py b/crazy_functions/生成函数注释.py index a5568b98..a564f21d 100644 --- a/crazy_functions/生成函数注释.py +++ b/crazy_functions/生成函数注释.py @@ -7,7 +7,7 @@ def 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, import time, os print('begin analysis on:', file_manifest) for index, fp in enumerate(file_manifest): - with open(fp, 'r', encoding='utf-8') as f: + with open(fp, 'r', encoding='utf-8', errors='replace') as f: file_content = f.read() i_say = f'请对下面的程序文件做一个概述,并对文件中的所有函数生成注释,使用markdown表格输出结果,文件名是{os.path.relpath(fp, project_folder)},文件内容是 ```{file_content}```' diff --git a/crazy_functions/解析项目源代码.py b/crazy_functions/解析项目源代码.py index a35cd8c9..0edcbf18 100644 --- a/crazy_functions/解析项目源代码.py +++ b/crazy_functions/解析项目源代码.py @@ -14,7 +14,7 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ############################## <第一步,逐个文件分析,多线程> ################################## for index, fp in enumerate(file_manifest): - with open(fp, 'r', encoding='utf-8') as f: + with open(fp, 'r', 
encoding='utf-8', errors='replace') as f: file_content = f.read() prefix = "接下来请你逐文件分析下面的工程" if index==0 else "" i_say = prefix + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)},文件代码是 ```{file_content}```' diff --git a/crazy_functions/读文章写摘要.py b/crazy_functions/读文章写摘要.py index b910f5dc..72ffe6b1 100644 --- a/crazy_functions/读文章写摘要.py +++ b/crazy_functions/读文章写摘要.py @@ -8,7 +8,7 @@ def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbo import time, glob, os print('begin analysis on:', file_manifest) for index, fp in enumerate(file_manifest): - with open(fp, 'r', encoding='utf-8') as f: + with open(fp, 'r', encoding='utf-8', errors='replace') as f: file_content = f.read() prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""