Mirrored from
https://github.com/binary-husky/gpt_academic.git
Synced 2025-12-06 14:36:48 +00:00
new
This commit is contained in:
@@ -26,8 +26,8 @@ class PaperFileGroup():
                 self.sp_file_index.append(index)
                 self.sp_file_tag.append(self.file_paths[index])
             else:
-                from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
-                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
+                from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
+                segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
                 for j, segment in enumerate(segments):
                     self.sp_file_contents.append(segment)
                     self.sp_file_index.append(index)
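This same two-line substitution recurs across the plugin files below: the token-based splitter moves out of crazy_utils into the dedicated module crazy_functions/pdf_fns/breakdown_txt, and the new entry point no longer takes a token-counting callback. A minimal before/after sketch of the call shape (that the new helper resolves the tokenizer itself is an inference from the llm_model= call sites later in this diff):

# before: the caller had to supply its own token counter
segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
# after: text and limit only; some call sites also pass llm_model so the helper
# can look up the right tokenizer on its own (assumption based on those call sites)
segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)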
@@ -26,8 +26,8 @@ class PaperFileGroup():
                 self.sp_file_index.append(index)
                 self.sp_file_tag.append(self.file_paths[index])
             else:
-                from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
-                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
+                from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
+                segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
                 for j, segment in enumerate(segments):
                     self.sp_file_contents.append(segment)
                     self.sp_file_index.append(index)
@@ -88,6 +88,9 @@ def arxiv_download(chatbot, history, txt, allow_cache=True):
         target_file = pj(translation_dir, 'translate_zh.pdf')
         if os.path.exists(target_file):
             promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot)
+            target_file_compare = pj(translation_dir, 'comparison.pdf')
+            if os.path.exists(target_file_compare):
+                promote_file_to_downloadzone(target_file_compare, rename_file=None, chatbot=chatbot)
             return target_file
     return False
 def is_float(s):
@@ -1,4 +1,4 @@
-from toolbox import update_ui, get_conf, trimmed_format_exc, get_max_token
+from toolbox import update_ui, get_conf, trimmed_format_exc, get_max_token, Singleton
 import threading
 import os
 import logging
@@ -139,6 +139,8 @@ def can_multi_process(llm):
     if llm.startswith('gpt-'): return True
     if llm.startswith('api2d-'): return True
     if llm.startswith('azure-'): return True
+    if llm.startswith('spark'): return True
+    if llm.startswith('zhipuai'): return True
     return False

 def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
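Because the test is a bare str.startswith, any model tag sharing a whitelisted prefix is cleared for concurrent requests. For illustration (the model tags below are hypothetical):

# hypothetical tags, shown only to illustrate the prefix match
assert can_multi_process('spark-v3.5') is True      # matches 'spark'
assert can_multi_process('zhipuai-glm-4') is True   # matches 'zhipuai'
assert can_multi_process('chatglm3-6b') is False    # no whitelisted prefix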
@@ -312,95 +314,6 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
     return gpt_response_collection


-def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
-    def cut(txt_tocut, must_break_at_empty_line):  # 递归
-        if get_token_fn(txt_tocut) <= limit:
-            return [txt_tocut]
-        else:
-            lines = txt_tocut.split('\n')
-            estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines)
-            estimated_line_cut = int(estimated_line_cut)
-            for cnt in reversed(range(estimated_line_cut)):
-                if must_break_at_empty_line:
-                    if lines[cnt] != "":
-                        continue
-                print(cnt)
-                prev = "\n".join(lines[:cnt])
-                post = "\n".join(lines[cnt:])
-                if get_token_fn(prev) < limit:
-                    break
-            if cnt == 0:
-                raise RuntimeError("存在一行极长的文本!")
-            # print(len(post))
-            # 列表递归接龙
-            result = [prev]
-            result.extend(cut(post, must_break_at_empty_line))
-            return result
-    try:
-        return cut(txt, must_break_at_empty_line=True)
-    except RuntimeError:
-        return cut(txt, must_break_at_empty_line=False)
-
-
-def force_breakdown(txt, limit, get_token_fn):
-    """
-    当无法用标点、空行分割时,我们用最暴力的方法切割
-    """
-    for i in reversed(range(len(txt))):
-        if get_token_fn(txt[:i]) < limit:
-            return txt[:i], txt[i:]
-    return "Tiktoken未知错误", "Tiktoken未知错误"
-
-
-def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
-    # 递归
-    def cut(txt_tocut, must_break_at_empty_line, break_anyway=False):
-        if get_token_fn(txt_tocut) <= limit:
-            return [txt_tocut]
-        else:
-            lines = txt_tocut.split('\n')
-            estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines)
-            estimated_line_cut = int(estimated_line_cut)
-            cnt = 0
-            for cnt in reversed(range(estimated_line_cut)):
-                if must_break_at_empty_line:
-                    if lines[cnt] != "":
-                        continue
-                prev = "\n".join(lines[:cnt])
-                post = "\n".join(lines[cnt:])
-                if get_token_fn(prev) < limit:
-                    break
-            if cnt == 0:
-                if break_anyway:
-                    prev, post = force_breakdown(txt_tocut, limit, get_token_fn)
-                else:
-                    raise RuntimeError(f"存在一行极长的文本!{txt_tocut}")
-            # print(len(post))
-            # 列表递归接龙
-            result = [prev]
-            result.extend(cut(post, must_break_at_empty_line, break_anyway=break_anyway))
-            return result
-    try:
-        # 第1次尝试,将双空行(\n\n)作为切分点
-        return cut(txt, must_break_at_empty_line=True)
-    except RuntimeError:
-        try:
-            # 第2次尝试,将单空行(\n)作为切分点
-            return cut(txt, must_break_at_empty_line=False)
-        except RuntimeError:
-            try:
-                # 第3次尝试,将英文句号(.)作为切分点
-                res = cut(txt.replace('.', '。\n'), must_break_at_empty_line=False)  # 这个中文的句号是故意的,作为一个标识而存在
-                return [r.replace('。\n', '.') for r in res]
-            except RuntimeError as e:
-                try:
-                    # 第4次尝试,将中文句号(。)作为切分点
-                    res = cut(txt.replace('。', '。。\n'), must_break_at_empty_line=False)
-                    return [r.replace('。。\n', '。') for r in res]
-                except RuntimeError as e:
-                    # 第5次尝试,没办法了,随便切一下敷衍吧
-                    return cut(txt, must_break_at_empty_line=False, break_anyway=True)
-
-
 def read_and_clean_pdf_text(fp):
     """
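The two deleted functions implemented recursive bisection with a five-stage delimiter fallback: double blank line, single newline, English period, Chinese period, then a brute-force character cut. A self-contained sketch of the core recursion, using len() as a stand-in token counter (the real code above uses the model tokenizer):

def cut_at_blank_lines(txt, limit, count=len):
    # condensed restatement of the deleted cut() helper, for reference only
    if count(txt) <= limit:
        return [txt]
    lines = txt.split('\n')
    estimated = int(limit / count(txt) * len(lines))
    for cnt in reversed(range(1, estimated)):   # cnt == 0 would recurse forever
        if lines[cnt] != "":
            continue                            # only break at an empty line
        prev, post = "\n".join(lines[:cnt]), "\n".join(lines[cnt:])
        if count(prev) < limit:
            return [prev] + cut_at_blank_lines(post, limit, count)
    raise RuntimeError("a single over-long segment remains")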
@@ -631,90 +544,6 @@ def get_files_from_everything(txt, type): # type='.md'


-def Singleton(cls):
-    _instance = {}
-
-    def _singleton(*args, **kargs):
-        if cls not in _instance:
-            _instance[cls] = cls(*args, **kargs)
-        return _instance[cls]
-
-    return _singleton
-
-
-@Singleton
-class knowledge_archive_interface():
-    def __init__(self) -> None:
-        self.threadLock = threading.Lock()
-        self.current_id = ""
-        self.kai_path = None
-        self.qa_handle = None
-        self.text2vec_large_chinese = None
-
-    def get_chinese_text2vec(self):
-        if self.text2vec_large_chinese is None:
-            # < -------------------预热文本向量化模组--------------- >
-            from toolbox import ProxyNetworkActivate
-            print('Checking Text2vec ...')
-            from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-            with ProxyNetworkActivate('Download_LLM'):    # 临时地激活代理网络
-                self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
-
-        return self.text2vec_large_chinese
-
-    def feed_archive(self, file_manifest, id="default"):
-        self.threadLock.acquire()
-        # import uuid
-        self.current_id = id
-        from zh_langchain import construct_vector_store
-        self.qa_handle, self.kai_path = construct_vector_store(
-            vs_id=self.current_id,
-            files=file_manifest,
-            sentence_size=100,
-            history=[],
-            one_conent="",
-            one_content_segmentation="",
-            text2vec = self.get_chinese_text2vec(),
-        )
-        self.threadLock.release()
-
-    def get_current_archive_id(self):
-        return self.current_id
-
-    def get_loaded_file(self):
-        return self.qa_handle.get_loaded_file()
-
-    def answer_with_archive_by_id(self, txt, id):
-        self.threadLock.acquire()
-        if not self.current_id == id:
-            self.current_id = id
-            from zh_langchain import construct_vector_store
-            self.qa_handle, self.kai_path = construct_vector_store(
-                vs_id=self.current_id,
-                files=[],
-                sentence_size=100,
-                history=[],
-                one_conent="",
-                one_content_segmentation="",
-                text2vec = self.get_chinese_text2vec(),
-            )
-        VECTOR_SEARCH_SCORE_THRESHOLD = 0
-        VECTOR_SEARCH_TOP_K = 4
-        CHUNK_SIZE = 512
-        resp, prompt = self.qa_handle.get_knowledge_based_conent_test(
-            query = txt,
-            vs_path = self.kai_path,
-            score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
-            vector_search_top_k=VECTOR_SEARCH_TOP_K,
-            chunk_conent=True,
-            chunk_size=CHUNK_SIZE,
-            text2vec = self.get_chinese_text2vec(),
-        )
-        self.threadLock.release()
-        return resp, prompt
-
 @Singleton
 class nougat_interface():
     def __init__(self):
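The Singleton decorator deleted here is the same one now imported from toolbox (see the import hunk above), so nougat_interface keeps its one-instance-per-process behavior. A usage sketch (the Config class is a hypothetical example, not part of this commit):

@Singleton
class Config:                      # hypothetical example class
    def __init__(self):
        self.loaded = True

assert Config() is Config()        # repeated calls return the one cached instance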
@@ -175,7 +175,6 @@ class LatexPaperFileGroup():
         self.sp_file_contents = []
         self.sp_file_index = []
         self.sp_file_tag = []

-        # count_token
         from request_llms.bridge_all import model_info
         enc = model_info["gpt-3.5-turbo"]['tokenizer']
@@ -192,13 +191,12 @@ class LatexPaperFileGroup():
                 self.sp_file_index.append(index)
                 self.sp_file_tag.append(self.file_paths[index])
             else:
-                from ..crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
-                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
+                from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
+                segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
                 for j, segment in enumerate(segments):
                     self.sp_file_contents.append(segment)
                     self.sp_file_index.append(index)
                     self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
         print('Segmentation: done')

     def merge_result(self):
         self.file_result = ["" for _ in range(len(self.file_paths))]
@@ -404,7 +402,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
                     result_pdf = pj(work_folder_modified, f'merge_diff.pdf')    # get pdf path
                     promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)    # promote file to web UI
         if modified_pdf_success:
-            yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history)    # 刷新Gradio前端界面
+            yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 正在尝试生成对比PDF, 请稍候 ...', chatbot, history)    # 刷新Gradio前端界面
             result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')  # get pdf path
             origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf')  # get pdf path
             if os.path.exists(pj(work_folder, '..', 'translation')):
@@ -416,8 +414,11 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
                 from .latex_toolbox import merge_pdfs
                 concat_pdf = pj(work_folder_modified, f'comparison.pdf')
                 merge_pdfs(origin_pdf, result_pdf, concat_pdf)
+                if os.path.exists(pj(work_folder, '..', 'translation')):
+                    shutil.copyfile(concat_pdf, pj(work_folder, '..', 'translation', 'comparison.pdf'))
                 promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot)    # promote file to web UI
             except Exception as e:
+                print(e)
                 pass
             return True # 成功啦
         else:
@@ -493,11 +493,38 @@ def compile_latex_with_timeout(command, cwd, timeout=60):
         return False
     return True

+def run_in_subprocess_wrapper_func(func, args, kwargs, return_dict, exception_dict):
+    import sys
+    try:
+        result = func(*args, **kwargs)
+        return_dict['result'] = result
+    except Exception as e:
+        exc_info = sys.exc_info()
+        exception_dict['exception'] = exc_info
+
+def run_in_subprocess(func):
+    import multiprocessing
+    def wrapper(*args, **kwargs):
+        return_dict = multiprocessing.Manager().dict()
+        exception_dict = multiprocessing.Manager().dict()
+        process = multiprocessing.Process(target=run_in_subprocess_wrapper_func,
+                                          args=(func, args, kwargs, return_dict, exception_dict))
+        process.start()
+        process.join()
+        process.close()
+        if 'exception' in exception_dict:
+            # ooops, the subprocess ran into an exception
+            exc_info = exception_dict['exception']
+            raise exc_info[1].with_traceback(exc_info[2])
+        if 'result' in return_dict.keys():
+            # If the subprocess ran successfully, return the result
+            return return_dict['result']
+    return wrapper
+
-def merge_pdfs(pdf1_path, pdf2_path, output_path):
-    import PyPDF2
+def _merge_pdfs(pdf1_path, pdf2_path, output_path):
+    import PyPDF2  # PyPDF2这个库有严重的内存泄露问题,把它放到子进程中运行,从而方便内存的释放
     Percent = 0.95
     # raise RuntimeError('PyPDF2 has a serious memory leak problem, please use other tools to merge PDF files.')
     # Open the first PDF file
     with open(pdf1_path, 'rb') as pdf1_file:
         pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
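run_in_subprocess turns a function into one that executes in a throwaway worker process: the result comes back through a Manager dict, and an exception re-raises in the parent with its original traceback, while the child's memory (including PyPDF2's leaks) is reclaimed on exit. A usage sketch (my_leaky_task is hypothetical; the real consumer is _merge_pdfs, rebound below):

def my_leaky_task(path):            # hypothetical stand-in for a leaky-library call
    return len(path)

safe_task = run_in_subprocess(my_leaky_task)
print(safe_task('a.pdf'))           # runs in a child process; 5 is returned to the parent
# note: on spawn-based platforms this call belongs under an if __name__ == '__main__' guard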
@@ -531,3 +558,5 @@ def merge_pdfs(pdf1_path, pdf2_path, output_path):
     # Save the merged PDF file
     with open(output_path, 'wb') as output_file:
         output_writer.write(output_file)
+
+merge_pdfs = run_in_subprocess(_merge_pdfs)    # PyPDF2这个库有严重的内存泄露问题,把它放到子进程中运行,从而方便内存的释放
@@ -1,6 +1,7 @@
 from pydantic import BaseModel, Field
 from typing import List
+from toolbox import update_ui_lastest_msg, disable_auto_promotion
 from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log_folder
 from request_llms.bridge_all import predict_no_ui_long_connection
 from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
 import time
@@ -21,11 +22,7 @@ class GptAcademicState():
     def reset(self):
         pass

-    def lock_plugin(self, chatbot):
-        chatbot._cookies['plugin_state'] = pickle.dumps(self)
-
-    def unlock_plugin(self, chatbot):
-        self.reset()
-
+    def dump_state(self, chatbot):
+        chatbot._cookies['plugin_state'] = pickle.dumps(self)

     def set_state(self, chatbot, key, value):
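lock_plugin/unlock_plugin collapse into a single dump_state: the state object is simply pickled into the chatbot cookie on every save. The round trip looks roughly like this (a sketch; the restore side is inferred from the get_state context lines in the next hunk):

import pickle

# saving (what dump_state above does)
chatbot._cookies['plugin_state'] = pickle.dumps(state)
# restoring (sketch of the matching read side, assumed to live in get_state)
state = pickle.loads(chatbot._cookies['plugin_state'])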
@@ -40,6 +37,57 @@ class GptAcademicState():
         state.chatbot = chatbot
         return state

+class GatherMaterials():
+    def __init__(self, materials) -> None:
+        materials = ['image', 'prompt']
+
+class GptAcademicGameBaseState():
+    """
+    1. first init: __init__ ->
+    """
+    def init_game(self, chatbot, lock_plugin):
+        self.plugin_name = None
+        self.callback_fn = None
+        self.delete_game = False
+        self.step_cnt = 0
+
+    def lock_plugin(self, chatbot):
+        if self.callback_fn is None:
+            raise ValueError("callback_fn is None")
+        chatbot._cookies['lock_plugin'] = self.callback_fn
+        self.dump_state(chatbot)
+
+    def get_plugin_name(self):
+        if self.plugin_name is None:
+            raise ValueError("plugin_name is None")
+        return self.plugin_name
+
+    def dump_state(self, chatbot):
+        chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = pickle.dumps(self)
+
+    def set_state(self, chatbot, key, value):
+        setattr(self, key, value)
+        chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = pickle.dumps(self)
+
+    @staticmethod
+    def sync_state(chatbot, llm_kwargs, cls, plugin_name, callback_fn, lock_plugin=True):
+        state = chatbot._cookies.get(f'plugin_state/{plugin_name}', None)
+        if state is not None:
+            state = pickle.loads(state)
+        else:
+            state = cls()
+            state.init_game(chatbot, lock_plugin)
+        state.plugin_name = plugin_name
+        state.llm_kwargs = llm_kwargs
+        state.chatbot = chatbot
+        state.callback_fn = callback_fn
+        return state
+
+    def continue_game(self, prompt, chatbot, history):
+        # 游戏主体
+        yield from self.step(prompt, chatbot, history)
+        self.step_cnt += 1
+        # 保存状态,收尾
+        self.dump_state(chatbot)
+        # 如果游戏结束,清理
+        if self.delete_game:
+            chatbot._cookies['lock_plugin'] = None
+            chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = None
+        yield from update_ui(chatbot=chatbot, history=history)
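GptAcademicGameBaseState is a template: subclasses implement step(), and sync_state() either unpickles the previous round from the cookie or initializes a fresh game, re-locking the UI onto the plugin via callback_fn. A hypothetical subclass to show the intended lifecycle (GuessGame and all its strings are illustrative, not part of this commit):

class GuessGame(GptAcademicGameBaseState):     # hypothetical subclass
    def step(self, prompt, chatbot, history):
        if prompt == "42":
            chatbot.append([prompt, "correct!"])
            self.delete_game = True            # continue_game() then clears the lock
        else:
            chatbot.append([prompt, "guess again"])
        yield from update_ui(chatbot=chatbot, history=history)

# per user message (plugin entry point, sketch):
# state = GuessGame.sync_state(chatbot, llm_kwargs, GuessGame,
#                              plugin_name='guess_game',
#                              callback_fn='crazy_functions.game->guess')
# yield from state.continue_game(prompt, chatbot, history)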
@@ -74,7 +74,7 @@ def produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chat

 def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_files, TOKEN_LIMIT_PER_FRAGMENT, DST_LANG):
     from crazy_functions.pdf_fns.report_gen_html import construct_html
-    from crazy_functions.crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
+    from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
     from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
     from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
@@ -116,7 +116,7 @@ def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_fi
         # find a smooth token limit to achieve even seperation
         count = int(math.ceil(raw_token_num / TOKEN_LIMIT_PER_FRAGMENT))
         token_limit_smooth = raw_token_num // count + count
-        return breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn=get_token_num, limit=token_limit_smooth)
+        return breakdown_text_to_satisfy_token_limit(txt, limit=token_limit_smooth, llm_model=llm_kwargs['llm_model'])

     for section in article_dict.get('sections'):
         if len(section['text']) == 0: continue
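The smoothing deserves a worked example: instead of cutting at the hard per-fragment limit and leaving a small remainder, the limit is lowered so the fragments come out nearly even (pure arithmetic, no project code):

import math
raw_token_num, TOKEN_LIMIT_PER_FRAGMENT = 6000, 2500
count = int(math.ceil(raw_token_num / TOKEN_LIMIT_PER_FRAGMENT))   # 3
token_limit_smooth = raw_token_num // count + count                # 2003
# splitting at ~2003 tokens yields three even fragments (about 2000 each)
# instead of the lopsided 2500/2500/1000 a hard limit would give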
@@ -2,7 +2,7 @@ from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log
 from crazy_functions.multi_stage.multi_stage_utils import GptAcademicState


-def gen_image(llm_kwargs, prompt, resolution="1024x1024", model="dall-e-2", quality=None):
+def gen_image(llm_kwargs, prompt, resolution="1024x1024", model="dall-e-2", quality=None, style=None):
     import requests, json, time, os
     from request_llms.bridge_all import model_info
@@ -25,7 +25,10 @@ def gen_image(llm_kwargs, prompt, resolution="1024x1024", model="dall-e-2", qual
         'model': model,
         'response_format': 'url'
     }
-    if quality is not None: data.update({'quality': quality})
+    if quality is not None:
+        data['quality'] = quality
+    if style is not None:
+        data['style'] = style
     response = requests.post(url, headers=headers, json=data, proxies=proxies)
     print(response.content)
     try:
@@ -54,19 +57,25 @@ def edit_image(llm_kwargs, prompt, image_path, resolution="1024x1024", model="da
     img_endpoint = chat_endpoint.replace('chat/completions','images/edits')
     # # Generate the image
     url = img_endpoint
+    n = 1
     headers = {
         'Authorization': f"Bearer {api_key}",
-        'Content-Type': 'application/json'
     }
-    data = {
-        'image': open(image_path, 'rb'),
-        'prompt': prompt,
-        'n': 1,
-        'size': resolution,
-        'model': model,
-        'response_format': 'url'
+    make_transparent(image_path, image_path+'.tsp.png')
+    make_square_image(image_path+'.tsp.png', image_path+'.tspsq.png')
+    resize_image(image_path+'.tspsq.png', image_path+'.ready.png', max_size=1024)
+    image_path = image_path+'.ready.png'
+    with open(image_path, 'rb') as f:
+        file_content = f.read()
+    files = {
+        'image': (os.path.basename(image_path), file_content),
+        # 'mask': ('mask.png', open('mask.png', 'rb'))
+        'prompt': (None, prompt),
+        "n": (None, str(n)),
+        'size': (None, resolution),
     }
-    response = requests.post(url, headers=headers, json=data, proxies=proxies)
+
+    response = requests.post(url, headers=headers, files=files, proxies=proxies)
     print(response.content)
     try:
         image_url = json.loads(response.content.decode('utf8'))['data'][0]['url']
@@ -95,7 +104,11 @@ def 图片生成_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys
     web_port 当前软件运行的端口号
     """
     history = []    # 清空历史,以免输入溢出
-    chatbot.append(("您正在调用“图像生成”插件。", "[Local Message] 生成图像, 请先把模型切换至gpt-*或者api2d-*。如果中文Prompt效果不理想, 请尝试英文Prompt。正在处理中 ....."))
+    if prompt.strip() == "":
+        chatbot.append((prompt, "[Local Message] 图像生成提示为空白,请在“输入区”输入图像生成提示。"))
+        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+        return
+    chatbot.append(("您正在调用“图像生成”插件。", "[Local Message] 生成图像, 请先把模型切换至gpt-*。如果中文Prompt效果不理想, 请尝试英文Prompt。正在处理中 ....."))
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 由于请求gpt需要一段时间,我们先及时地做一次界面更新
     if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
     resolution = plugin_kwargs.get("advanced_arg", '1024x1024')
@@ -112,16 +125,25 @@ def 图片生成_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys
 @CatchException
 def 图片生成_DALLE3(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     history = []    # 清空历史,以免输入溢出
-    chatbot.append(("您正在调用“图像生成”插件。", "[Local Message] 生成图像, 请先把模型切换至gpt-*或者api2d-*。如果中文Prompt效果不理想, 请尝试英文Prompt。正在处理中 ....."))
+    if prompt.strip() == "":
+        chatbot.append((prompt, "[Local Message] 图像生成提示为空白,请在“输入区”输入图像生成提示。"))
+        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+        return
+    chatbot.append(("您正在调用“图像生成”插件。", "[Local Message] 生成图像, 请先把模型切换至gpt-*。如果中文Prompt效果不理想, 请尝试英文Prompt。正在处理中 ....."))
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 由于请求gpt需要一段时间,我们先及时地做一次界面更新
     if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
-    resolution = plugin_kwargs.get("advanced_arg", '1024x1024').lower()
-    if resolution.endswith('-hd'):
-        resolution = resolution.replace('-hd', '')
-        quality = 'hd'
-    else:
-        quality = 'standard'
-    image_url, image_path = gen_image(llm_kwargs, prompt, resolution, model="dall-e-3", quality=quality)
+    resolution_arg = plugin_kwargs.get("advanced_arg", '1024x1024-standard-vivid').lower()
+    parts = resolution_arg.split('-')
+    resolution = parts[0] # 解析分辨率
+    quality = 'standard' # 质量与风格默认值
+    style = 'vivid'
+    # 遍历检查是否有额外参数
+    for part in parts[1:]:
+        if part in ['hd', 'standard']:
+            quality = part
+        elif part in ['vivid', 'natural']:
+            style = part
+    image_url, image_path = gen_image(llm_kwargs, prompt, resolution, model="dall-e-3", quality=quality, style=style)
     chatbot.append([prompt,
         f'图像中转网址: <br/>`{image_url}`<br/>'+
         f'中转网址预览: <br/><div align="center"><img src="{image_url}"></div>'
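The advanced_arg grammar is thus <resolution>[-hd|-standard][-vivid|-natural], with the flags accepted in either order. How the parsing above resolves, e.g. for '1024x1792-hd-natural':

parts = '1024x1792-hd-natural'.split('-')   # ['1024x1792', 'hd', 'natural']
# resolution = '1024x1792', quality = 'hd', style = 'natural'
# unrecognized trailing parts are silently ignored, falling back to the defaults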
@@ -130,6 +152,7 @@ def 图片生成_DALLE3(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys
         ])
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+

 class ImageEditState(GptAcademicState):
     # 尚未完成
     def get_image_file(self, x):
@@ -142,18 +165,27 @@ class ImageEditState(GptAcademicState):
         file = None if not confirm else file_manifest[0]
         return confirm, file

+    def lock_plugin(self, chatbot):
+        chatbot._cookies['lock_plugin'] = 'crazy_functions.图片生成->图片修改_DALLE2'
+        self.dump_state(chatbot)
+
+    def unlock_plugin(self, chatbot):
+        self.reset()
+        chatbot._cookies['lock_plugin'] = None
+        self.dump_state(chatbot)
+
     def get_resolution(self, x):
         return (x in ['256x256', '512x512', '1024x1024']), x

     def get_prompt(self, x):
         confirm = (len(x)>=5) and (not self.get_resolution(x)[0]) and (not self.get_image_file(x)[0])
         return confirm, x

     def reset(self):
         self.req = [
-            {'value':None, 'description': '请先上传图像(必须是.png格式), 然后再次点击本插件', 'verify_fn': self.get_image_file},
-            {'value':None, 'description': '请输入分辨率,可选:256x256, 512x512 或 1024x1024', 'verify_fn': self.get_resolution},
-            {'value':None, 'description': '请输入修改需求,建议您使用英文提示词', 'verify_fn': self.get_prompt},
+            {'value':None, 'description': '请先上传图像(必须是.png格式), 然后再次点击本插件', 'verify_fn': self.get_image_file},
+            {'value':None, 'description': '请输入分辨率,可选:256x256, 512x512 或 1024x1024, 然后再次点击本插件', 'verify_fn': self.get_resolution},
+            {'value':None, 'description': '请输入修改需求,建议您使用英文提示词, 然后再次点击本插件', 'verify_fn': self.get_prompt},
         ]
         self.info = ""
@@ -163,7 +195,7 @@ class ImageEditState(GptAcademicState):
             confirm, res = r['verify_fn'](prompt)
             if confirm:
                 r['value'] = res
-                self.set_state(chatbot, 'dummy_key', 'dummy_value')
+                self.dump_state(chatbot)
                 break
         return self
@@ -182,23 +214,63 @@ def 图片修改_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys
     history = []    # 清空历史
     state = ImageEditState.get_state(chatbot, ImageEditState)
     state = state.feed(prompt, chatbot)
+    state.lock_plugin(chatbot)
     if not state.already_obtained_all_materials():
-        chatbot.append(["图片修改(先上传图片,再输入修改需求,最后输入分辨率)", state.next_req()])
+        chatbot.append(["图片修改\n\n1. 上传图片(图片中需要修改的位置用橡皮擦擦除为纯白色,即RGB=255,255,255)\n2. 输入分辨率 \n3. 输入修改需求", state.next_req()])
         yield from update_ui(chatbot=chatbot, history=history)
         return

-    image_path = state.req[0]
-    resolution = state.req[1]
-    prompt = state.req[2]
+    image_path = state.req[0]['value']
+    resolution = state.req[1]['value']
+    prompt = state.req[2]['value']
+    chatbot.append(["图片修改, 执行中", f"图片:`{image_path}`<br/>分辨率:`{resolution}`<br/>修改需求:`{prompt}`"])
+    yield from update_ui(chatbot=chatbot, history=history)

     image_url, image_path = edit_image(llm_kwargs, prompt, image_path, resolution)
-    chatbot.append([state.prompt,
+    chatbot.append([prompt,
         f'图像中转网址: <br/>`{image_url}`<br/>'+
         f'中转网址预览: <br/><div align="center"><img src="{image_url}"></div>'
         f'本地文件地址: <br/>`{image_path}`<br/>'+
         f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
     ])
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+    state.unlock_plugin(chatbot)
+
+def make_transparent(input_image_path, output_image_path):
+    from PIL import Image
+    image = Image.open(input_image_path)
+    image = image.convert("RGBA")
+    data = image.getdata()
+    new_data = []
+    for item in data:
+        if item[0] == 255 and item[1] == 255 and item[2] == 255:
+            new_data.append((255, 255, 255, 0))
+        else:
+            new_data.append(item)
+    image.putdata(new_data)
+    image.save(output_image_path, "PNG")
+
+def resize_image(input_path, output_path, max_size=1024):
+    from PIL import Image
+    with Image.open(input_path) as img:
+        width, height = img.size
+        if width > max_size or height > max_size:
+            if width >= height:
+                new_width = max_size
+                new_height = int((max_size / width) * height)
+            else:
+                new_height = max_size
+                new_width = int((max_size / height) * width)
+
+            resized_img = img.resize(size=(new_width, new_height))
+            resized_img.save(output_path)
+        else:
+            img.save(output_path)
+
+def make_square_image(input_path, output_path):
+    from PIL import Image
+    with Image.open(input_path) as img:
+        width, height = img.size
+        size = max(width, height)
+        new_img = Image.new("RGBA", (size, size), color="black")
+        new_img.paste(img, ((size - width) // 2, (size - height) // 2))
+        new_img.save(output_path)
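These three helpers are exactly the preprocessing chain that edit_image runs before uploading to the images/edits endpoint: erase-to-white regions become transparent, the image is padded to a square, and it is capped at 1024px. End to end (paths hypothetical):

make_transparent('in.png', 'in.png.tsp.png')             # white (255,255,255) -> alpha 0
make_square_image('in.png.tsp.png', 'in.png.tspsq.png')  # pad to a square on a black canvas
resize_image('in.png.tspsq.png', 'in.png.ready.png', max_size=1024)
# 'in.png.ready.png' is what gets uploaded as the multipart 'image' field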
@@ -29,17 +29,12 @@ def 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot
         except:
             raise RuntimeError('请先将.doc文档转换为.docx文档。')

-        print(file_content)
         # private_upload里面的文件名在解压zip后容易出现乱码(rar和7z格式正常),故可以只分析文章内容,不输入文件名
-        from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
+        from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
         from request_llms.bridge_all import model_info
         max_token = model_info[llm_kwargs['llm_model']]['max_token']
         TOKEN_LIMIT_PER_FRAGMENT = max_token * 3 // 4
-        paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
-            txt=file_content,
-            get_token_fn=model_info[llm_kwargs['llm_model']]['token_cnt'],
-            limit=TOKEN_LIMIT_PER_FRAGMENT
-        )
+        paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model'])
         this_paper_history = []
         for i, paper_frag in enumerate(paper_fragments):
             i_say = f'请对下面的文章片段用中文做概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{paper_frag}```'
@@ -28,8 +28,8 @@ class PaperFileGroup():
                 self.sp_file_index.append(index)
                 self.sp_file_tag.append(self.file_paths[index])
             else:
-                from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
-                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
+                from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
+                segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
                 for j, segment in enumerate(segments):
                     self.sp_file_contents.append(segment)
                     self.sp_file_index.append(index)
@@ -20,14 +20,9 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,

     TOKEN_LIMIT_PER_FRAGMENT = 2500

-    from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
-    from request_llms.bridge_all import model_info
-    enc = model_info["gpt-3.5-turbo"]['tokenizer']
-    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
-    paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
-        txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
-    page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
-        txt=str(page_one), get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT//4)
+    from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
+    paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model'])
+    page_one_fragments = breakdown_text_to_satisfy_token_limit(txt=str(page_one), limit=TOKEN_LIMIT_PER_FRAGMENT//4, llm_model=llm_kwargs['llm_model'])
     # 为了更好的效果,我们剥离Introduction之后的部分(如果有)
     paper_meta = page_one_fragments[0].split('introduction')[0].split('Introduction')[0].split('INTRODUCTION')[0]
@@ -91,14 +91,9 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
     page_one = str(page_one).encode('utf-8', 'ignore').decode()   # avoid reading non-utf8 chars

     # 递归地切割PDF文件
-    from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
-    from request_llms.bridge_all import model_info
-    enc = model_info["gpt-3.5-turbo"]['tokenizer']
-    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
-    paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
-        txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
-    page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
-        txt=page_one, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT//4)
+    from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
+    paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model'])
+    page_one_fragments = breakdown_text_to_satisfy_token_limit(txt=page_one, limit=TOKEN_LIMIT_PER_FRAGMENT//4, llm_model=llm_kwargs['llm_model'])

     # 为了更好的效果,我们剥离Introduction之后的部分(如果有)
     paper_meta = page_one_fragments[0].split('introduction')[0].split('Introduction')[0].split('INTRODUCTION')[0]
@@ -18,14 +18,9 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro

     TOKEN_LIMIT_PER_FRAGMENT = 2500

-    from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
-    from request_llms.bridge_all import model_info
-    enc = model_info["gpt-3.5-turbo"]['tokenizer']
-    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
-    paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
-        txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
-    page_one_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
-        txt=str(page_one), get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT//4)
+    from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
+    paper_fragments = breakdown_text_to_satisfy_token_limit(txt=file_content, limit=TOKEN_LIMIT_PER_FRAGMENT, llm_model=llm_kwargs['llm_model'])
+    page_one_fragments = breakdown_text_to_satisfy_token_limit(txt=str(page_one), limit=TOKEN_LIMIT_PER_FRAGMENT//4, llm_model=llm_kwargs['llm_model'])
     # 为了更好的效果,我们剥离Introduction之后的部分(如果有)
     paper_meta = page_one_fragments[0].split('introduction')[0].split('Introduction')[0].split('INTRODUCTION')[0]
@@ -45,7 +40,7 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
     for i in range(n_fragment):
         NUM_OF_WORD = MAX_WORD_TOTAL // n_fragment
         i_say = f"Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i]}"
-        i_say_show_user = f"[{i+1}/{n_fragment}] Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i][:200]}"
+        i_say_show_user = f"[{i+1}/{n_fragment}] Read this section, recapitulate the content of this section with less than {NUM_OF_WORD} words: {paper_fragments[i][:200]} ...."
         gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user,  # i_say=真正给chatgpt的提问, i_say_show_user=给用户看的提问
                                                                            llm_kwargs, chatbot,
                                                                            history=["The main idea of the previous section is?", last_iteration_result],  # 迭代上一次的结果
@@ -12,13 +12,6 @@ class PaperFileGroup():
         self.sp_file_index = []
         self.sp_file_tag = []

-        # count_token
-        from request_llms.bridge_all import model_info
-        enc = model_info["gpt-3.5-turbo"]['tokenizer']
-        def get_token_num(txt): return len(
-            enc.encode(txt, disallowed_special=()))
-        self.get_token_num = get_token_num
-
     def run_file_split(self, max_token_limit=1900):
         """
         将长文本分离开来
@@ -29,9 +22,8 @@ class PaperFileGroup():
                 self.sp_file_index.append(index)
                 self.sp_file_tag.append(self.file_paths[index])
             else:
-                from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
-                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(
-                    file_content, self.get_token_num, max_token_limit)
+                from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
+                segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
                 for j, segment in enumerate(segments):
                     self.sp_file_contents.append(segment)
                     self.sp_file_index.append(index)