镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-06 14:36:48 +00:00
Merge branch 'frontier'
这个提交包含在:
@@ -69,12 +69,15 @@ def request_gpt_model_in_new_thread_with_ui_alive(
|
||||
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
|
||||
executor = ThreadPoolExecutor(max_workers=16)
|
||||
mutable = ["", time.time(), ""]
|
||||
# 看门狗耐心
|
||||
watch_dog_patience = 5
|
||||
# 请求任务
|
||||
def _req_gpt(inputs, history, sys_prompt):
|
||||
retry_op = retry_times_at_unknown_error
|
||||
exceeded_cnt = 0
|
||||
while True:
|
||||
# watchdog error
|
||||
if len(mutable) >= 2 and (time.time()-mutable[1]) > 5:
|
||||
if len(mutable) >= 2 and (time.time()-mutable[1]) > watch_dog_patience:
|
||||
raise RuntimeError("检测到程序终止。")
|
||||
try:
|
||||
# 【第一种情况】:顺利完成
|
||||
@@ -193,6 +196,9 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
# 跨线程传递
|
||||
mutable = [["", time.time(), "等待中"] for _ in range(n_frag)]
|
||||
|
||||
# 看门狗耐心
|
||||
watch_dog_patience = 5
|
||||
|
||||
# 子线程任务
|
||||
def _req_gpt(index, inputs, history, sys_prompt):
|
||||
gpt_say = ""
|
||||
@@ -201,7 +207,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
mutable[index][2] = "执行中"
|
||||
while True:
|
||||
# watchdog error
|
||||
if len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > 5:
|
||||
if len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > watch_dog_patience:
|
||||
raise RuntimeError("检测到程序终止。")
|
||||
try:
|
||||
# 【第一种情况】:顺利完成
|
||||
@@ -275,7 +281,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
# 在前端打印些好玩的东西
|
||||
for thread_index, _ in enumerate(worker_done):
|
||||
print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
|
||||
replace('\n', '').replace('```', '...').replace(
|
||||
replace('\n', '').replace('`', '.').replace(
|
||||
' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
|
||||
observe_win.append(print_something_really_funny)
|
||||
# 在前端打印些好玩的东西
|
||||
@@ -301,7 +307,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
|
||||
gpt_res = f.result()
|
||||
chatbot.append([inputs_show_user, gpt_res])
|
||||
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
|
||||
time.sleep(0.3)
|
||||
time.sleep(0.5)
|
||||
return gpt_response_collection
|
||||
|
||||
|
||||
|
||||
@@ -342,10 +342,33 @@ def merge_tex_files(project_foler, main_file, mode):
|
||||
pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
|
||||
match_opt1 = pattern_opt1.search(main_file)
|
||||
match_opt2 = pattern_opt2.search(main_file)
|
||||
if (match_opt1 is None) and (match_opt2 is None):
|
||||
# "Cannot find paper abstract section!"
|
||||
main_file = insert_abstract(main_file)
|
||||
match_opt1 = pattern_opt1.search(main_file)
|
||||
match_opt2 = pattern_opt2.search(main_file)
|
||||
assert (match_opt1 is not None) or (match_opt2 is not None), "Cannot find paper abstract section!"
|
||||
return main_file
|
||||
|
||||
|
||||
insert_missing_abs_str = r"""
|
||||
\begin{abstract}
|
||||
The GPT-Academic program cannot find abstract section in this paper.
|
||||
\end{abstract}
|
||||
"""
|
||||
|
||||
def insert_abstract(tex_content):
|
||||
if "\\maketitle" in tex_content:
|
||||
# find the position of "\maketitle"
|
||||
find_index = tex_content.index("\\maketitle")
|
||||
# find the nearest ending line
|
||||
end_line_index = tex_content.find("\n", find_index)
|
||||
# insert "abs_str" on the next line
|
||||
modified_tex = tex_content[:end_line_index+1] + '\n\n' + insert_missing_abs_str + '\n\n' + tex_content[end_line_index+1:]
|
||||
return modified_tex
|
||||
else:
|
||||
return tex_content
|
||||
|
||||
"""
|
||||
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
Post process
|
||||
|
||||
@@ -1,4 +1,106 @@
|
||||
import time, logging, json
|
||||
import time, logging, json, sys, struct
|
||||
import numpy as np
|
||||
from scipy.io.wavfile import WAVE_FORMAT
|
||||
|
||||
def write_numpy_to_wave(filename, rate, data, add_header=False):
|
||||
"""
|
||||
Write a NumPy array as a WAV file.
|
||||
"""
|
||||
def _array_tofile(fid, data):
|
||||
# ravel gives a c-contiguous buffer
|
||||
fid.write(data.ravel().view('b').data)
|
||||
|
||||
if hasattr(filename, 'write'):
|
||||
fid = filename
|
||||
else:
|
||||
fid = open(filename, 'wb')
|
||||
|
||||
fs = rate
|
||||
|
||||
try:
|
||||
dkind = data.dtype.kind
|
||||
if not (dkind == 'i' or dkind == 'f' or (dkind == 'u' and
|
||||
data.dtype.itemsize == 1)):
|
||||
raise ValueError("Unsupported data type '%s'" % data.dtype)
|
||||
|
||||
header_data = b''
|
||||
|
||||
header_data += b'RIFF'
|
||||
header_data += b'\x00\x00\x00\x00'
|
||||
header_data += b'WAVE'
|
||||
|
||||
# fmt chunk
|
||||
header_data += b'fmt '
|
||||
if dkind == 'f':
|
||||
format_tag = WAVE_FORMAT.IEEE_FLOAT
|
||||
else:
|
||||
format_tag = WAVE_FORMAT.PCM
|
||||
if data.ndim == 1:
|
||||
channels = 1
|
||||
else:
|
||||
channels = data.shape[1]
|
||||
bit_depth = data.dtype.itemsize * 8
|
||||
bytes_per_second = fs*(bit_depth // 8)*channels
|
||||
block_align = channels * (bit_depth // 8)
|
||||
|
||||
fmt_chunk_data = struct.pack('<HHIIHH', format_tag, channels, fs,
|
||||
bytes_per_second, block_align, bit_depth)
|
||||
if not (dkind == 'i' or dkind == 'u'):
|
||||
# add cbSize field for non-PCM files
|
||||
fmt_chunk_data += b'\x00\x00'
|
||||
|
||||
header_data += struct.pack('<I', len(fmt_chunk_data))
|
||||
header_data += fmt_chunk_data
|
||||
|
||||
# fact chunk (non-PCM files)
|
||||
if not (dkind == 'i' or dkind == 'u'):
|
||||
header_data += b'fact'
|
||||
header_data += struct.pack('<II', 4, data.shape[0])
|
||||
|
||||
# check data size (needs to be immediately before the data chunk)
|
||||
if ((len(header_data)-4-4) + (4+4+data.nbytes)) > 0xFFFFFFFF:
|
||||
raise ValueError("Data exceeds wave file size limit")
|
||||
if add_header:
|
||||
fid.write(header_data)
|
||||
# data chunk
|
||||
fid.write(b'data')
|
||||
fid.write(struct.pack('<I', data.nbytes))
|
||||
if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and
|
||||
sys.byteorder == 'big'):
|
||||
data = data.byteswap()
|
||||
_array_tofile(fid, data)
|
||||
|
||||
if add_header:
|
||||
# Determine file size and place it in correct
|
||||
# position at start of the file.
|
||||
size = fid.tell()
|
||||
fid.seek(4)
|
||||
fid.write(struct.pack('<I', size-8))
|
||||
|
||||
finally:
|
||||
if not hasattr(filename, 'write'):
|
||||
fid.close()
|
||||
else:
|
||||
fid.seek(0)
|
||||
|
||||
def is_speaker_speaking(vad, data, sample_rate):
|
||||
# Function to detect if the speaker is speaking
|
||||
# The WebRTC VAD only accepts 16-bit mono PCM audio,
|
||||
# sampled at 8000, 16000, 32000 or 48000 Hz.
|
||||
# A frame must be either 10, 20, or 30 ms in duration:
|
||||
frame_duration = 30
|
||||
n_bit_each = int(sample_rate * frame_duration / 1000)*2 # x2 because audio is 16 bit (2 bytes)
|
||||
res_list = []
|
||||
for t in range(len(data)):
|
||||
if t!=0 and t % n_bit_each == 0:
|
||||
res_list.append(vad.is_speech(data[t-n_bit_each:t], sample_rate))
|
||||
|
||||
info = ''.join(['^' if r else '.' for r in res_list])
|
||||
info = info[:10]
|
||||
if any(res_list):
|
||||
return True, info
|
||||
else:
|
||||
return False, info
|
||||
|
||||
|
||||
class AliyunASR():
|
||||
@@ -66,12 +168,22 @@ class AliyunASR():
|
||||
on_close=self.test_on_close,
|
||||
callback_args=[uuid.hex]
|
||||
)
|
||||
|
||||
timeout_limit_second = 20
|
||||
r = sr.start(aformat="pcm",
|
||||
timeout=timeout_limit_second,
|
||||
enable_intermediate_result=True,
|
||||
enable_punctuation_prediction=True,
|
||||
enable_inverse_text_normalization=True)
|
||||
|
||||
import webrtcvad
|
||||
vad = webrtcvad.Vad()
|
||||
vad.set_mode(1)
|
||||
|
||||
is_previous_frame_transmitted = False # 上一帧是否有人说话
|
||||
previous_frame_data = None
|
||||
echo_cnt = 0 # 在没有声音之后,继续向服务器发送n次音频数据
|
||||
echo_cnt_max = 4 # 在没有声音之后,继续向服务器发送n次音频数据
|
||||
keep_alive_last_send_time = time.time()
|
||||
while not self.stop:
|
||||
# time.sleep(self.capture_interval)
|
||||
audio = rad.read(uuid.hex)
|
||||
@@ -79,12 +191,32 @@ class AliyunASR():
|
||||
# convert to pcm file
|
||||
temp_file = f'{temp_folder}/{uuid.hex}.pcm' #
|
||||
dsdata = change_sample_rate(audio, rad.rate, NEW_SAMPLERATE) # 48000 --> 16000
|
||||
io.wavfile.write(temp_file, NEW_SAMPLERATE, dsdata)
|
||||
write_numpy_to_wave(temp_file, NEW_SAMPLERATE, dsdata)
|
||||
# read pcm binary
|
||||
with open(temp_file, "rb") as f: data = f.read()
|
||||
# print('audio len:', len(audio), '\t ds len:', len(dsdata), '\t need n send:', len(data)//640)
|
||||
slices = zip(*(iter(data),) * 640) # 640个字节为一组
|
||||
for i in slices: sr.send_audio(bytes(i))
|
||||
is_speaking, info = is_speaker_speaking(vad, data, NEW_SAMPLERATE)
|
||||
|
||||
if is_speaking or echo_cnt > 0:
|
||||
# 如果话筒激活 / 如果处于回声收尾阶段
|
||||
echo_cnt -= 1
|
||||
if not is_previous_frame_transmitted: # 上一帧没有人声,但是我们把上一帧同样加上
|
||||
if previous_frame_data is not None: data = previous_frame_data + data
|
||||
if is_speaking:
|
||||
echo_cnt = echo_cnt_max
|
||||
slices = zip(*(iter(data),) * 640) # 640个字节为一组
|
||||
for i in slices: sr.send_audio(bytes(i))
|
||||
keep_alive_last_send_time = time.time()
|
||||
is_previous_frame_transmitted = True
|
||||
else:
|
||||
is_previous_frame_transmitted = False
|
||||
echo_cnt = 0
|
||||
# 保持链接激活,即使没有声音,也根据时间间隔,发送一些音频片段给服务器
|
||||
if time.time() - keep_alive_last_send_time > timeout_limit_second/2:
|
||||
slices = zip(*(iter(data),) * 640) # 640个字节为一组
|
||||
for i in slices: sr.send_audio(bytes(i))
|
||||
keep_alive_last_send_time = time.time()
|
||||
is_previous_frame_transmitted = True
|
||||
self.audio_shape = info
|
||||
else:
|
||||
time.sleep(0.1)
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ class RealtimeAudioDistribution():
|
||||
def read(self, uuid):
|
||||
if uuid in self.data:
|
||||
res = self.data.pop(uuid)
|
||||
print('\r read-', len(res), '-', max(res), end='', flush=True)
|
||||
# print('\r read-', len(res), '-', max(res), end='', flush=True)
|
||||
else:
|
||||
res = None
|
||||
return res
|
||||
|
||||
@@ -6,6 +6,7 @@ import threading, time
|
||||
import numpy as np
|
||||
from .live_audio.aliyunASR import AliyunASR
|
||||
import json
|
||||
import re
|
||||
|
||||
class WatchDog():
|
||||
def __init__(self, timeout, bark_fn, interval=3, msg="") -> None:
|
||||
@@ -38,10 +39,22 @@ def chatbot2history(chatbot):
|
||||
history = []
|
||||
for c in chatbot:
|
||||
for q in c:
|
||||
if q not in ["[请讲话]", "[等待GPT响应]", "[正在等您说完问题]"]:
|
||||
if q in ["[ 请讲话 ]", "[ 等待GPT响应 ]", "[ 正在等您说完问题 ]"]:
|
||||
continue
|
||||
elif q.startswith("[ 正在等您说完问题 ]"):
|
||||
continue
|
||||
else:
|
||||
history.append(q.strip('<div class="markdown-body">').strip('</div>').strip('<p>').strip('</p>'))
|
||||
return history
|
||||
|
||||
def visualize_audio(chatbot, audio_shape):
|
||||
if len(chatbot) == 0: chatbot.append(["[ 请讲话 ]", "[ 正在等您说完问题 ]"])
|
||||
chatbot[-1] = list(chatbot[-1])
|
||||
p1 = '「'
|
||||
p2 = '」'
|
||||
chatbot[-1][-1] = re.sub(p1+r'(.*)'+p2, '', chatbot[-1][-1])
|
||||
chatbot[-1][-1] += (p1+f"`{audio_shape}`"+p2)
|
||||
|
||||
class AsyncGptTask():
|
||||
def __init__(self) -> None:
|
||||
self.observe_future = []
|
||||
@@ -81,8 +94,9 @@ class InterviewAssistant(AliyunASR):
|
||||
self.capture_interval = 0.5 # second
|
||||
self.stop = False
|
||||
self.parsed_text = "" # 下个句子中已经说完的部分, 由 test_on_result_chg() 写入
|
||||
self.parsed_sentence = "" # 某段话的整个句子,由 test_on_sentence_end() 写入
|
||||
self.parsed_sentence = "" # 某段话的整个句子, 由 test_on_sentence_end() 写入
|
||||
self.buffered_sentence = "" #
|
||||
self.audio_shape = "" # 音频的可视化表现, 由 audio_convertion_thread() 写入
|
||||
self.event_on_result_chg = threading.Event()
|
||||
self.event_on_entence_end = threading.Event()
|
||||
self.event_on_commit_question = threading.Event()
|
||||
@@ -117,7 +131,7 @@ class InterviewAssistant(AliyunASR):
|
||||
def begin(self, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
|
||||
# main plugin function
|
||||
self.init(chatbot)
|
||||
chatbot.append(["[请讲话]", "[正在等您说完问题]"])
|
||||
chatbot.append(["[ 请讲话 ]", "[ 正在等您说完问题 ]"])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
self.plugin_wd.begin_watch()
|
||||
self.agt = AsyncGptTask()
|
||||
@@ -157,14 +171,18 @@ class InterviewAssistant(AliyunASR):
|
||||
|
||||
self.commit_wd.begin_watch()
|
||||
chatbot[-1] = list(chatbot[-1])
|
||||
chatbot[-1] = [self.buffered_sentence, "[等待GPT响应]"]
|
||||
chatbot[-1] = [self.buffered_sentence, "[ 等待GPT响应 ]"]
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
# add gpt task 创建子线程请求gpt,避免线程阻塞
|
||||
history = chatbot2history(chatbot)
|
||||
self.agt.add_async_gpt_task(self.buffered_sentence, len(chatbot)-1, llm_kwargs, history, system_prompt)
|
||||
|
||||
self.buffered_sentence = ""
|
||||
chatbot.append(["[请讲话]", "[正在等您说完问题]"])
|
||||
chatbot.append(["[ 请讲话 ]", "[ 正在等您说完问题 ]"])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
if not self.event_on_result_chg.is_set() and not self.event_on_entence_end.is_set() and not self.event_on_commit_question.is_set():
|
||||
visualize_audio(chatbot, self.audio_shape)
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
|
||||
if len(self.stop_msg) != 0:
|
||||
@@ -183,7 +201,7 @@ def 语音助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt
|
||||
import nls
|
||||
from scipy import io
|
||||
except:
|
||||
chatbot.append(["导入依赖失败", "使用该模块需要额外依赖, 安装方法:```pip install --upgrade aliyun-python-sdk-core==2.13.3 pyOpenSSL scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git```"])
|
||||
chatbot.append(["导入依赖失败", "使用该模块需要额外依赖, 安装方法:```pip install --upgrade aliyun-python-sdk-core==2.13.3 pyOpenSSL webrtcvad scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git```"])
|
||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
|
||||
return
|
||||
|
||||
|
||||
在新工单中引用
屏蔽一个用户