Mirrored from https://github.com/binary-husky/gpt_academic.git
Synced 2025-12-06 14:36:48 +00:00

Commit: Sync audio input (同步音频输入)
@@ -0,0 +1,37 @@
import numpy as np


def Singleton(cls):
    _instance = {}

    def _singleton(*args, **kargs):
        if cls not in _instance:
            _instance[cls] = cls(*args, **kargs)
        return _instance[cls]

    return _singleton


@Singleton
class RealtimeAudioDistribution():
    def __init__(self) -> None:
        self.data = {}          # per-uuid audio buffers
        self.max_len = 1024*64  # cap each buffer at 64k samples
        self.rate = 48000       # read-only, samples per second

    def feed(self, uuid, audio):
        # audio arrives as a (sample_rate, ndarray) tuple
        print('feed')
        self.rate, audio_ = audio
        if uuid not in self.data:
            self.data[uuid] = audio_
        else:
            new_arr = np.concatenate((self.data[uuid], audio_))
            # keep only the most recent max_len samples
            if len(new_arr) > self.max_len: new_arr = new_arr[-self.max_len:]
            self.data[uuid] = new_arr

    def read(self, uuid):
        # pop and return everything buffered for this uuid, or None if empty
        if uuid in self.data:
            res = self.data.pop(uuid)
            print('read', len(res))
        else:
            res = None
        return res
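
For orientation, a minimal usage sketch of the class above (the session key and the silent sample buffer are illustrative, not part of the commit):

import numpy as np

rad_a = RealtimeAudioDistribution()
rad_b = RealtimeAudioDistribution()
assert rad_a is rad_b  # thanks to @Singleton, every caller shares one instance

# feed one second of silence under a hypothetical session key
rad_a.feed('session-1234', (48000, np.zeros(48000, dtype=np.int16)))

chunk = rad_b.read('session-1234')  # pops the buffer; returns None if nothing is queued
print(len(chunk))  # -> 48000
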
@@ -1,45 +1,87 @@
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
import threading, time
import numpy as np


def take_audio_sentence_flagment(captured_audio):
    """
    Decide whether the audio has reached the end of a sentence; if it has, cut out that segment.
    """
    ready_part = None
    other_part = captured_audio
    return ready_part, other_part
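
# The stub above always returns (None, captured_audio), so no segment is ever
# flagged as ready. A minimal energy-based sketch of the intended behaviour
# (the 0.5 s window and the silence threshold are illustrative assumptions,
# not part of this commit):
#
#   def take_audio_sentence_flagment(captured_audio, rate=48000):
#       tail = captured_audio[-int(0.5 * rate):]    # inspect the last half second
#       if len(tail) and np.abs(tail).mean() < 50:  # near-silence => sentence boundary
#           return captured_audio, np.array([])     # everything is ready; nothing left over
#       return None, captured_audio                 # keep buffering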

class InterviewAssistent():

    def __init__(self):
        self.capture_interval = 1.0  # seconds between capture polls
        self.stop = False

    def init(self, chatbot):
        # set up the capture state, then start the audio-capture and transcription threads
        self.captured_audio = np.array([])
        self.keep_latest_n_second = 10
        self.ready_audio_flagment = None
        self.stop = False
        th1 = threading.Thread(target=self.audio_capture_thread, args=(chatbot._cookies['uuid'],))
        th1.daemon = True
        th1.start()
        th2 = threading.Thread(target=self.audio2txt_thread, args=(chatbot._cookies['uuid'],))
        th2.daemon = True
        th2.start()

    def audio_capture_thread(self, uuid):
        # collect audio in a background thread
        from .live_audio.audio_io import RealtimeAudioDistribution
        rad = RealtimeAudioDistribution()
        while not self.stop:
            time.sleep(self.capture_interval)
            chunk = rad.read(uuid.hex)
            if chunk is None: continue  # nothing buffered for this session yet
            self.captured_audio = np.concatenate((self.captured_audio, chunk))
            # keep only the most recent N seconds of audio
            if len(self.captured_audio) > self.keep_latest_n_second * rad.rate:
                self.captured_audio = self.captured_audio[-self.keep_latest_n_second * rad.rate:]

    def audio2txt_thread(self, uuid):
        # convert audio to text in a background thread
        import whisper
        model = whisper.load_model("base")  # load once, not on every loop iteration
        while not self.stop:
            time.sleep(1)
            if len(self.captured_audio) > 0:
                # "audio.mp3" is a placeholder in this commit; the captured buffer is not transcribed yet
                result = model.transcribe("audio.mp3", language='Chinese')
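
    # whisper can also transcribe an in-memory buffer directly, which is what this
    # thread will eventually need. A hedged sketch, assuming 48 kHz int16 capture
    # (whisper expects 16 kHz float32 mono; the [::3] decimation is a crude resample):
    #
    #   audio16k = self.captured_audio.astype(np.float32)[::3] / 32768.0
    #   result = model.transcribe(audio16k, language='Chinese')
    #   text = result['text']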

    def gpt_answer(self, text, chatbot, history, llm_kwargs):
        i_say = inputs_show_user = text
        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs=i_say, inputs_show_user=inputs_show_user,
            llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
            sys_prompt="你是求职者,正在参加面试,请回答问题。"  # "You are a job candidate in an interview; please answer the question."
        )
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        history.extend([i_say, gpt_say])

    def begin(self, llm_kwargs, plugin_kwargs, chatbot, history):
        # main entry point of the interview plugin
        self.init(chatbot)
        while True:
            time.sleep(self.capture_interval)
            if self.ready_audio_flagment:
                audio_for_whisper = self.ready_audio_flagment
                # NOTE: audio2txt is not defined yet in this commit; transcription still lives in audio2txt_thread
                text = self.audio2txt(audio_for_whisper, llm_kwargs)
                yield from self.gpt_answer(text, chatbot, history, llm_kwargs)
                self.ready_audio_flagment = None


@CatchException
def 辅助面试(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    # pip install -U openai-whisper
    chatbot.append(["函数插件功能:辅助面试", "正在预热本地音频转文字模型 ..."])  # "Plugin: interview assistant" / "Warming up the local speech-to-text model ..."
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    import whisper
    whisper.load_model("base")  # warm up the local speech-to-text model
    chatbot.append(["预热本地音频转文字模型完成", "辅助面试助手, 正在监听音频 ..."])  # "Warm-up complete" / "Interview assistant is listening to audio ..."
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    ia = InterviewAssistent()
    yield from ia.begin(llm_kwargs, plugin_kwargs, chatbot, history)
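
The plugin warms the Whisper model up before it starts listening because whisper.load_model("base") downloads and initializes the weights on first use, which is slow. The same warm-up-then-transcribe flow in isolation (the recording file name is illustrative):

# pip install -U openai-whisper
import whisper

model = whisper.load_model("base")  # slow the first time: fetches and initializes weights
result = model.transcribe("interview_clip.wav", language='Chinese')  # hypothetical recording
print(result['text'])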