Mirrored from https://github.com/binary-husky/gpt_academic.git
Synced 2025-12-06 14:36:48 +00:00

Commit: Sync audio input (同步音频输入)
@@ -0,0 +1,37 @@
import numpy as np


def Singleton(cls):
    _instance = {}

    def _singleton(*args, **kargs):
        if cls not in _instance:
            _instance[cls] = cls(*args, **kargs)
        return _instance[cls]

    return _singleton


@Singleton
class RealtimeAudioDistribution():
    def __init__(self) -> None:
        self.data = {}          # per-uuid audio buffers
        self.max_len = 1024*64  # cap each buffer at 64k samples
        self.rate = 48000       # read-only, samples per second

    def feed(self, uuid, audio):
        # audio arrives as a (sample_rate, ndarray) tuple
        print('feed')
        self.rate, audio_ = audio
        if uuid not in self.data:
            self.data[uuid] = audio_
        else:
            new_arr = np.concatenate((self.data[uuid], audio_))
            # keep only the most recent max_len samples
            if len(new_arr) > self.max_len: new_arr = new_arr[-self.max_len:]
            self.data[uuid] = new_arr

    def read(self, uuid):
        # pop and return everything buffered for this uuid, or None if empty
        if uuid in self.data:
            res = self.data.pop(uuid)
            print('read', len(res))
        else:
            res = None
        return res
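
For orientation, a minimal usage sketch of the class above (the session key and the silent sample buffer are illustrative, not part of the commit):

import numpy as np

rad_a = RealtimeAudioDistribution()
rad_b = RealtimeAudioDistribution()
assert rad_a is rad_b  # thanks to @Singleton, every caller shares one instance

# feed one second of silence under a hypothetical session key
rad_a.feed('session-1234', (48000, np.zeros(48000, dtype=np.int16)))

chunk = rad_b.read('session-1234')  # pops the buffer; returns None if nothing is queued
print(len(chunk))  # -> 48000
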
@@ -1,45 +1,87 @@
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
import threading, time
import numpy as np


def take_audio_sentence_flagment(captured_audio):
    """
    Decide whether the audio has reached the end of a sentence; if it has, cut out that segment.
    """
    ready_part = None
    other_part = captured_audio
    return ready_part, other_part
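
# The stub above always returns (None, captured_audio), so no segment is ever
# flagged as ready. A minimal energy-based sketch of the intended behaviour
# (the 0.5 s window and the silence threshold are illustrative assumptions,
# not part of this commit):
#
#   def take_audio_sentence_flagment(captured_audio, rate=48000):
#       tail = captured_audio[-int(0.5 * rate):]    # inspect the last half second
#       if len(tail) and np.abs(tail).mean() < 50:  # near-silence => sentence boundary
#           return captured_audio, np.array([])     # everything is ready; nothing left over
#       return None, captured_audio                 # keep buffering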

class InterviewAssistent():

    def __init__(self):
        self.capture_interval = 1.0  # seconds between capture polls
        self.stop = False

    def init(self, chatbot):
        # set up the capture state, then start the audio-capture and transcription threads
        self.captured_audio = np.array([])
        self.keep_latest_n_second = 10
        self.ready_audio_flagment = None
        self.stop = False
        th1 = threading.Thread(target=self.audio_capture_thread, args=(chatbot._cookies['uuid'],))
        th1.daemon = True
        th1.start()
        th2 = threading.Thread(target=self.audio2txt_thread, args=(chatbot._cookies['uuid'],))
        th2.daemon = True
        th2.start()

    def audio_capture_thread(self, uuid):
        # collect audio in a background thread
        from .live_audio.audio_io import RealtimeAudioDistribution
        rad = RealtimeAudioDistribution()
        while not self.stop:
            time.sleep(self.capture_interval)
            chunk = rad.read(uuid.hex)
            if chunk is None: continue  # nothing buffered for this session yet
            self.captured_audio = np.concatenate((self.captured_audio, chunk))
            # keep only the most recent N seconds of audio
            if len(self.captured_audio) > self.keep_latest_n_second * rad.rate:
                self.captured_audio = self.captured_audio[-self.keep_latest_n_second * rad.rate:]

    def audio2txt_thread(self, uuid):
        # convert audio to text in a background thread
        import whisper
        model = whisper.load_model("base")  # load once, not on every loop iteration
        while not self.stop:
            time.sleep(1)
            if len(self.captured_audio) > 0:
                # "audio.mp3" is a placeholder in this commit; the captured buffer is not transcribed yet
                result = model.transcribe("audio.mp3", language='Chinese')
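
    # whisper can also transcribe an in-memory buffer directly, which is what this
    # thread will eventually need. A hedged sketch, assuming 48 kHz int16 capture
    # (whisper expects 16 kHz float32 mono; the [::3] decimation is a crude resample):
    #
    #   audio16k = self.captured_audio.astype(np.float32)[::3] / 32768.0
    #   result = model.transcribe(audio16k, language='Chinese')
    #   text = result['text']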

    def gpt_answer(self, text, chatbot, history, llm_kwargs):
        i_say = inputs_show_user = text
        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs=i_say, inputs_show_user=inputs_show_user,
            llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
            sys_prompt="你是求职者,正在参加面试,请回答问题。"  # "You are a job candidate in an interview; please answer the question."
        )
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        history.extend([i_say, gpt_say])

    def begin(self, llm_kwargs, plugin_kwargs, chatbot, history):
        # main entry point of the interview plugin
        self.init(chatbot)
        while True:
            time.sleep(self.capture_interval)
            if self.ready_audio_flagment:
                audio_for_whisper = self.ready_audio_flagment
                # NOTE: audio2txt is not defined yet in this commit; transcription still lives in audio2txt_thread
                text = self.audio2txt(audio_for_whisper, llm_kwargs)
                yield from self.gpt_answer(text, chatbot, history, llm_kwargs)
                self.ready_audio_flagment = None


@CatchException
def 辅助面试(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    # pip install -U openai-whisper
    chatbot.append(["函数插件功能:辅助面试", "正在预热本地音频转文字模型 ..."])  # "Plugin: interview assistant" / "Warming up the local speech-to-text model ..."
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    import whisper
    whisper.load_model("base")  # warm up the local speech-to-text model
    chatbot.append(["预热本地音频转文字模型完成", "辅助面试助手, 正在监听音频 ..."])  # "Warm-up complete" / "Interview assistant is listening to audio ..."
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    ia = InterviewAssistent()
    yield from ia.begin(llm_kwargs, plugin_kwargs, chatbot, history)
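
The plugin warms the Whisper model up before it starts listening because whisper.load_model("base") downloads and initializes the weights on first use, which is slow. The same warm-up-then-transcribe flow in isolation (the recording file name is illustrative):

# pip install -U openai-whisper
import whisper

model = whisper.load_model("base")  # slow the first time: fetches and initializes weights
result = model.transcribe("interview_clip.wav", language='Chinese')  # hypothetical recording
print(result['text'])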