Mirrored from https://github.com/binary-husky/gpt_academic.git
Synced 2025-12-06 22:46:48 +00:00
Merge branch 'master' into threejs-app
@@ -30,7 +30,7 @@ def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
     )
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
     from .crazy_utils import try_install_deps
-    try_install_deps(['zh_langchain==0.2.1'])
+    try_install_deps(['zh_langchain==0.2.1', 'pypinyin'])

     # < --------------------读取参数--------------- >
     if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
@@ -147,7 +147,7 @@ def 寻找Latex主文件(file_manifest, mode):
     for texf in file_manifest:
         if os.path.basename(texf).startswith('merge'):
             continue
-        with open(texf, 'r', encoding='utf8') as f:
+        with open(texf, 'r', encoding='utf8', errors='ignore') as f:
             file_content = f.read()
         if r'\documentclass' in file_content:
             canidates.append(texf)
@@ -165,7 +165,7 @@ def 寻找Latex主文件(file_manifest, mode):
         expected_words = ['\input', '\ref', '\cite']
         for texf in canidates:
             canidates_score.append(0)
-            with open(texf, 'r', encoding='utf8') as f:
+            with open(texf, 'r', encoding='utf8', errors='ignore') as f:
                 file_content = f.read()
             for uw in unexpected_words:
                 if uw in file_content:
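Side note (illustration only, not part of the diff above): the added errors='ignore' matters because LaTeX projects occasionally contain files that are not valid UTF-8, and a strict utf8 read aborts with UnicodeDecodeError on the first bad byte, which would drop that file from the candidate scan entirely. A minimal, self-contained sketch of the difference; the file name latin1.tex and its content are made up for this example:

# Hypothetical demo of strict vs. errors='ignore' decoding; 'latin1.tex' is a made-up file.
payload = "% comentário\n\\documentclass{article}\n".encode("latin-1")
with open("latin1.tex", "wb") as f:
    f.write(payload)

try:
    with open("latin1.tex", "r", encoding="utf8") as f:
        f.read()                      # strict decoding fails on the non-UTF-8 byte
except UnicodeDecodeError as err:
    print("strict read failed:", err)

with open("latin1.tex", "r", encoding="utf8", errors="ignore") as f:
    content = f.read()                # undecodable bytes are silently dropped
print(r"\documentclass" in content)   # True: the marker is still detectable

With errors='ignore' the undecodable bytes are discarded, so markers such as \documentclass remain searchable.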
@@ -0,0 +1,93 @@
import time, threading, json

class AliyunASR():

    def test_on_sentence_begin(self, message, *args):
        # print("test_on_sentence_begin:{}".format(message))
        pass

    def test_on_sentence_end(self, message, *args):
        # print("test_on_sentence_end:{}".format(message))
        message = json.loads(message)
        self.parsed_sentence = message['payload']['result']
        self.event_on_entence_end.set()
        print(self.parsed_sentence)

    def test_on_start(self, message, *args):
        # print("test_on_start:{}".format(message))
        pass

    def test_on_error(self, message, *args):
        # print("on_error args=>{}".format(args))
        pass

    def test_on_close(self, *args):
        self.aliyun_service_ok = False
        pass

    def test_on_result_chg(self, message, *args):
        # print("test_on_chg:{}".format(message))
        message = json.loads(message)
        self.parsed_text = message['payload']['result']
        self.event_on_result_chg.set()

    def test_on_completed(self, message, *args):
        # print("on_completed:args=>{} message=>{}".format(args, message))
        pass

    def audio_convertion_thread(self, uuid):
        # 在一个异步线程中采集音频
        import nls  # pip install git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git
        import tempfile
        from scipy import io
        from toolbox import get_conf
        from .audio_io import change_sample_rate
        from .audio_io import RealtimeAudioDistribution
        NEW_SAMPLERATE = 16000
        rad = RealtimeAudioDistribution()
        rad.clean_up()
        temp_folder = tempfile.gettempdir()
        TOKEN, APPKEY = get_conf('ALIYUN_TOKEN', 'ALIYUN_APPKEY')
        self.aliyun_service_ok = True
        URL="wss://nls-gateway.aliyuncs.com/ws/v1"
        sr = nls.NlsSpeechTranscriber(
            url=URL,
            token=TOKEN,
            appkey=APPKEY,
            on_sentence_begin=self.test_on_sentence_begin,
            on_sentence_end=self.test_on_sentence_end,
            on_start=self.test_on_start,
            on_result_changed=self.test_on_result_chg,
            on_completed=self.test_on_completed,
            on_error=self.test_on_error,
            on_close=self.test_on_close,
            callback_args=[uuid.hex]
        )

        r = sr.start(aformat="pcm",
                     enable_intermediate_result=True,
                     enable_punctuation_prediction=True,
                     enable_inverse_text_normalization=True)

        while not self.stop:
            # time.sleep(self.capture_interval)
            audio = rad.read(uuid.hex)
            if audio is not None:
                # convert to pcm file
                temp_file = f'{temp_folder}/{uuid.hex}.pcm'
                dsdata = change_sample_rate(audio, rad.rate, NEW_SAMPLERATE)  # 48000 --> 16000
                io.wavfile.write(temp_file, NEW_SAMPLERATE, dsdata)
                # read pcm binary
                with open(temp_file, "rb") as f: data = f.read()
                # print('audio len:', len(audio), '\t ds len:', len(dsdata), '\t need n send:', len(data)//640)
                slices = zip(*(iter(data),) * 640)  # 640个字节为一组
                for i in slices: sr.send_audio(bytes(i))
            else:
                time.sleep(0.1)

        if not self.aliyun_service_ok:
            self.stop = True
            self.stop_msg = 'Aliyun音频服务异常,请检查ALIYUN_TOKEN和ALIYUN_APPKEY是否过期。'
        r = sr.stop()
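A brief orientation on the new class above (commentary only, not part of the committed file; judging from the import in crazy_functions/语音助手.py further down, this is presumably crazy_functions/live_audio/aliyunASR.py): audio_convertion_thread pulls raw 48 kHz audio from RealtimeAudioDistribution, resamples it to 16 kHz 16-bit PCM, and streams it to the Aliyun NLS transcriber in 640-byte chunks, i.e. 320 int16 samples or 20 ms of audio per send_audio call. A stand-alone sketch of that framing idiom with dummy data:

# Stand-alone sketch of the 640-byte framing used above; `fake_pcm` is dummy data.
import numpy as np

NEW_SAMPLERATE = 16000
fake_pcm = np.zeros(NEW_SAMPLERATE, dtype=np.int16).tobytes()   # one second of silence as 16-bit PCM

frame_bytes = 640                                               # 320 int16 samples = 20 ms at 16 kHz
slices = zip(*(iter(fake_pcm),) * frame_bytes)                  # same grouping idiom as in the file above
frames = [bytes(chunk) for chunk in slices]                     # what would be passed to sr.send_audio(...)
print(len(frames), "frames of", frame_bytes, "bytes")           # 50 frames per second of audio

Note that this idiom drops any trailing partial frame of the current buffer; only whole 640-byte groups are emitted.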
@@ -0,0 +1,51 @@
import numpy as np
from scipy import interpolate

def Singleton(cls):
    _instance = {}

    def _singleton(*args, **kargs):
        if cls not in _instance:
            _instance[cls] = cls(*args, **kargs)
        return _instance[cls]

    return _singleton


@Singleton
class RealtimeAudioDistribution():
    def __init__(self) -> None:
        self.data = {}
        self.max_len = 1024*1024
        self.rate = 48000  # 只读,每秒采样数量

    def clean_up(self):
        self.data = {}

    def feed(self, uuid, audio):
        self.rate, audio_ = audio
        # print('feed', len(audio_), audio_[-25:])
        if uuid not in self.data:
            self.data[uuid] = audio_
        else:
            new_arr = np.concatenate((self.data[uuid], audio_))
            if len(new_arr) > self.max_len: new_arr = new_arr[-self.max_len:]
            self.data[uuid] = new_arr

    def read(self, uuid):
        if uuid in self.data:
            res = self.data.pop(uuid)
            print('\r read-', len(res), '-', max(res), end='', flush=True)
        else:
            res = None
        return res

def change_sample_rate(audio, old_sr, new_sr):
    duration = audio.shape[0] / old_sr

    time_old = np.linspace(0, duration, audio.shape[0])
    time_new = np.linspace(0, duration, int(audio.shape[0] * new_sr / old_sr))

    interpolator = interpolate.interp1d(time_old, audio.T)
    new_audio = interpolator(time_new).T
    return new_audio.astype(np.int16)
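For orientation (commentary, not part of the new file): RealtimeAudioDistribution is made a process-wide singleton by the Singleton decorator, feed() appends an int16 buffer under a per-session key and truncates it to max_len, read() pops and returns whatever has accumulated (or None), and change_sample_rate() resamples by linear interpolation. A minimal usage sketch; the key "demo-uuid" and the synthetic 440 Hz tone are made up, and the import path is inferred from the other imports in this commit:

# Usage sketch only; module path inferred from "from .live_audio.aliyunASR import AliyunASR" / "from .audio_io import ..." above.
from crazy_functions.live_audio.audio_io import RealtimeAudioDistribution, change_sample_rate
import numpy as np

rad_a = RealtimeAudioDistribution()
rad_b = RealtimeAudioDistribution()
assert rad_a is rad_b                        # @Singleton: both names refer to the same instance

t = np.linspace(0, 1, 48000, endpoint=False)
tone = (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)   # 1 s of 440 Hz at 48 kHz

rad_a.feed("demo-uuid", (48000, tone))       # producer side: a (sample_rate, int16 array) tuple
chunk = rad_b.read("demo-uuid")              # consumer side: pops the buffered audio
resampled = change_sample_rate(chunk, old_sr=48000, new_sr=16000)
print(len(chunk), len(resampled))            # 48000 -> 16000 samples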
@@ -144,11 +144,11 @@ def 下载arxiv论文并翻译摘要(txt, llm_kwargs, plugin_kwargs, chatbot, hi

     # 尝试导入依赖,如果缺少依赖,则给出安装建议
     try:
-        import pdfminer, bs4
+        import bs4
     except:
         report_execption(chatbot, history,
                          a = f"解析项目: {txt}",
-                         b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pdfminer beautifulsoup4```。")
+                         b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade beautifulsoup4```。")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
@@ -12,7 +12,7 @@ def write_chat_to_file(chatbot, history=None, file_name=None):
         file_name = 'chatGPT对话历史' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.html'
     os.makedirs('./gpt_log/', exist_ok=True)
     with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f:
-        from theme import advanced_css
+        from themes.theme import advanced_css
         f.write(f'<!DOCTYPE html><head><meta charset="utf-8"><title>对话历史</title><style>{advanced_css}</style></head>')
         for i, contents in enumerate(chatbot):
             for j, content in enumerate(contents):
crazy_functions/语音助手.py  195 lines added  (normal file)
@@ -0,0 +1,195 @@
from toolbox import update_ui
from toolbox import CatchException, get_conf, markdown_convertion
from crazy_functions.crazy_utils import input_clipping
from request_llm.bridge_all import predict_no_ui_long_connection
import threading, time
import numpy as np
from .live_audio.aliyunASR import AliyunASR
import json

class WatchDog():
    def __init__(self, timeout, bark_fn, interval=3, msg="") -> None:
        self.last_feed = None
        self.timeout = timeout
        self.bark_fn = bark_fn
        self.interval = interval
        self.msg = msg
        self.kill_dog = False

    def watch(self):
        while True:
            if self.kill_dog: break
            if time.time() - self.last_feed > self.timeout:
                if len(self.msg) > 0: print(self.msg)
                self.bark_fn()
                break
            time.sleep(self.interval)

    def begin_watch(self):
        self.last_feed = time.time()
        th = threading.Thread(target=self.watch)
        th.daemon = True
        th.start()

    def feed(self):
        self.last_feed = time.time()

def chatbot2history(chatbot):
    history = []
    for c in chatbot:
        for q in c:
            if q not in ["[请讲话]", "[等待GPT响应]", "[正在等您说完问题]"]:
                history.append(q.strip('<div class="markdown-body">').strip('</div>').strip('<p>').strip('</p>'))
    return history
class AsyncGptTask():
    def __init__(self) -> None:
        self.observe_future = []
        self.observe_future_chatbot_index = []

    def gpt_thread_worker(self, i_say, llm_kwargs, history, sys_prompt, observe_window, index):
        try:
            MAX_TOKEN_ALLO = 2560
            i_say, history = input_clipping(i_say, history, max_token_limit=MAX_TOKEN_ALLO)
            gpt_say_partial = predict_no_ui_long_connection(inputs=i_say, llm_kwargs=llm_kwargs, history=history, sys_prompt=sys_prompt,
                                                            observe_window=observe_window[index], console_slience=True)
        except ConnectionAbortedError as token_exceed_err:
            print('至少一个线程任务Token溢出而失败', token_exceed_err)
        except Exception as e:
            print('至少一个线程任务意外失败', e)

    def add_async_gpt_task(self, i_say, chatbot_index, llm_kwargs, history, system_prompt):
        self.observe_future.append([""])
        self.observe_future_chatbot_index.append(chatbot_index)
        cur_index = len(self.observe_future)-1
        th_new = threading.Thread(target=self.gpt_thread_worker, args=(i_say, llm_kwargs, history, system_prompt, self.observe_future, cur_index))
        th_new.daemon = True
        th_new.start()

    def update_chatbot(self, chatbot):
        for of, ofci in zip(self.observe_future, self.observe_future_chatbot_index):
            try:
                chatbot[ofci] = list(chatbot[ofci])
                chatbot[ofci][1] = markdown_convertion(of[0])
            except:
                self.observe_future = []
                self.observe_future_chatbot_index = []
        return chatbot
class InterviewAssistant(AliyunASR):
    def __init__(self):
        self.capture_interval = 0.5  # second
        self.stop = False
        self.parsed_text = ""
        self.parsed_sentence = ""
        self.buffered_sentence = ""
        self.event_on_result_chg = threading.Event()
        self.event_on_entence_end = threading.Event()
        self.event_on_commit_question = threading.Event()

    def __del__(self):
        self.stop = True
        self.stop_msg = ""
        self.commit_wd.kill_dog = True
        self.plugin_wd.kill_dog = True

    def init(self, chatbot):
        # 初始化音频采集线程
        self.captured_audio = np.array([])
        self.keep_latest_n_second = 10
        self.commit_after_pause_n_second = 1.5
        self.ready_audio_flagment = None
        self.stop = False
        self.plugin_wd = WatchDog(timeout=5, bark_fn=self.__del__, msg="程序终止")
        self.aut = threading.Thread(target=self.audio_convertion_thread, args=(chatbot._cookies['uuid'],))
        self.aut.daemon = True
        self.aut.start()
        # th2 = threading.Thread(target=self.audio2txt_thread, args=(chatbot._cookies['uuid'],))
        # th2.daemon = True
        # th2.start()

    def no_audio_for_a_while(self):
        if len(self.buffered_sentence) < 7:  # 如果一句话小于7个字,暂不提交
            self.commit_wd.begin_watch()
        else:
            self.event_on_commit_question.set()
    def begin(self, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
        # main plugin function
        self.init(chatbot)
        chatbot.append(["[请讲话]", "[正在等您说完问题]"])
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        self.plugin_wd.begin_watch()
        self.agt = AsyncGptTask()
        self.commit_wd = WatchDog(timeout=self.commit_after_pause_n_second, bark_fn=self.no_audio_for_a_while, interval=0.2)
        self.commit_wd.begin_watch()

        while not self.stop:
            self.event_on_result_chg.wait(timeout=0.25)  # run once every 0.25 second
            chatbot = self.agt.update_chatbot(chatbot)   # 将子线程的gpt结果写入chatbot
            history = chatbot2history(chatbot)
            yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
            self.plugin_wd.feed()

            if self.event_on_result_chg.is_set():
                # update audio decode result
                self.event_on_result_chg.clear()
                chatbot[-1] = list(chatbot[-1])
                chatbot[-1][0] = self.buffered_sentence + self.parsed_text
                history = chatbot2history(chatbot)
                yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
                self.commit_wd.feed()

            if self.event_on_entence_end.is_set():
                # called when a sentence has ended
                self.event_on_entence_end.clear()
                self.parsed_text = self.parsed_sentence
                self.buffered_sentence += self.parsed_sentence

            if self.event_on_commit_question.is_set():
                # called when a question should be commited
                self.event_on_commit_question.clear()
                if len(self.buffered_sentence) == 0: raise RuntimeError

                self.commit_wd.begin_watch()
                chatbot[-1] = list(chatbot[-1])
                chatbot[-1] = [self.buffered_sentence, "[等待GPT响应]"]
                yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
                # add gpt task 创建子线程请求gpt,避免线程阻塞
                history = chatbot2history(chatbot)
                self.agt.add_async_gpt_task(self.buffered_sentence, len(chatbot)-1, llm_kwargs, history, system_prompt)

                self.buffered_sentence = ""
                chatbot.append(["[请讲话]", "[正在等您说完问题]"])
                yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

        if len(self.stop_msg) != 0:
            raise RuntimeError(self.stop_msg)
@CatchException
def 语音助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    # pip install -U openai-whisper
    chatbot.append(["对话助手函数插件:使用时,双手离开鼠标键盘吧", "音频助手, 正在听您讲话(点击“停止”键可终止程序)..."])
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

    # 尝试导入依赖,如果缺少依赖,则给出安装建议
    try:
        import nls
        from scipy import io
    except:
        chatbot.append(["导入依赖失败", "使用该模块需要额外依赖, 安装方法:```pip install --upgrade pyOpenSSL scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git```"])
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return

    TOKEN, APPKEY = get_conf('ALIYUN_TOKEN', 'ALIYUN_APPKEY')
    if TOKEN == "" or APPKEY == "":
        chatbot.append(["导入依赖失败", "没有阿里云语音识别APPKEY和TOKEN, 详情见https://help.aliyun.com/document_detail/450255.html"])
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return

    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
    ia = InterviewAssistant()
    yield from ia.begin(llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
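Putting the pieces of the new plugin together (commentary, not part of the commit): 语音助手 first verifies the nls and scipy dependencies and the ALIYUN_TOKEN / ALIYUN_APPKEY configuration, then InterviewAssistant.begin() starts the Aliyun transcription thread, mirrors intermediate recognition results into the last chatbot row, and relies on two WatchDog timers: plugin_wd (timeout 5 s) shuts everything down once the UI stops feeding it, while commit_wd (timeout 1.5 s) treats a pause in speech as the end of a question and hands the buffered sentence to AsyncGptTask. A stand-alone sketch of that WatchDog pattern; it assumes the WatchDog class defined in crazy_functions/语音助手.py above is importable, and the callback and timings are illustrative:

# Sketch of the feed/bark pattern; on_silence and the timings are made up for illustration,
# and WatchDog is assumed to be the class defined in crazy_functions/语音助手.py above.
import time

def on_silence():
    print("no feed for a while -> commit the buffered question")

wd = WatchDog(timeout=1.5, bark_fn=on_silence, interval=0.2)
wd.begin_watch()                  # starts the daemon watcher thread

for _ in range(5):                # while audio keeps arriving, keep feeding the dog
    time.sleep(0.5)
    wd.feed()

time.sleep(2.0)                   # stop feeding: after `timeout` seconds bark_fn fires once
wd.kill_dog = True                # not strictly needed here; the watcher exits after barking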