multimodal support for gpt-4o etc

这个提交包含在:
binary-husky
2024-06-06 07:36:37 +00:00
父节点 46428b7c7a
当前提交 7de6015800
共有 5 个文件被更改,包括 164 次插入54 次删除

查看文件

@@ -283,10 +283,6 @@ WHEN_TO_USE_PROXY = ["Download_LLM", "Download_Gradio_Theme", "Connect_Grobid",
"Warmup_Modules", "Nougat_Download", "AutoGen"] "Warmup_Modules", "Nougat_Download", "AutoGen"]
# *实验性功能*: 自动检测并屏蔽失效的KEY,请勿使用
BLOCK_INVALID_APIKEY = False
# 启用插件热加载 # 启用插件热加载
PLUGIN_HOT_RELOAD = False PLUGIN_HOT_RELOAD = False

查看文件

@@ -183,6 +183,7 @@ model_info = {
"fn_with_ui": chatgpt_ui, "fn_with_ui": chatgpt_ui,
"fn_without_ui": chatgpt_noui, "fn_without_ui": chatgpt_noui,
"endpoint": openai_endpoint, "endpoint": openai_endpoint,
"has_multimodal_capacity": True,
"max_token": 128000, "max_token": 128000,
"tokenizer": tokenizer_gpt4, "tokenizer": tokenizer_gpt4,
"token_cnt": get_token_num_gpt4, "token_cnt": get_token_num_gpt4,
@@ -191,6 +192,7 @@ model_info = {
"gpt-4o-2024-05-13": { "gpt-4o-2024-05-13": {
"fn_with_ui": chatgpt_ui, "fn_with_ui": chatgpt_ui,
"fn_without_ui": chatgpt_noui, "fn_without_ui": chatgpt_noui,
"has_multimodal_capacity": True,
"endpoint": openai_endpoint, "endpoint": openai_endpoint,
"max_token": 128000, "max_token": 128000,
"tokenizer": tokenizer_gpt4, "tokenizer": tokenizer_gpt4,
@@ -227,6 +229,7 @@ model_info = {
"gpt-4-turbo": { "gpt-4-turbo": {
"fn_with_ui": chatgpt_ui, "fn_with_ui": chatgpt_ui,
"fn_without_ui": chatgpt_noui, "fn_without_ui": chatgpt_noui,
"has_multimodal_capacity": True,
"endpoint": openai_endpoint, "endpoint": openai_endpoint,
"max_token": 128000, "max_token": 128000,
"tokenizer": tokenizer_gpt4, "tokenizer": tokenizer_gpt4,
@@ -236,6 +239,7 @@ model_info = {
"gpt-4-turbo-2024-04-09": { "gpt-4-turbo-2024-04-09": {
"fn_with_ui": chatgpt_ui, "fn_with_ui": chatgpt_ui,
"fn_without_ui": chatgpt_noui, "fn_without_ui": chatgpt_noui,
"has_multimodal_capacity": True,
"endpoint": openai_endpoint, "endpoint": openai_endpoint,
"max_token": 128000, "max_token": 128000,
"tokenizer": tokenizer_gpt4, "tokenizer": tokenizer_gpt4,
@@ -900,12 +904,13 @@ for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
# "mixtral-8x7b" 是模型名(必要) # "mixtral-8x7b" 是模型名(必要)
# "(max_token=6666)" 是配置(非必要) # "(max_token=6666)" 是配置(非必要)
try: try:
_, max_token_tmp = read_one_api_model_name(model) origin_model_name, max_token_tmp = read_one_api_model_name(model)
# 如果是已知模型,则尝试获取其信息
original_model_info = model_info.get(origin_model_name.replace("one-api-", "", 1), None)
except: except:
print(f"one-api模型 {model} 的 max_token 配置不是整数,请检查配置文件。") print(f"one-api模型 {model} 的 max_token 配置不是整数,请检查配置文件。")
continue continue
model_info.update({ this_model_info = {
model: {
"fn_with_ui": chatgpt_ui, "fn_with_ui": chatgpt_ui,
"fn_without_ui": chatgpt_noui, "fn_without_ui": chatgpt_noui,
"can_multi_thread": True, "can_multi_thread": True,
@@ -913,8 +918,17 @@ for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
"max_token": max_token_tmp, "max_token": max_token_tmp,
"tokenizer": tokenizer_gpt35, "tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35, "token_cnt": get_token_num_gpt35,
}, }
})
# 同步已知模型的其他信息
attribute = "has_multimodal_capacity"
if original_model_info is not None and original_model_info.get(attribute, None) is not None: this_model_info.update({attribute: original_model_info.get(attribute, None)})
# attribute = "attribute2"
# if original_model_info is not None and original_model_info.get(attribute, None) is not None: this_model_info.update({attribute: original_model_info.get(attribute, None)})
# attribute = "attribute3"
# if original_model_info is not None and original_model_info.get(attribute, None) is not None: this_model_info.update({attribute: original_model_info.get(attribute, None)})
model_info.update({model: this_model_info})
# -=-=-=-=-=-=- vllm 对齐支持 -=-=-=-=-=-=- # -=-=-=-=-=-=- vllm 对齐支持 -=-=-=-=-=-=-
for model in [m for m in AVAIL_LLM_MODELS if m.startswith("vllm-")]: for model in [m for m in AVAIL_LLM_MODELS if m.startswith("vllm-")]:
# 为了更灵活地接入vllm多模型管理界面,设计了此接口,例子AVAIL_LLM_MODELS = ["vllm-/home/hmp/llm/cache/Qwen1___5-32B-Chat(max_token=6666)"] # 为了更灵活地接入vllm多模型管理界面,设计了此接口,例子AVAIL_LLM_MODELS = ["vllm-/home/hmp/llm/cache/Qwen1___5-32B-Chat(max_token=6666)"]

查看文件

@@ -1,5 +1,3 @@
# 借鉴了 https://github.com/GaiZhenbiao/ChuanhuChatGPT 项目
""" """
该文件中主要包含三个函数 该文件中主要包含三个函数
@@ -11,19 +9,19 @@
""" """
import json import json
import os
import re
import time import time
import gradio as gr
import logging import logging
import traceback import traceback
import requests import requests
import importlib
import random import random
# config_private.py放自己的秘密如API和代理网址 # config_private.py放自己的秘密如API和代理网址
# 读取时首先看是否存在私密的config_private配置文件不受git管控,如果有,则覆盖原config文件 # 读取时首先看是否存在私密的config_private配置文件不受git管控,如果有,则覆盖原config文件
from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history
from toolbox import trimmed_format_exc, is_the_upload_folder, read_one_api_model_name, log_chat from toolbox import trimmed_format_exc, is_the_upload_folder, read_one_api_model_name, log_chat
from toolbox import ChatBotWithCookies from toolbox import ChatBotWithCookies, have_any_recent_upload_image_files, encode_image
proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG, AZURE_CFG_ARRAY = \ proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG, AZURE_CFG_ARRAY = \
get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG', 'AZURE_CFG_ARRAY') get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG', 'AZURE_CFG_ARRAY')
@@ -41,6 +39,48 @@ def get_full_error(chunk, stream_response):
break break
return chunk return chunk
def make_multimodal_input(inputs, image_paths):
    """Attach an inline HTML preview of each image to the prompt text.

    For every path an ``<img>`` snippet is appended to the prompt. The snippet
    carries the encoded image in a ``base64="..."`` attribute, which is the
    attribute ``reverse_base64_from_input`` searches for.

    Args:
        inputs: Prompt text to which the image snippets are appended.
        image_paths: Iterable of image file paths; each is made absolute
            before it is encoded and embedded.

    Returns:
        A tuple ``(inputs, image_base64_array)``: the prompt followed by one
        snippet per image, and the encoded images in the same order as
        ``image_paths``.
    """
    # Collect the pieces and join once: encoded images can be large, so
    # growing the prompt with repeated ``+`` would re-copy it for every image.
    fragments = [inputs]
    image_base64_array = []
    for image_path in image_paths:
        path = os.path.abspath(image_path)
        # ``encode_image`` is imported from toolbox; presumably it returns the
        # file content as base64 text -- TODO confirm against toolbox.
        image_base64 = encode_image(path)
        fragments.append(
            f'<br/><br/><div align="center"><img src="file={path}" base64="{image_base64}"></div>'
        )
        image_base64_array.append(image_base64)
    return "".join(fragments), image_base64_array
def reverse_base64_from_input(inputs):
    """Extract every embedded base64 payload from a message.

    Despite the name, nothing is reversed: the function recovers the values
    of all ``base64="..."`` attributes found in ``inputs``.

    Args:
        inputs: Message text, possibly containing ``base64="..."`` attributes.

    Returns:
        A list with the attribute values in order of appearance; an empty
        list when there are none or when ``inputs`` is not a string.
    """
    # A non-string entry cannot carry the markup, and would otherwise make
    # the regex call raise TypeError.
    if not isinstance(inputs, str):
        return []
    # Capture whatever sits between the quotes of each base64="..." attribute.
    return re.findall(r'base64="([^"]+)"', inputs)
def contain_base64(inputs):
    """Tell whether a message embeds at least one base64 image payload.

    Args:
        inputs: Message text to inspect.

    Returns:
        ``True`` when ``inputs`` contains a non-empty ``base64="..."``
        attribute, ``False`` otherwise (including for non-string input).
    """
    if not isinstance(inputs, str):
        return False
    # Same attribute pattern as reverse_base64_from_input (keep the two in
    # sync); ``search`` stops at the first hit instead of copying every
    # payload into a list just to count them.
    return re.search(r'base64="[^"]+"', inputs) is not None
def append_image_if_contain_base64(inputs):
    """Convert a message with embedded images into a multi-part content list.

    Args:
        inputs: Message text, possibly containing ``<img ... base64="...">``
            snippets wrapped in ``<br/><br/><div align="center">...</div>``.

    Returns:
        ``inputs`` unchanged when it embeds no base64 payload. Otherwise a
        list whose first element is a ``{"type": "text"}`` part holding the
        message with the image snippets removed, followed by one
        ``{"type": "image_url"}`` part per payload, in order of appearance.
    """
    # One extraction doubles as the presence check, so the message (which may
    # hold large payloads) is scanned once instead of twice.
    image_base64_array = reverse_base64_from_input(inputs)
    if not image_base64_array:
        return inputs

    # Drop the HTML preview snippets so only the plain prompt is sent as text.
    text = re.sub(r'<br/><br/><div align="center"><img[^><]+></div>', '', inputs)
    content = [{"type": "text", "text": text}]
    # NOTE(review): every image is labelled image/jpeg regardless of the
    # format of the original file -- confirm the endpoint tolerates this.
    content.extend(
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
        }
        for image_base64 in image_base64_array
    )
    return content
def decode_chunk(chunk): def decode_chunk(chunk):
# 提前读取一些信息 (用于判断异常) # 提前读取一些信息 (用于判断异常)
chunk_decoded = chunk.decode() chunk_decoded = chunk.decode()
@@ -159,6 +199,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
chatbot 为WebUI中显示的对话列表,修改它,然后yeild出去,可以直接修改对话界面内容 chatbot 为WebUI中显示的对话列表,修改它,然后yeild出去,可以直接修改对话界面内容
additional_fn代表点击的哪个按钮,按钮见functional.py additional_fn代表点击的哪个按钮,按钮见functional.py
""" """
from .bridge_all import model_info
if is_any_api_key(inputs): if is_any_api_key(inputs):
chatbot._cookies['api_key'] = inputs chatbot._cookies['api_key'] = inputs
chatbot.append(("输入已识别为openai的api_key", what_keys(inputs))) chatbot.append(("输入已识别为openai的api_key", what_keys(inputs)))
@@ -174,7 +215,17 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
from core_functional import handle_core_functionality from core_functional import handle_core_functionality
inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot) inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
chatbot.append((inputs, "")) # 多模态模型
has_multimodal_capacity = model_info[llm_kwargs['llm_model']].get('has_multimodal_capacity', False)
if has_multimodal_capacity:
has_recent_image_upload, image_paths = have_any_recent_upload_image_files(chatbot, pop=True)
else:
has_recent_image_upload, image_paths = False, []
if has_recent_image_upload:
_inputs, image_base64_array = make_multimodal_input(inputs, image_paths)
else:
_inputs, image_base64_array = inputs, []
chatbot.append((_inputs, ""))
yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面 yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
# check mis-behavior # check mis-behavior
@@ -184,7 +235,7 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
time.sleep(2) time.sleep(2)
try: try:
headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, stream) headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, image_base64_array, has_multimodal_capacity, stream)
except RuntimeError as e: except RuntimeError as e:
chatbot[-1] = (inputs, f"您提供的api-key不满足要求,不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。") chatbot[-1] = (inputs, f"您提供的api-key不满足要求,不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。")
yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # 刷新界面 yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # 刷新界面
@@ -192,7 +243,6 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
# 检查endpoint是否合法 # 检查endpoint是否合法
try: try:
from .bridge_all import model_info
endpoint = verify_endpoint(model_info[llm_kwargs['llm_model']]['endpoint']) endpoint = verify_endpoint(model_info[llm_kwargs['llm_model']]['endpoint'])
except: except:
tb_str = '```\n' + trimmed_format_exc() + '```' tb_str = '```\n' + trimmed_format_exc() + '```'
@@ -200,7 +250,11 @@ def predict(inputs:str, llm_kwargs:dict, plugin_kwargs:dict, chatbot:ChatBotWith
yield from update_ui(chatbot=chatbot, history=history, msg="Endpoint不满足要求") # 刷新界面 yield from update_ui(chatbot=chatbot, history=history, msg="Endpoint不满足要求") # 刷新界面
return return
history.append(inputs); history.append("") # 加入历史
if has_recent_image_upload:
history.extend([_inputs, ""])
else:
history.extend([inputs, ""])
retry = 0 retry = 0
while True: while True:
@@ -314,7 +368,7 @@ def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}") chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
return chatbot, history return chatbot, history
def generate_payload(inputs, llm_kwargs, history, system_prompt, stream): def generate_payload(inputs:str, llm_kwargs:dict, history:list, system_prompt:str, image_base64_array:list=[], has_multimodal_capacity:bool=False, stream:bool=True):
""" """
整合所有信息,选择LLM模型,生成http请求,为发送请求做准备 整合所有信息,选择LLM模型,生成http请求,为发送请求做准备
""" """
@@ -337,8 +391,18 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
azure_api_key_unshared = AZURE_CFG_ARRAY[llm_kwargs['llm_model']]["AZURE_API_KEY"] azure_api_key_unshared = AZURE_CFG_ARRAY[llm_kwargs['llm_model']]["AZURE_API_KEY"]
headers.update({"api-key": azure_api_key_unshared}) headers.update({"api-key": azure_api_key_unshared})
conversation_cnt = len(history) // 2 if has_multimodal_capacity:
# 当以下条件满足时,启用多模态能力:
# 1. 模型本身是多模态模型has_multimodal_capacity
# 2. 输入包含图像len(image_base64_array) > 0
# 3. 历史输入包含图像( any([contain_base64(h) for h in history])
enable_multimodal_capacity = (len(image_base64_array) > 0) or any([contain_base64(h) for h in history])
else:
enable_multimodal_capacity = False
if not enable_multimodal_capacity:
# 不使用多模态能力
conversation_cnt = len(history) // 2
messages = [{"role": "system", "content": system_prompt}] messages = [{"role": "system", "content": system_prompt}]
if conversation_cnt: if conversation_cnt:
for index in range(0, 2*conversation_cnt, 2): for index in range(0, 2*conversation_cnt, 2):
@@ -355,11 +419,46 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
messages.append(what_gpt_answer) messages.append(what_gpt_answer)
else: else:
messages[-1]['content'] = what_gpt_answer['content'] messages[-1]['content'] = what_gpt_answer['content']
what_i_ask_now = {} what_i_ask_now = {}
what_i_ask_now["role"] = "user" what_i_ask_now["role"] = "user"
what_i_ask_now["content"] = inputs what_i_ask_now["content"] = inputs
messages.append(what_i_ask_now) messages.append(what_i_ask_now)
else:
# 多模态能力
conversation_cnt = len(history) // 2
messages = [{"role": "system", "content": system_prompt}]
if conversation_cnt:
for index in range(0, 2*conversation_cnt, 2):
what_i_have_asked = {}
what_i_have_asked["role"] = "user"
what_i_have_asked["content"] = append_image_if_contain_base64(history[index])
what_gpt_answer = {}
what_gpt_answer["role"] = "assistant"
what_gpt_answer["content"] = append_image_if_contain_base64(history[index+1])
if what_i_have_asked["content"] != "":
if what_gpt_answer["content"] == "": continue
if what_gpt_answer["content"] == timeout_bot_msg: continue
messages.append(what_i_have_asked)
messages.append(what_gpt_answer)
else:
messages[-1]['content'] = what_gpt_answer['content']
what_i_ask_now = {}
what_i_ask_now["role"] = "user"
what_i_ask_now["content"] = []
what_i_ask_now["content"].append({
"type": "text",
"text": inputs
})
for image_base64 in image_base64_array:
what_i_ask_now["content"].append({
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{image_base64}"
}
})
messages.append(what_i_ask_now)
model = llm_kwargs['llm_model'] model = llm_kwargs['llm_model']
if llm_kwargs['llm_model'].startswith('api2d-'): if llm_kwargs['llm_model'].startswith('api2d-'):
model = llm_kwargs['llm_model'][len('api2d-'):] model = llm_kwargs['llm_model'][len('api2d-'):]

查看文件

@@ -27,10 +27,8 @@ timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check
def report_invalid_key(key): def report_invalid_key(key):
if get_conf("BLOCK_INVALID_APIKEY"): # 弃用功能
# 实验性功能,自动检测并屏蔽失效的KEY,请勿使用 return
from request_llms.key_manager import ApiKeyManager
api_key = ApiKeyManager().add_key_to_blacklist(key)
def get_full_error(chunk, stream_response): def get_full_error(chunk, stream_response):
""" """

查看文件

@@ -903,15 +903,18 @@ def get_pictures_list(path):
return file_manifest return file_manifest
def have_any_recent_upload_image_files(chatbot:ChatBotWithCookies): def have_any_recent_upload_image_files(chatbot:ChatBotWithCookies, pop:bool=False):
_5min = 5 * 60 _5min = 5 * 60
if chatbot is None: if chatbot is None:
return False, None # chatbot is None return False, None # chatbot is None
if pop:
most_recent_uploaded = chatbot._cookies.pop("most_recent_uploaded", None)
else:
most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None) most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
# most_recent_uploaded 是一个放置最新上传图像的路径
if not most_recent_uploaded: if not most_recent_uploaded:
return False, None # most_recent_uploaded is None return False, None # most_recent_uploaded is None
if time.time() - most_recent_uploaded["time"] < _5min: if time.time() - most_recent_uploaded["time"] < _5min:
most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
path = most_recent_uploaded["path"] path = most_recent_uploaded["path"]
file_manifest = get_pictures_list(path) file_manifest = get_pictures_list(path)
if len(file_manifest) == 0: if len(file_manifest) == 0: