diff --git a/config.py b/config.py
index ad206d022..eedac2837 100644
--- a/config.py
+++ b/config.py
@@ -43,18 +43,22 @@
# AVAIL_LLM_MODELS = [
# "qianfan", "deepseekcoder",
# "spark", "sparkv2", "sparkv3", "sparkv3.5",
-# "qwen-turbo", "qwen-plus", "qwen-max", "qwen-local",
+# "qwen-turbo", "qwen-plus", "qwen-max", "qwen-max-longcontext", "qwen-long", "qwen-local",
+# "qwen2-72b-instruct","qwen2-57b-a14b-instruct","qwen2-7b-instruct","qwen1.5-110b-chat",
# "moonshot-v1-128k", "moonshot-v1-32k", "moonshot-v1-8k",
# "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0125", "gpt-4o-2024-05-13"
# "claude-3-haiku-20240307","claude-3-sonnet-20240229","claude-3-opus-20240229", "claude-2.1", "claude-instant-1.2",
# "moss", "llama2", "chatglm_onnx", "internlm", "jittorllms_pangualpha", "jittorllms_llama",
# "deepseek-chat" ,"deepseek-coder",
-# "yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview",
+# "llama3-8b-8192", "gemma-7b-it", "mixtral-8x7b-32768", "llama3-70b-8192",
+# "yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview","yi-vision"
# ]
# --- --- --- ---
# In addition, when connecting models through one-api/vllm/ollama, you can use the
# "one-api-*", "vllm-*", "ollama-*" prefixes to access models integrated in non-standard ways, e.g.
# AVAIL_LLM_MODELS = ["one-api-claude-3-sonnet-20240229(max_token=100000)", "ollama-phi3(max_token=4096)"]
+# When connecting multimodal models, you can use the "one-api-vision-*" prefix, e.g.
+# AVAIL_LLM_MODELS = ["one-api-vision-gpt-4o(max_token=32000)"]
# --- --- --- ---
@@ -127,7 +131,7 @@
QWEN_LOCAL_MODEL_SELECTION = "Qwen/Qwen-1_8B-Chat-Int8"
-# Connect to the Tongyi Qianwen (Qwen) online models https://dashscope.console.aliyun.com/
+# Connect to the Tongyi Qianwen (Qwen) online models https://bailian.console.aliyun.com/
DASHSCOPE_API_KEY = "" # Alibaba DashScope API_KEY
@@ -229,14 +233,20 @@
# 01.AI (Yi Model) API KEY
YIMODEL_API_KEY = ""
+
# DeepSeek API KEY; the default request URL is "https://api.deepseek.com/v1/chat/completions"
DEEPSEEK_API_KEY = ""
+
# Mathpix provides OCR for PDFs, but registering an account is required
MATHPIX_APPID = ""
MATHPIX_APPKEY = ""
+# Groq API KEY; the default request URL is "https://api.groq.com/openai/v1/chat/completions"
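+# Keys can be created in the Groq console: https://console.groq.com/keys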
+GROQ_API_KEY = ""
+
# DOC2X PDF parsing service; register an account and obtain an API KEY: https://doc2x.noedgeai.com/login
DOC2X_API_KEY = ""
diff --git a/request_llms/bridge_all.py b/request_llms/bridge_all.py
index 06e695835..ea7612889 100644
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@@ -72,6 +72,8 @@ def decode(self, *args, **kwargs):
ollama_endpoint = "http://localhost:11434/api/chat"
yimodel_endpoint = "https://api.lingyiwanwu.com/v1/chat/completions"
deepseekapi_endpoint = "https://api.deepseek.com/v1/chat/completions"
+qwenapi_endpoint = "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions"
+groq_endpoint = "https://api.groq.com/openai/v1/chat/completions"
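+# Both new endpoints follow the OpenAI-compatible chat-completions schema (note the "compatible-mode"
+# and "/openai/" path segments), which is why the generic predict functions below can drive them.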
if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/'
azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15'
@@ -93,6 +95,8 @@ def decode(self, *args, **kwargs):
if ollama_endpoint in API_URL_REDIRECT: ollama_endpoint = API_URL_REDIRECT[ollama_endpoint]
if yimodel_endpoint in API_URL_REDIRECT: yimodel_endpoint = API_URL_REDIRECT[yimodel_endpoint]
if deepseekapi_endpoint in API_URL_REDIRECT: deepseekapi_endpoint = API_URL_REDIRECT[deepseekapi_endpoint]
+if qwenapi_endpoint in API_URL_REDIRECT: qwenapi_endpoint = API_URL_REDIRECT[qwenapi_endpoint]
+if groq_endpoint in API_URL_REDIRECT: groq_endpoint = API_URL_REDIRECT[groq_endpoint]
# Get the tokenizers
tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
@@ -640,43 +644,118 @@ def decode(self, *args, **kwargs):
except:
print(trimmed_format_exc())
# -=-=-=-=-=-=- Tongyi Qianwen (Qwen) online models -=-=-=-=-=-=-
-if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS: # zhipuai
+qwen_models = [
+ "qwen-turbo",
+ "qwen-plus",
+ "qwen-max",
+ "qwen-max-longcontext",
+ "qwen-long",
+ "qwen2-72b-instruct",
+ "qwen2-57b-a14b-instruct",
+ "qwen2-7b-instruct",
+ "qwen1.5-110b-chat",
+]
+if any(item in qwen_models for item in AVAIL_LLM_MODELS):
try:
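+        # get_predict_function returns a (no-UI, UI) predict-function pair; one pair is created
+        # per max_output_token tier and shared by all models assigned to that tier below.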
- from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
- from .bridge_qwen import predict as qwen_ui
+ qwen_1500_noui, qwen_1500_ui = get_predict_function(
+ api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=1500, disable_proxy=False
+ )
+ qwen_2000_noui, qwen_2000_ui = get_predict_function(
+ api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=2000, disable_proxy=False
+ )
+ qwen_6144_noui, qwen_6144_ui = get_predict_function(
+ api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=6144, disable_proxy=False
+ )
+ qwen_8000_noui, qwen_8000_ui = get_predict_function(
+ api_key_conf_name="DASHSCOPE_API_KEY", max_output_token=8000, disable_proxy=False
+ )
model_info.update({
"qwen-turbo": {
- "fn_with_ui": qwen_ui,
- "fn_without_ui": qwen_noui,
+ "fn_with_ui": qwen_1500_ui,
+ "fn_without_ui": qwen_1500_noui,
"can_multi_thread": True,
- "endpoint": None,
+ "endpoint": qwenapi_endpoint,
"max_token": 6144,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen-plus": {
- "fn_with_ui": qwen_ui,
- "fn_without_ui": qwen_noui,
+ "fn_with_ui": qwen_2000_ui,
+ "fn_without_ui": qwen_2000_noui,
"can_multi_thread": True,
- "endpoint": None,
+ "endpoint": qwenapi_endpoint,
"max_token": 30720,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"qwen-max": {
- "fn_with_ui": qwen_ui,
- "fn_without_ui": qwen_noui,
+ "fn_with_ui": qwen_2000_ui,
+ "fn_without_ui": qwen_2000_noui,
"can_multi_thread": True,
- "endpoint": None,
+ "endpoint": qwenapi_endpoint,
+ "max_token": 6144,
+ "tokenizer": tokenizer_gpt35,
+ "token_cnt": get_token_num_gpt35,
+ },
+ "qwen-max-longcontext": {
+ "fn_with_ui": qwen_2000_ui,
+ "fn_without_ui": qwen_2000_noui,
+ "can_multi_thread": True,
+ "endpoint": qwenapi_endpoint,
"max_token": 28672,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
- }
+ },
+ "qwen-long": {
+ "fn_with_ui": qwen_2000_ui,
+ "fn_without_ui": qwen_2000_noui,
+ "can_multi_thread": True,
+ "endpoint": qwenapi_endpoint,
+ "max_token": 1000000,
+ "tokenizer": tokenizer_gpt35,
+ "token_cnt": get_token_num_gpt35,
+ },
+ "qwen2-72b-instruct": {
+ "fn_with_ui": qwen_6144_ui,
+ "fn_without_ui": qwen_6144_noui,
+ "can_multi_thread": True,
+ "endpoint": qwenapi_endpoint,
+ "max_token": 128000,
+ "tokenizer": tokenizer_gpt35,
+ "token_cnt": get_token_num_gpt35,
+ },
+ "qwen2-57b-a14b-instruct": {
+ "fn_with_ui": qwen_6144_ui,
+ "fn_without_ui": qwen_6144_noui,
+ "can_multi_thread": True,
+ "endpoint": qwenapi_endpoint,
+ "max_token": 30720,
+ "tokenizer": tokenizer_gpt35,
+ "token_cnt": get_token_num_gpt35,
+ },
+ "qwen2-7b-instruct": {
+ "fn_with_ui": qwen_6144_ui,
+ "fn_without_ui": qwen_6144_noui,
+ "can_multi_thread": True,
+ "endpoint": qwenapi_endpoint,
+ "max_token": 128000,
+ "tokenizer": tokenizer_gpt35,
+ "token_cnt": get_token_num_gpt35,
+ },
+ "qwen1.5-110b-chat": {
+ "fn_with_ui": qwen_8000_ui,
+ "fn_without_ui": qwen_8000_noui,
+ "can_multi_thread": True,
+ "endpoint": qwenapi_endpoint,
+ "max_token": 32000,
+ "tokenizer": tokenizer_gpt35,
+ "token_cnt": get_token_num_gpt35,
+ },
})
except:
print(trimmed_format_exc())
# -=-=-=-=-=-=- 01.AI (Yi) models -=-=-=-=-=-=-
-yi_models = ["yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview"]
+yi_models = ["yi-34b-chat-0205","yi-34b-chat-200k","yi-large","yi-medium","yi-spark","yi-large-turbo","yi-large-preview","yi-vision"]
if any(item in yi_models for item in AVAIL_LLM_MODELS):
try:
yimodel_4k_noui, yimodel_4k_ui = get_predict_function(
@@ -688,6 +767,23 @@ def decode(self, *args, **kwargs):
yimodel_200k_noui, yimodel_200k_ui = get_predict_function(
api_key_conf_name="YIMODEL_API_KEY", max_output_token=4096, disable_proxy=False
)
+ if "yi-vision" in AVAIL_LLM_MODELS:
+ from .bridge_yi_vision import yi_version_generate_message_version
+ yimodel_version_noui, yimodel_version_ui = get_predict_function(
+ api_key_conf_name="YIMODEL_API_KEY", max_output_token=600, disable_proxy=False, encode_call=yi_version_generate_message_version
+ )
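+        # encode_call swaps in a vision-aware message builder so that images uploaded in the chat
+        # are attached to the request (see bridge_yi_vision.py).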
+ model_info.update({
+ "yi-vision": {
+ "fn_with_ui": yimodel_version_ui,
+ "fn_without_ui": yimodel_version_noui,
+ "can_multi_thread": True,
+ "endpoint": yimodel_endpoint,
+ "max_token": 4000,
+ "tokenizer": tokenizer_gpt35,
+ "token_cnt": get_token_num_gpt35,
+ }
+ })
+
model_info.update({
"yi-34b-chat-0205": {
"fn_with_ui": yimodel_4k_ui,
@@ -892,6 +988,52 @@ def decode(self, *args, **kwargs):
})
except:
print(trimmed_format_exc())
+# -=-=-=-=-=-=- groq -=-=-=-=-=-=-
+groq_models = ["llama3-8b-8192", "gemma-7b-it", "mixtral-8x7b-32768", "llama3-70b-8192"]
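+# For the llama3 and mixtral entries, the trailing number in the model name is the context window,
+# mirrored in max_token below.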
+if any(item in groq_models for item in AVAIL_LLM_MODELS):
+ try:
+ groq_8k_noui, groq_8k_ui = get_predict_function(
+ api_key_conf_name="GROQ_API_KEY", max_output_token=8192, disable_proxy=False
+ )
+ groq_32k_noui, groq_32k_ui = get_predict_function(
+ api_key_conf_name="GROQ_API_KEY", max_output_token=32768, disable_proxy=False
+ )
+ model_info.update({
+ "llama3-8b-8192": {
+ "fn_with_ui": groq_8k_ui,
+ "fn_without_ui": groq_8k_noui,
+ "endpoint": groq_endpoint,
+ "max_token": 8192,
+ "tokenizer": tokenizer_gpt35,
+ "token_cnt": get_token_num_gpt35,
+ },
+ "gemma-7b-it": {
+ "fn_with_ui": groq_8k_ui,
+ "fn_without_ui": groq_8k_noui,
+ "endpoint": groq_endpoint,
+ "max_token": 8192,
+ "tokenizer": tokenizer_gpt35,
+ "token_cnt": get_token_num_gpt35,
+ },
+ "mixtral-8x7b-32768": {
+ "fn_with_ui": groq_32k_ui,
+ "fn_without_ui": groq_32k_noui,
+ "endpoint": groq_endpoint,
+ "max_token": 32768,
+ "tokenizer": tokenizer_gpt35,
+ "token_cnt": get_token_num_gpt35,
+ },
+ "llama3-70b-8192": {
+ "fn_with_ui": groq_8k_ui,
+ "fn_without_ui": groq_8k_noui,
+ "endpoint": groq_endpoint,
+ "max_token": 8192,
+ "tokenizer": tokenizer_gpt35,
+ "token_cnt": get_token_num_gpt35,
+ },
+ })
+ except:
+ print(trimmed_format_exc())
# -=-=-=-=-=-=- one-api 对齐支持 -=-=-=-=-=-=-
for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-")]:
    # This interface was designed for more flexible access to the one-api multi-model management UI; example: AVAIL_LLM_MODELS = ["one-api-mixtral-8x7b(max_token=6666)"]
@@ -915,6 +1057,36 @@ def decode(self, *args, **kwargs):
"token_cnt": get_token_num_gpt35,
},
})
+# -=-=-=-=-=-=- one-api-vision 对齐支持 -=-=-=-=-=-=-
+for model in [m for m in AVAIL_LLM_MODELS if m.startswith("one-api-vision-")]:
+    # This interface was designed for more flexible access to multimodal models behind the one-api multi-model management UI; example: AVAIL_LLM_MODELS = ["one-api-vision-gpt-4o(max_token=32000)"]
+    # where
+    #   "one-api-vision-" is the prefix (required)
+    #   "gpt-4o" is the model name (required)
+    #   "(max_token=32000)" is the configuration (optional)
+ try:
+ _, max_token_tmp = read_one_api_model_name(model)
+ except:
+ print(f"one-api-vision模型 {model} 的 max_token 配置不是整数,请检查配置文件。")
+ continue
+ try:
+ from .oai_vision_std import generate_message_version
+ one_api_version_noui, one_api_version_ui = get_predict_function(
+ api_key_conf_name="API_KEY", max_output_token=4000, disable_proxy=False, encode_call=generate_message_version
+ )
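+        # generate_message_version builds an OpenAI-style vision payload (image attachments in the
+        # message body) in place of the default text-only message encoder.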
+ model_info.update({
+ model: {
+ "fn_with_ui": one_api_version_ui,
+ "fn_without_ui": one_api_version_noui,
+ "can_multi_thread": True,
+ "endpoint": openai_endpoint,
+ "max_token": max_token_tmp,
+ "tokenizer": tokenizer_gpt35,
+ "token_cnt": get_token_num_gpt35,
+ },
+ })
+ except:
+ print(trimmed_format_exc())
# -=-=-=-=-=-=- vllm 对齐支持 -=-=-=-=-=-=-
for model in [m for m in AVAIL_LLM_MODELS if m.startswith("vllm-")]:
    # This interface was designed for more flexible access to the vllm multi-model management UI; example: AVAIL_LLM_MODELS = ["vllm-/home/hmp/llm/cache/Qwen1___5-32B-Chat(max_token=6666)"]
diff --git a/request_llms/bridge_qwen.py b/request_llms/bridge_qwen.py
deleted file mode 100644
index 2b1eeed27..000000000
--- a/request_llms/bridge_qwen.py
+++ /dev/null
@@ -1,66 +0,0 @@
-import time
-import os
-from toolbox import update_ui, get_conf, update_ui_lastest_msg
-from toolbox import check_packages, report_exception
-
-model_name = 'Qwen'
-
-def predict_no_ui_long_connection(inputs:str, llm_kwargs:dict, history:list=[], sys_prompt:str="",
- observe_window:list=[], console_slience:bool=False):
- """
- ⭐多线程方法
- 函数的说明请见 request_llms/bridge_all.py
- """
- watch_dog_patience = 5
- response = ""
-
- from .com_qwenapi import QwenRequestInstance
- sri = QwenRequestInstance()
- for response in sri.generate(inputs, llm_kwargs, history, sys_prompt):
- if len(observe_window) >= 1:
- observe_window[0] = response
- if len(observe_window) >= 2:
- if (time.time()-observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。")
- return response
-
-def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
- """
- ⭐单线程方法
- 函数的说明请见 request_llms/bridge_all.py
- """
- chatbot.append((inputs, ""))
- yield from update_ui(chatbot=chatbot, history=history)
-
- # 尝试导入依赖,如果缺少依赖,则给出安装建议
- try:
- check_packages(["dashscope"])
- except:
- yield from update_ui_lastest_msg(f"导入软件依赖失败。使用该模型需要额外依赖,安装方法```pip install --upgrade dashscope```。",
- chatbot=chatbot, history=history, delay=0)
- return
-
- # 检查DASHSCOPE_API_KEY
- if get_conf("DASHSCOPE_API_KEY") == "":
- yield from update_ui_lastest_msg(f"请配置 DASHSCOPE_API_KEY。",
- chatbot=chatbot, history=history, delay=0)
- return
-
- if additional_fn is not None:
- from core_functional import handle_core_functionality
- inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
- chatbot[-1] = (inputs, "")
- yield from update_ui(chatbot=chatbot, history=history)
-
- # 开始接收回复
- from .com_qwenapi import QwenRequestInstance
- sri = QwenRequestInstance()
- response = f"[Local Message] 等待{model_name}响应中 ..."
- for response in sri.generate(inputs, llm_kwargs, history, system_prompt):
- chatbot[-1] = (inputs, response)
- yield from update_ui(chatbot=chatbot, history=history)
-
- # 总结输出
- if response == f"[Local Message] 等待{model_name}响应中 ...":
- response = f"[Local Message] {model_name}响应异常 ..."
- history.extend([inputs, response])
- yield from update_ui(chatbot=chatbot, history=history)
\ No newline at end of file
diff --git a/request_llms/bridge_yi_vision.py b/request_llms/bridge_yi_vision.py
new file mode 100644
index 000000000..d0cdef53f
--- /dev/null
+++ b/request_llms/bridge_yi_vision.py
@@ -0,0 +1,88 @@
+from toolbox import encode_image, every_image_file_in_path, read_one_api_model_name
+from .oai_vision_std import multiple_picture_types
+
+timeout_bot_msg = (
+ "[Local Message] Request timeout. Network error. Please check proxy settings in config.py."
+ + "网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。"
+)
+
+def yi_version_generate_message_version(
+ chatbot, input, model, key, history, max_output_token, system_prompt, temperature
+):
+ """
+    Assemble all information, select the LLM model, and build the HTTP request in preparation for sending it
+ """
+    if chatbot is not None:
+ have_recent_file, image_paths = every_image_file_in_path(chatbot)
+ else:
+ have_recent_file = False
+ image_paths = []
+ conversation_cnt = len(history) // 2
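+    # history alternates user/assistant turns, so each pair counts as one conversation round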
+ messages = []
+ input = system_prompt + "\n" + input
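+    # Note: the system prompt is folded into the user input rather than sent as a separate system message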
+
+ # def make_media_input(inputs, image_paths):
+ # for image_path in image_paths:
+ # inputs = (
+ # inputs
+ # + f'