GROBID服务代理访问支持

这个提交包含在:
binary-husky
2023-09-27 15:40:55 +08:00
父节点 9cb05e5724
当前提交 0844b6e9cf
共有 2 个文件被更改,包括 7 次插入和 4 次删除

查看文件

@@ -2,6 +2,8 @@ from functools import lru_cache
from toolbox import gen_time_str
from toolbox import promote_file_to_downloadzone
from toolbox import write_history_to_file, promote_file_to_downloadzone
from toolbox import get_conf
from toolbox import ProxyNetworkActivate
from colorful import *
import requests
import random
@@ -12,13 +14,13 @@ import math
class GROBID_OFFLINE_EXCEPTION(Exception):
    """Signal that the GROBID service cannot be used.

    Carries no state beyond the standard ``Exception`` arguments; callers
    pass a human-readable message when raising it.
    """
def get_avail_grobid_url():
from toolbox import get_conf
GROBID_URLS, = get_conf('GROBID_URLS')
if len(GROBID_URLS) == 0: return None
try:
_grobid_url = random.choice(GROBID_URLS) # 随机负载均衡
if _grobid_url.endswith('/'): _grobid_url = _grobid_url.rstrip('/')
res = requests.get(_grobid_url+'/api/isalive')
with ProxyNetworkActivate('Connect_Grobid'):
res = requests.get(_grobid_url+'/api/isalive')
if res.text=='true': return _grobid_url
else: return None
except:
@@ -29,7 +31,8 @@ def parse_pdf(pdf_path, grobid_url):
import scipdf # pip install scipdf_parser
if grobid_url.endswith('/'): grobid_url = grobid_url.rstrip('/')
try:
article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
with ProxyNetworkActivate('Connect_Grobid'):
article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
except GROBID_OFFLINE_EXCEPTION:
raise GROBID_OFFLINE_EXCEPTION("GROBID服务不可用,请修改config中的GROBID_URL,可修改成本地GROBID服务。")
except: