镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-06 14:36:48 +00:00
Master 4.0 (#2210)
* stage academic conversation * stage document conversation * fix buggy gradio version * file dynamic load * merge more academic plugins * accelerate nltk * feat: 为predict函数添加文件和URL读取功能 - 添加URL检测和网页内容提取功能,支持自动提取网页文本 - 添加文件路径识别和文件内容读取功能,支持private_upload路径格式 - 集成WebTextExtractor处理网页内容提取 - 集成TextContentLoader处理本地文件读取 - 支持文件路径与问题组合的智能处理 * back * block unstable --------- Co-authored-by: XiaoBoAI <liuboyin2019@ia.ac.cn>
这个提交包含在:
@@ -0,0 +1,142 @@
|
||||
import json
|
||||
import os
|
||||
from typing import Dict, Optional
|
||||
|
||||
class JournalMetrics:
    """Look up journal metrics by ISSN or by journal name.

    On construction the class reads ``cas_if.json`` from the directory that
    contains this module and builds in-memory indexes. Each indexed record is
    a dict with three keys:

    * ``if_factor``    - the record's ``IF`` field converted to ``float``
      (``None`` when it cannot be converted),
    * ``jcr_division`` - the record's ``Q`` field, stored as-is,
    * ``cas_division`` - the record's ``B`` field, stored as-is.
    """

    # Affixes removed while normalizing a name. They are tried one after the
    # other in this exact order, so more than one of them can be removed.
    _NAME_PREFIXES = ('the ', 'proceedings of ', 'journal of ')
    _NAME_SUFFIXES = (' journal', ' proceedings', ' magazine', ' review', ' letters')

    def __init__(self):
        # Normalized journal name / abbreviation -> metrics.
        self.journal_data: Dict = {}
        # ISSN or eISSN exactly as written in the data file -> metrics.
        self.issn_map: Dict = {}
        # Normalized journal name / abbreviation -> metrics
        # (filled with the same entries as ``journal_data``).
        self.name_map: Dict = {}
        self._load_journal_data()

    def _normalize_journal_name(self, name: str) -> str:
        """Return a canonical form of a journal name used as a lookup key.

        The name is lower-cased, the known prefixes and suffixes are removed,
        every character that is neither alphanumeric nor whitespace is
        dropped, and runs of whitespace are collapsed to a single space.

        Args:
            name: Raw journal name or abbreviation.

        Returns:
            The normalized name, or ``""`` when ``name`` is empty or is not a
            string.
        """
        if not name or not isinstance(name, str):
            return ""

        name = name.lower()

        for prefix in self._NAME_PREFIXES:
            if name.startswith(prefix):
                name = name[len(prefix):]

        for suffix in self._NAME_SUFFIXES:
            if name.endswith(suffix):
                name = name[:-len(suffix)]

        # Keep only letters, digits and whitespace.
        name = ''.join(c for c in name if c.isalnum() or c.isspace())

        # Collapse repeated whitespace and trim both ends.
        return ' '.join(name.split())

    def _convert_if_value(self, if_str: object) -> Optional[float]:
        """Convert a raw impact-factor value to ``float``.

        Args:
            if_str: The raw value. Strings such as ``"3.5"`` or ``"<0.1"`` and
                plain numbers are accepted.

        Returns:
            The value as a float; for a string of the form ``"<0.1"`` the
            bound itself (``0.1``) is returned. ``None`` is returned for
            anything that cannot be converted.
        """
        # bool is a subclass of int but is never a meaningful impact factor.
        if isinstance(if_str, bool):
            return None

        try:
            # JSON numbers arrive as int/float and have no string methods.
            if isinstance(if_str, (int, float)):
                return float(if_str)

            if isinstance(if_str, str):
                text = if_str.strip()
                if text.startswith('<'):
                    text = text.strip('<')
                return float(text)
        except (ValueError, OverflowError):
            return None

        return None

    def _index_journal(self, journal) -> None:
        """Add one record of the data file to the lookup indexes.

        Records that are not dicts are ignored. Names and abbreviations that
        normalize to an empty string are not indexed, so that an empty lookup
        key can never match a journal.
        """
        if not isinstance(journal, dict):
            return

        # One metrics dict is shared by every key that refers to this journal.
        metrics = {
            'if_factor': self._convert_if_value(journal.get('IF')),
            'jcr_division': journal.get('Q'),
            'cas_division': journal.get('B'),
        }

        # Full name first, then abbreviation (later entries overwrite earlier
        # ones that normalize to the same key).
        for field in ('journal', 'jabb'):
            key = self._normalize_journal_name(journal.get(field))
            if key:
                self.journal_data[key] = metrics
                self.name_map[key] = metrics

        for field in ('issn', 'eissn'):
            issn = journal.get(field)
            if issn:
                self.issn_map[issn] = metrics

    def _load_journal_data(self):
        """Load ``cas_if.json`` and populate the lookup indexes.

        Any failure (missing file, invalid JSON, malformed content) is
        reported on stdout and leaves all three indexes empty; no exception
        is propagated to the caller.
        """
        try:
            file_path = os.path.join(os.path.dirname(__file__), 'cas_if.json')
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            for journal in data:
                self._index_journal(journal)

        except Exception as e:
            print(f"加载期刊数据时出错: {str(e)}")
            # Do not keep a partially built index.
            self.journal_data = {}
            self.issn_map = {}
            self.name_map = {}

    def get_journal_metrics(self, venue_name: str, venue_info: dict) -> dict:
        """Return the metrics of a venue.

        The ISSN(s) found under ``venue_info['issn']`` are tried first; when
        none of them matches, the normalized ``venue_name`` is looked up.
        Only exact matches are considered (substring matching of names is
        not performed).

        Args:
            venue_name: Journal name or abbreviation.
            venue_info: Optional venue details. Its ``issn`` entry may be a
                single value or a list/tuple of values.

        Returns:
            A new dict with the keys ``if_factor``, ``jcr_division`` and
            ``cas_division``, or an empty dict when nothing matches or an
            error occurs. The returned dict is a copy, so modifying it does
            not affect the internal indexes.
        """
        try:
            # 1. Try every ISSN that was supplied.
            if venue_info and 'issn' in venue_info:
                issn_value = venue_info['issn']
                if isinstance(issn_value, (list, tuple)):
                    candidates = issn_value
                else:
                    candidates = [issn_value]

                for issn in candidates:
                    try:
                        found = self.issn_map.get(issn)
                    except TypeError:
                        # Unhashable entry: ignore it and keep searching
                        # instead of giving up on the whole lookup.
                        continue
                    if found:
                        return dict(found)

            # 2. Fall back to the normalized journal name.
            if venue_name:
                normalized_name = self._normalize_journal_name(venue_name)
                found = self.name_map.get(normalized_name)
                if found:
                    return dict(found)

            return {}

        except Exception as e:
            print(f"获取期刊指标时出错: {str(e)}")
            return {}
|
||||
在新工单中引用
屏蔽一个用户