镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-06 06:26:47 +00:00
Master 4.0 (#2210)
* stage academic conversation
* stage document conversation
* fix buggy gradio version
* file dynamic load
* merge more academic plugins
* accelerate nltk
* feat: 为predict函数添加文件和URL读取功能
  - 添加URL检测和网页内容提取功能,支持自动提取网页文本
  - 添加文件路径识别和文件内容读取功能,支持private_upload路径格式
  - 集成WebTextExtractor处理网页内容提取
  - 集成TextContentLoader处理本地文件读取
  - 支持文件路径与问题组合的智能处理
* back
* block unstable

---------

Co-authored-by: XiaoBoAI <liuboyin2019@ia.ac.cn>
这个提交包含在:
@@ -0,0 +1,102 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
class PaperMetadata:
    """Metadata describing a single paper.

    Plain attributes hold the bibliographic fields passed to the
    constructor. Impact factor and journal-division values are not
    constructor arguments; they start as ``None`` and are assigned later
    through the ``if_factor``, ``cas_division`` and ``jcr_division``
    properties.
    """

    def __init__(
        self,
        title: str,
        authors: List[str],
        abstract: str,
        year: int,
        doi: Optional[str] = None,
        url: Optional[str] = None,
        citations: Optional[int] = None,
        venue: Optional[str] = None,
        institutions: Optional[List[str]] = None,
        venue_type: Optional[str] = None,  # kind of venue (journal/conference/preprint, ...)
        venue_name: Optional[str] = None,  # concrete journal / conference name
        venue_info: Optional[Dict] = None,  # extra venue details (e.g. impact factor, division)
        source: Optional[str] = None,  # marker of where the paper record came from
    ):
        """Create a metadata record.

        Args:
            title: Paper title.
            authors: Author names.
            abstract: Paper abstract.
            year: Publication year.
            doi: DOI, if known.
            url: URL of the paper, if known.
            citations: Citation count, if known.
            venue: Venue string, if known.
            institutions: Institutions; a falsy value is stored as ``[]``.
            venue_type: Kind of venue (journal/conference/preprint, ...).
            venue_name: Concrete journal or conference name.
            venue_info: Extra venue details; a falsy value is stored as ``{}``.
            source: Marker of the origin of this record.
        """
        self.title = title
        self.authors = authors
        self.abstract = abstract
        self.year = year
        self.doi = doi
        self.url = url
        self.citations = citations
        self.venue = venue
        # Falsy inputs (None or empty) are normalised to fresh containers so
        # callers can always iterate / index without a None check.
        self.institutions = institutions or []
        self.venue_type = venue_type
        self.venue_name = venue_name
        self.venue_info = venue_info or {}
        self.source = source

        # Impact factor and division information; unset until assigned
        # through the corresponding properties.
        self._if_factor = None
        self._cas_division = None
        self._jcr_division = None

    def __repr__(self) -> str:
        """Return a short debugging representation with the key identifying fields."""
        return (
            f"{type(self).__name__}("
            f"title={self.title!r}, "
            f"authors={self.authors!r}, "
            f"year={self.year!r}, "
            f"doi={self.doi!r}, "
            f"source={self.source!r})"
        )

    @property
    def if_factor(self) -> Optional[float]:
        """Return the impact factor, or ``None`` if it has not been set."""
        return self._if_factor

    @if_factor.setter
    def if_factor(self, value: Optional[float]) -> None:
        """Set the impact factor."""
        self._if_factor = value

    @property
    def cas_division(self) -> Optional[str]:
        """Return the CAS (Chinese Academy of Sciences) division, or ``None`` if unset."""
        return self._cas_division

    @cas_division.setter
    def cas_division(self, value: Optional[str]) -> None:
        """Set the CAS (Chinese Academy of Sciences) division."""
        self._cas_division = value

    @property
    def jcr_division(self) -> Optional[str]:
        """Return the JCR division, or ``None`` if it has not been set."""
        return self._jcr_division

    @jcr_division.setter
    def jcr_division(self, value: Optional[str]) -> None:
        """Set the JCR division."""
        self._jcr_division = value
|
||||
|
||||
class DataSource(ABC):
    """Abstract base class for paper data sources.

    Concrete subclasses must implement ``_initialize`` plus the four
    asynchronous query methods declared below.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Store the optional API key, then run the subclass ``_initialize`` hook."""
        # The key is assigned before the hook is invoked, so the hook can
        # read ``self.api_key``.
        self.api_key = api_key
        self._initialize()

    @abstractmethod
    def _initialize(self) -> None:
        """Initialize the data source."""

    @abstractmethod
    async def search(self, query: str, limit: int = 100) -> List[PaperMetadata]:
        """Search for papers."""

    @abstractmethod
    async def get_paper_details(self, paper_id: str) -> PaperMetadata:
        """Get detailed information about a paper."""

    @abstractmethod
    async def get_citations(self, paper_id: str) -> List[PaperMetadata]:
        """Get the papers that cite the given paper."""

    @abstractmethod
    async def get_references(self, paper_id: str) -> List[PaperMetadata]:
        """Get the papers referenced by the given paper."""
|
||||
在新工单中引用
屏蔽一个用户