镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-06 06:26:47 +00:00
Master 4.0 (#2210)
* stage academic conversation * stage document conversation * fix buggy gradio version * file dynamic load * merge more academic plugins * accelerate nltk * feat: 为predict函数添加文件和URL读取功能 - 添加URL检测和网页内容提取功能,支持自动提取网页文本 - 添加文件路径识别和文件内容读取功能,支持private_upload路径格式 - 集成WebTextExtractor处理网页内容提取 - 集成TextContentLoader处理本地文件读取 - 支持文件路径与问题组合的智能处理 * back * block unstable --------- Co-authored-by: XiaoBoAI <liuboyin2019@ia.ac.cn>
这个提交包含在:
@@ -0,0 +1,46 @@
|
||||
import aiohttp
|
||||
from typing import List, Dict, Optional
|
||||
from datetime import datetime
|
||||
from .base_source import DataSource, PaperMetadata
|
||||
|
||||
class UnpaywallSource(DataSource):
    """Data source backed by the Unpaywall REST API (v2)."""

    def _initialize(self) -> None:
        """Set the API base URL and the contact email used for requests."""
        self.base_url = "https://api.unpaywall.org/v2"
        # Unpaywall uses the caller's email address in place of an API key.
        # NOTE(review): assumes the base class has populated ``self.api_key``
        # before calling this hook -- confirm against DataSource.
        self.email = self.api_key

    async def search(self, query: str, limit: int = 100) -> List[PaperMetadata]:
        """Query the ``/search`` endpoint and return the parsed records.

        Args:
            query: Free-text search string sent as the ``query`` parameter.
            limit: Value forwarded as the ``limit`` request parameter.

        Returns:
            One ``PaperMetadata`` per search hit that carries a ``response``
            payload; an empty list when the reply has no ``results``.
        """
        params = {
            "query": query,
            "email": self.email,
            "limit": limit,
        }
        async with aiohttp.ClientSession() as session:
            async with session.get(
                f"{self.base_url}/search", params=params
            ) as response:
                data = await response.json()

        # ``results`` may be missing or null (e.g. on an error payload).
        results = data.get("results") or []
        # Each hit is a plain dict decoded from JSON, so the record must be
        # read with key access; attribute access (``item.response``) raises
        # AttributeError on a dict.
        return [
            self._parse_response(item["response"])
            for item in results
            if isinstance(item, dict) and item.get("response")
        ]

    def _parse_response(self, data: Dict) -> PaperMetadata:
        """Convert one Unpaywall record dict into ``PaperMetadata``.

        Args:
            data: A single record as returned by the API.

        Returns:
            ``PaperMetadata`` with title, authors, institutions, year, DOI,
            URL and venue filled in from ``data``. ``abstract`` is always
            empty and ``citations`` is always ``None``.
        """
        # ``dict.get(key, default)`` only falls back when the key is absent;
        # an explicit JSON null would come back as None and break iteration.
        author_entries = [
            entry for entry in (data.get("z_authors") or [])
            if isinstance(entry, dict)
        ]

        authors = []
        institutions = []
        for entry in author_entries:
            # Treat null name parts as empty so "None" never leaks into the
            # rendered name, and drop the stray space left by a missing part.
            given = entry.get("given") or ""
            family = entry.get("family") or ""
            full_name = f"{given} {family}".strip()
            if full_name:
                authors.append(full_name)

            for aff in entry.get("affiliation") or []:
                if isinstance(aff, dict):
                    institutions.append(aff.get("name", ""))

        return PaperMetadata(
            title=data.get("title", ""),
            authors=authors,
            institutions=institutions,
            abstract="",  # Unpaywall does not provide abstracts.
            year=data.get("year"),
            doi=data.get("doi"),
            url=data.get("doi_url"),
            citations=None,  # Unpaywall does not provide citation counts.
            venue=data.get("journal_name"),
        )
|
||||
在新工单中引用
屏蔽一个用户