文件
gpt_academic/crazy_functions/review_fns/data_sources/unpaywall_source.py
2025-06-22 18:31:41 +08:00

46 行
1.7 KiB
Python

import aiohttp
from typing import List, Dict, Optional
from datetime import datetime
from .base_source import DataSource, PaperMetadata
class UnpaywallSource(DataSource):
    """Data source backed by the Unpaywall REST API (v2)."""

    def _initialize(self) -> None:
        """Set the API base URL and the contact email sent with every request."""
        self.base_url = "https://api.unpaywall.org/v2"
        # Unpaywall uses the caller's email address in place of an API key.
        self.email = self.api_key

    async def search(self, query: str, limit: int = 100) -> List[PaperMetadata]:
        """Query the Unpaywall search endpoint and parse the hits.

        Args:
            query: Free-text search string forwarded as the ``query`` parameter.
            limit: Maximum number of papers to return.

        Returns:
            One ``PaperMetadata`` per usable hit, at most ``limit`` items.
        """
        params = {
            "query": query,
            "email": self.email,
            "limit": limit,
        }
        async with aiohttp.ClientSession() as session:
            async with session.get(
                f"{self.base_url}/search", params=params
            ) as response:
                data = await response.json()

        # `or []` also covers a "results" key that is present but null.
        results = data.get("results") or []

        papers = []
        # NOTE(review): the search endpoint does not appear to document a
        # `limit` parameter, so the cap is enforced client-side as well --
        # confirm against the Unpaywall API docs.
        for item in results[:max(limit, 0)]:
            # Each hit is a plain dict decoded from JSON; the paper record is
            # stored under its "response" key (attribute access would raise
            # AttributeError on a dict).
            record = item.get("response") if isinstance(item, dict) else None
            if isinstance(record, dict):
                papers.append(self._parse_response(record))
        return papers

    def _parse_response(self, data: Dict) -> PaperMetadata:
        """Convert one Unpaywall paper record into a ``PaperMetadata``.

        Args:
            data: The decoded JSON object describing a single paper.

        Returns:
            A ``PaperMetadata`` with an empty abstract and no citation count,
            since neither is read from the Unpaywall record.
        """
        # `or []` guards against keys that are present with a null value,
        # which `dict.get(key, [])` would pass through as None.
        author_entries = [
            entry for entry in (data.get("z_authors") or [])
            if isinstance(entry, dict)
        ]

        author_names = []
        institution_names = []
        for entry in author_entries:
            given = entry.get("given") or ""
            family = entry.get("family") or ""
            # strip() avoids a dangling space when one name part is missing.
            author_names.append(f"{given} {family}".strip())
            for affiliation in entry.get("affiliation") or []:
                if isinstance(affiliation, dict):
                    institution_names.append(affiliation.get("name", ""))

        return PaperMetadata(
            title=data.get("title", ""),
            authors=author_names,
            institutions=institution_names,
            abstract="",  # Unpaywall does not provide abstracts
            year=data.get("year"),
            doi=data.get("doi"),
            url=data.get("doi_url"),
            citations=None,  # Unpaywall does not provide citation counts
            venue=data.get("journal_name"),
        )