镜像自地址
https://github.com/binary-husky/gpt_academic.git
已同步 2025-12-09 16:06:48 +00:00
merge more academic plugins
这个提交包含在:
@@ -0,0 +1,386 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Dict, Any
|
||||
from ..query_analyzer import SearchCriteria
|
||||
from ..sources.github_source import GitHubSource
|
||||
import asyncio
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
class BaseHandler(ABC):
|
||||
"""处理器基类"""
|
||||
|
||||
def __init__(self, github: GitHubSource, llm_kwargs: Dict = None):
    """Store the GitHub data source and the LLM options on the handler.

    Args:
        github: Object used for every GitHub lookup made by the handler.
        llm_kwargs: Optional LLM settings; any falsy value is stored as an
            empty dict.
    """
    # Holds the repository list produced by the most recent search.
    self.ranked_repos = []
    self.llm_kwargs = llm_kwargs if llm_kwargs else {}
    self.github = github
|
||||
|
||||
def _get_search_params(self, plugin_kwargs: Dict) -> Dict:
|
||||
"""获取搜索参数"""
|
||||
return {
|
||||
'max_repos': plugin_kwargs.get('max_repos', 150), # 最大仓库数量,从30改为150
|
||||
'max_details': plugin_kwargs.get('max_details', 80), # 最多展示详情的仓库数量,新增参数
|
||||
'search_multiplier': plugin_kwargs.get('search_multiplier', 3), # 检索倍数
|
||||
'min_stars': plugin_kwargs.get('min_stars', 0), # 最少星标数
|
||||
}
|
||||
|
||||
@abstractmethod
async def handle(
    self,
    criteria: SearchCriteria,
    chatbot: List[List[str]],
    history: List[List[str]],
    system_prompt: str,
    llm_kwargs: Dict[str, Any],
    plugin_kwargs: Dict[str, Any],
) -> str:
    """Process a query; concrete handlers must override this coroutine.

    Args:
        criteria: Parsed search criteria for the current query.
        chatbot: Chat display rows.
        history: Conversation history rows.
        system_prompt: System prompt text.
        llm_kwargs: LLM options for this request.
        plugin_kwargs: Plugin options for this request.

    Returns:
        A string produced by the concrete handler (the handlers in this
        package return the final prompt text).
    """
|
||||
|
||||
async def _search_repositories(self, query: str, language: str = None, min_stars: int = 0,
|
||||
sort: str = "stars", per_page: int = 30) -> List[Dict]:
|
||||
"""搜索仓库"""
|
||||
try:
|
||||
# 构建查询字符串
|
||||
if min_stars > 0 and "stars:>" not in query:
|
||||
query += f" stars:>{min_stars}"
|
||||
|
||||
if language and "language:" not in query:
|
||||
query += f" language:{language}"
|
||||
|
||||
# 执行搜索
|
||||
result = await self.github.search_repositories(
|
||||
query=query,
|
||||
sort=sort,
|
||||
per_page=per_page
|
||||
)
|
||||
|
||||
if result and "items" in result:
|
||||
return result["items"]
|
||||
return []
|
||||
except Exception as e:
|
||||
print(f"仓库搜索出错: {str(e)}")
|
||||
return []
|
||||
|
||||
async def _search_bilingual_repositories(self, english_query: str, chinese_query: str, language: str = None, min_stars: int = 0,
|
||||
sort: str = "stars", per_page: int = 30) -> List[Dict]:
|
||||
"""同时搜索中英文仓库并合并结果"""
|
||||
try:
|
||||
# 搜索英文仓库
|
||||
english_results = await self._search_repositories(
|
||||
query=english_query,
|
||||
language=language,
|
||||
min_stars=min_stars,
|
||||
sort=sort,
|
||||
per_page=per_page
|
||||
)
|
||||
|
||||
# 搜索中文仓库
|
||||
chinese_results = await self._search_repositories(
|
||||
query=chinese_query,
|
||||
language=language,
|
||||
min_stars=min_stars,
|
||||
sort=sort,
|
||||
per_page=per_page
|
||||
)
|
||||
|
||||
# 合并结果,去除重复项
|
||||
merged_results = []
|
||||
seen_repos = set()
|
||||
|
||||
# 优先添加英文结果
|
||||
for repo in english_results:
|
||||
repo_id = repo.get('id')
|
||||
if repo_id and repo_id not in seen_repos:
|
||||
seen_repos.add(repo_id)
|
||||
merged_results.append(repo)
|
||||
|
||||
# 添加中文结果(排除重复)
|
||||
for repo in chinese_results:
|
||||
repo_id = repo.get('id')
|
||||
if repo_id and repo_id not in seen_repos:
|
||||
seen_repos.add(repo_id)
|
||||
merged_results.append(repo)
|
||||
|
||||
# 按星标数重新排序
|
||||
merged_results.sort(key=lambda x: x.get('stargazers_count', 0), reverse=True)
|
||||
|
||||
return merged_results[:per_page] # 返回合并后的前per_page个结果
|
||||
except Exception as e:
|
||||
print(f"双语仓库搜索出错: {str(e)}")
|
||||
return []
|
||||
|
||||
async def _search_code(self, query: str, language: str = None, per_page: int = 30) -> List[Dict]:
|
||||
"""搜索代码"""
|
||||
try:
|
||||
# 构建查询字符串
|
||||
if language and "language:" not in query:
|
||||
query += f" language:{language}"
|
||||
|
||||
# 执行搜索
|
||||
result = await self.github.search_code(
|
||||
query=query,
|
||||
per_page=per_page
|
||||
)
|
||||
|
||||
if result and "items" in result:
|
||||
return result["items"]
|
||||
return []
|
||||
except Exception as e:
|
||||
print(f"代码搜索出错: {str(e)}")
|
||||
return []
|
||||
|
||||
async def _search_bilingual_code(self, english_query: str, chinese_query: str, language: str = None, per_page: int = 30) -> List[Dict]:
|
||||
"""同时搜索中英文代码并合并结果"""
|
||||
try:
|
||||
# 搜索英文代码
|
||||
english_results = await self._search_code(
|
||||
query=english_query,
|
||||
language=language,
|
||||
per_page=per_page
|
||||
)
|
||||
|
||||
# 搜索中文代码
|
||||
chinese_results = await self._search_code(
|
||||
query=chinese_query,
|
||||
language=language,
|
||||
per_page=per_page
|
||||
)
|
||||
|
||||
# 合并结果,去除重复项
|
||||
merged_results = []
|
||||
seen_files = set()
|
||||
|
||||
# 优先添加英文结果
|
||||
for item in english_results:
|
||||
# 使用文件URL作为唯一标识
|
||||
file_url = item.get('html_url', '')
|
||||
if file_url and file_url not in seen_files:
|
||||
seen_files.add(file_url)
|
||||
merged_results.append(item)
|
||||
|
||||
# 添加中文结果(排除重复)
|
||||
for item in chinese_results:
|
||||
file_url = item.get('html_url', '')
|
||||
if file_url and file_url not in seen_files:
|
||||
seen_files.add(file_url)
|
||||
merged_results.append(item)
|
||||
|
||||
# 对结果进行排序,优先显示匹配度高的结果
|
||||
# 由于无法直接获取匹配度,这里使用仓库的星标数作为替代指标
|
||||
merged_results.sort(key=lambda x: x.get('repository', {}).get('stargazers_count', 0), reverse=True)
|
||||
|
||||
return merged_results[:per_page] # 返回合并后的前per_page个结果
|
||||
except Exception as e:
|
||||
print(f"双语代码搜索出错: {str(e)}")
|
||||
return []
|
||||
|
||||
async def _search_users(self, query: str, per_page: int = 30) -> List[Dict]:
|
||||
"""搜索用户"""
|
||||
try:
|
||||
result = await self.github.search_users(
|
||||
query=query,
|
||||
per_page=per_page
|
||||
)
|
||||
|
||||
if result and "items" in result:
|
||||
return result["items"]
|
||||
return []
|
||||
except Exception as e:
|
||||
print(f"用户搜索出错: {str(e)}")
|
||||
return []
|
||||
|
||||
async def _search_bilingual_users(self, english_query: str, chinese_query: str, per_page: int = 30) -> List[Dict]:
|
||||
"""同时搜索中英文用户并合并结果"""
|
||||
try:
|
||||
# 搜索英文用户
|
||||
english_results = await self._search_users(
|
||||
query=english_query,
|
||||
per_page=per_page
|
||||
)
|
||||
|
||||
# 搜索中文用户
|
||||
chinese_results = await self._search_users(
|
||||
query=chinese_query,
|
||||
per_page=per_page
|
||||
)
|
||||
|
||||
# 合并结果,去除重复项
|
||||
merged_results = []
|
||||
seen_users = set()
|
||||
|
||||
# 优先添加英文结果
|
||||
for user in english_results:
|
||||
user_id = user.get('id')
|
||||
if user_id and user_id not in seen_users:
|
||||
seen_users.add(user_id)
|
||||
merged_results.append(user)
|
||||
|
||||
# 添加中文结果(排除重复)
|
||||
for user in chinese_results:
|
||||
user_id = user.get('id')
|
||||
if user_id and user_id not in seen_users:
|
||||
seen_users.add(user_id)
|
||||
merged_results.append(user)
|
||||
|
||||
# 按关注者数量进行排序
|
||||
merged_results.sort(key=lambda x: x.get('followers', 0), reverse=True)
|
||||
|
||||
return merged_results[:per_page] # 返回合并后的前per_page个结果
|
||||
except Exception as e:
|
||||
print(f"双语用户搜索出错: {str(e)}")
|
||||
return []
|
||||
|
||||
async def _search_topics(self, query: str, per_page: int = 30) -> List[Dict]:
|
||||
"""搜索主题"""
|
||||
try:
|
||||
result = await self.github.search_topics(
|
||||
query=query,
|
||||
per_page=per_page
|
||||
)
|
||||
|
||||
if result and "items" in result:
|
||||
return result["items"]
|
||||
return []
|
||||
except Exception as e:
|
||||
print(f"主题搜索出错: {str(e)}")
|
||||
return []
|
||||
|
||||
async def _search_bilingual_topics(self, english_query: str, chinese_query: str, per_page: int = 30) -> List[Dict]:
|
||||
"""同时搜索中英文主题并合并结果"""
|
||||
try:
|
||||
# 搜索英文主题
|
||||
english_results = await self._search_topics(
|
||||
query=english_query,
|
||||
per_page=per_page
|
||||
)
|
||||
|
||||
# 搜索中文主题
|
||||
chinese_results = await self._search_topics(
|
||||
query=chinese_query,
|
||||
per_page=per_page
|
||||
)
|
||||
|
||||
# 合并结果,去除重复项
|
||||
merged_results = []
|
||||
seen_topics = set()
|
||||
|
||||
# 优先添加英文结果
|
||||
for topic in english_results:
|
||||
topic_name = topic.get('name')
|
||||
if topic_name and topic_name not in seen_topics:
|
||||
seen_topics.add(topic_name)
|
||||
merged_results.append(topic)
|
||||
|
||||
# 添加中文结果(排除重复)
|
||||
for topic in chinese_results:
|
||||
topic_name = topic.get('name')
|
||||
if topic_name and topic_name not in seen_topics:
|
||||
seen_topics.add(topic_name)
|
||||
merged_results.append(topic)
|
||||
|
||||
# 可以按流行度进行排序(如果有)
|
||||
if merged_results and 'featured' in merged_results[0]:
|
||||
merged_results.sort(key=lambda x: x.get('featured', False), reverse=True)
|
||||
|
||||
return merged_results[:per_page] # 返回合并后的前per_page个结果
|
||||
except Exception as e:
|
||||
print(f"双语主题搜索出错: {str(e)}")
|
||||
return []
|
||||
|
||||
async def _get_repo_details(self, repos: List[Dict]) -> List[Dict]:
|
||||
"""获取仓库详细信息"""
|
||||
enhanced_repos = []
|
||||
|
||||
for repo in repos:
|
||||
try:
|
||||
# 获取README信息
|
||||
owner = repo.get('owner', {}).get('login') if repo.get('owner') is not None else None
|
||||
repo_name = repo.get('name')
|
||||
|
||||
if owner and repo_name:
|
||||
readme = await self.github.get_repo_readme(owner, repo_name)
|
||||
if readme and "decoded_content" in readme:
|
||||
# 提取README的前1000个字符作为摘要
|
||||
repo['readme_excerpt'] = readme["decoded_content"][:1000] + "..."
|
||||
|
||||
# 获取语言使用情况
|
||||
languages = await self.github.get_repository_languages(owner, repo_name)
|
||||
if languages:
|
||||
repo['languages_detail'] = languages
|
||||
|
||||
# 获取最新发布版本
|
||||
releases = await self.github.get_repo_releases(owner, repo_name, per_page=1)
|
||||
if releases and len(releases) > 0:
|
||||
repo['latest_release'] = releases[0]
|
||||
|
||||
# 获取主题标签
|
||||
topics = await self.github.get_repo_topics(owner, repo_name)
|
||||
if topics and "names" in topics:
|
||||
repo['topics'] = topics["names"]
|
||||
|
||||
enhanced_repos.append(repo)
|
||||
except Exception as e:
|
||||
print(f"获取仓库 {repo.get('full_name')} 详情时出错: {str(e)}")
|
||||
enhanced_repos.append(repo) # 添加原始仓库信息
|
||||
|
||||
return enhanced_repos
|
||||
|
||||
def _format_repos(self, repos: List[Dict]) -> str:
|
||||
"""格式化仓库列表"""
|
||||
formatted = []
|
||||
|
||||
for i, repo in enumerate(repos, 1):
|
||||
# 构建仓库URL
|
||||
repo_url = repo.get('html_url', '')
|
||||
|
||||
# 构建完整的引用
|
||||
reference = (
|
||||
f"{i}. **{repo.get('full_name', '')}**\n"
|
||||
f" - 描述: {repo.get('description', 'N/A')}\n"
|
||||
f" - 语言: {repo.get('language', 'N/A')}\n"
|
||||
f" - 星标: {repo.get('stargazers_count', 0)}\n"
|
||||
f" - Fork数: {repo.get('forks_count', 0)}\n"
|
||||
f" - 更新时间: {repo.get('updated_at', 'N/A')[:10]}\n"
|
||||
f" - 创建时间: {repo.get('created_at', 'N/A')[:10]}\n"
|
||||
f" - URL: <a href='{repo_url}' target='_blank'>{repo_url}</a>\n"
|
||||
)
|
||||
|
||||
# 添加主题标签(如果有)
|
||||
if repo.get('topics'):
|
||||
topics_str = ", ".join(repo.get('topics'))
|
||||
reference += f" - 主题标签: {topics_str}\n"
|
||||
|
||||
# 添加最新发布版本(如果有)
|
||||
if repo.get('latest_release'):
|
||||
release = repo.get('latest_release')
|
||||
reference += f" - 最新版本: {release.get('tag_name', 'N/A')} ({release.get('published_at', 'N/A')[:10]})\n"
|
||||
|
||||
# 添加README摘要(如果有)
|
||||
if repo.get('readme_excerpt'):
|
||||
# 截断README,只取前300个字符
|
||||
readme_short = repo.get('readme_excerpt')[:300].replace('\n', ' ')
|
||||
reference += f" - README摘要: {readme_short}...\n"
|
||||
|
||||
formatted.append(reference)
|
||||
|
||||
return "\n".join(formatted)
|
||||
|
||||
def _generate_apology_prompt(self, criteria: SearchCriteria) -> str:
    """Build the message used when a search produced nothing.

    The text names ``criteria.main_topic``, lists three possible reasons and
    three suggested adjustments.
    """
    message_lines = [
        f"很抱歉,我们未能找到与\"{criteria.main_topic}\"相关的GitHub项目。",
        "",
        "可能的原因:",
        "1. 搜索词过于具体或冷门",
        "2. 星标数要求过高",
        "3. 编程语言限制过于严格",
        "",
        "建议解决方案:",
        "1. 尝试使用更通用的关键词",
        "2. 降低最低星标数要求",
        "3. 移除或更改编程语言限制",
        "请根据以上建议调整后重试。",
    ]
    return "\n".join(message_lines)
|
||||
|
||||
def _get_current_time(self) -> str:
|
||||
"""获取当前时间信息"""
|
||||
now = datetime.now()
|
||||
return now.strftime("%Y年%m月%d日")
|
||||
@@ -0,0 +1,156 @@
|
||||
from typing import List, Dict, Any
|
||||
from .base_handler import BaseHandler
|
||||
from ..query_analyzer import SearchCriteria
|
||||
import asyncio
|
||||
|
||||
class CodeSearchHandler(BaseHandler):
|
||||
"""代码搜索处理器"""
|
||||
|
||||
def __init__(self, github, llm_kwargs=None):
    """Create the code-search handler; all setup is done by the base class."""
    super().__init__(github, llm_kwargs=llm_kwargs)
|
||||
|
||||
async def handle(
    self,
    criteria: SearchCriteria,
    chatbot: List[List[str]],
    history: List[List[str]],
    system_prompt: str,
    llm_kwargs: Dict[str, Any],
    plugin_kwargs: Dict[str, Any],
) -> str:
    """Run a code search and return the final prompt built from the results.

    ``chatbot``, ``history``, ``system_prompt`` and ``llm_kwargs`` are part
    of the shared handler signature and are not read here.

    Returns:
        The analysis prompt, or the apology prompt when the search or the
        detail lookup yields nothing.
    """
    limits = self._get_search_params(plugin_kwargs)

    # Search code with both query variants.
    matches = await self._search_bilingual_code(
        english_query=criteria.github_params["query"],
        chinese_query=criteria.github_params["chinese_query"],
        language=criteria.language,
        per_page=limits['max_repos'],
    )
    if not matches:
        return self._generate_apology_prompt(criteria)

    # Fetch file contents for the leading matches only.
    detail_budget = limits['max_details']
    detailed_matches = await self._get_code_details(matches[:detail_budget])
    self.ranked_repos = [match["repository"] for match in detailed_matches if "repository" in match]
    if not detailed_matches:
        return self._generate_apology_prompt(criteria)

    # Assemble the final prompt.
    today = self._get_current_time()
    return f"""当前时间: {today}

基于用户对{criteria.main_topic}的查询,我找到了以下代码示例。

代码搜索结果:
{self._format_code_results(detailed_matches)}

请提供:

1. 对于搜索的"{criteria.main_topic}"主题的综合解释:
- 概念和原理介绍
- 常见实现方法和技术
- 最佳实践和注意事项

2. 对每个代码示例:
- 解释代码的主要功能和实现方式
- 分析代码质量、可读性和效率
- 指出代码中的亮点和潜在改进空间
- 说明代码的适用场景

3. 代码实现比较:
- 不同实现方法的优缺点
- 性能和可维护性分析
- 适用不同场景的实现建议

4. 学习建议:
- 理解和使用这些代码需要的背景知识
- 如何扩展或改进所展示的代码
- 进一步学习相关技术的资源

重要提示:
- 深入解释代码的核心逻辑和实现思路
- 提供专业、技术性的分析
- 优先关注代码的实现质量和技术价值
- 当代码实现有问题时,指出并提供改进建议
- 对于复杂代码,分解解释其组成部分
- 根据用户查询的具体问题提供针对性答案
- 所有链接请使用<a href='链接地址' target='_blank'>链接文本</a>格式,确保链接在新窗口打开

使用markdown格式提供清晰的分节回复。
"""
|
||||
|
||||
async def _get_code_details(self, code_results: List[Dict]) -> List[Dict]:
|
||||
"""获取代码详情"""
|
||||
enhanced_results = []
|
||||
|
||||
for item in code_results:
|
||||
try:
|
||||
repo = item.get('repository', {})
|
||||
file_path = item.get('path', '')
|
||||
repo_name = repo.get('full_name', '')
|
||||
|
||||
if repo_name and file_path:
|
||||
owner, repo_name = repo_name.split('/')
|
||||
|
||||
# 获取文件内容
|
||||
file_content = await self.github.get_file_content(owner, repo_name, file_path)
|
||||
if file_content and "decoded_content" in file_content:
|
||||
item['code_content'] = file_content["decoded_content"]
|
||||
|
||||
# 获取仓库基本信息
|
||||
repo_details = await self.github.get_repo(owner, repo_name)
|
||||
if repo_details:
|
||||
item['repository'] = repo_details
|
||||
|
||||
enhanced_results.append(item)
|
||||
except Exception as e:
|
||||
print(f"获取代码详情时出错: {str(e)}")
|
||||
enhanced_results.append(item) # 添加原始信息
|
||||
|
||||
return enhanced_results
|
||||
|
||||
def _format_code_results(self, code_results: List[Dict]) -> str:
|
||||
"""格式化代码搜索结果"""
|
||||
formatted = []
|
||||
|
||||
for i, item in enumerate(code_results, 1):
|
||||
# 构建仓库信息
|
||||
repo = item.get('repository', {})
|
||||
repo_name = repo.get('full_name', 'N/A')
|
||||
repo_url = repo.get('html_url', '')
|
||||
stars = repo.get('stargazers_count', 0)
|
||||
language = repo.get('language', 'N/A')
|
||||
|
||||
# 构建文件信息
|
||||
file_path = item.get('path', 'N/A')
|
||||
file_url = item.get('html_url', '')
|
||||
|
||||
# 构建代码内容
|
||||
code_content = item.get('code_content', '')
|
||||
if code_content:
|
||||
# 只显示前30行代码
|
||||
code_lines = code_content.split("\n")
|
||||
if len(code_lines) > 30:
|
||||
displayed_code = "\n".join(code_lines[:30]) + "\n... (代码太长已截断) ..."
|
||||
else:
|
||||
displayed_code = code_content
|
||||
else:
|
||||
displayed_code = "(代码内容获取失败)"
|
||||
|
||||
reference = (
|
||||
f"### {i}. {file_path} (在 {repo_name} 中)\n\n"
|
||||
f"- **仓库**: <a href='{repo_url}' target='_blank'>{repo_name}</a> (⭐ {stars}, 语言: {language})\n"
|
||||
f"- **文件路径**: <a href='{file_url}' target='_blank'>{file_path}</a>\n\n"
|
||||
f"```{language.lower()}\n{displayed_code}\n```\n\n"
|
||||
)
|
||||
|
||||
formatted.append(reference)
|
||||
|
||||
return "\n".join(formatted)
|
||||
@@ -0,0 +1,192 @@
|
||||
from typing import List, Dict, Any
|
||||
from .base_handler import BaseHandler
|
||||
from ..query_analyzer import SearchCriteria
|
||||
import asyncio
|
||||
|
||||
class RepositoryHandler(BaseHandler):
|
||||
"""仓库搜索处理器"""
|
||||
|
||||
def __init__(self, github, llm_kwargs=None):
    """Create the repository handler; all setup is done by the base class."""
    super().__init__(github, llm_kwargs=llm_kwargs)
|
||||
|
||||
async def handle(
    self,
    criteria: SearchCriteria,
    chatbot: List[List[str]],
    history: List[List[str]],
    system_prompt: str,
    llm_kwargs: Dict[str, Any],
    plugin_kwargs: Dict[str, Any],
) -> str:
    """Handle a repository query and return the final prompt.

    Two paths exist. When ``criteria.repo_id`` is set, that repository is
    fetched together with up to five recommended repositories and a detail
    prompt is built. Otherwise a bilingual repository search is run and a
    comparison prompt is built from the leading results.

    ``chatbot``, ``history``, ``system_prompt`` and ``llm_kwargs`` are part
    of the shared handler signature and are not read here.

    Returns:
        The prompt text, or the apology prompt when nothing was found or
        the specific-repository path raised.
    """
    search_params = self._get_search_params(plugin_kwargs)

    # Query for one specific repository.
    if criteria.repo_id:
        try:
            owner, repo = criteria.repo_id.split('/')
            repo_details = await self.github.get_repo(owner, repo)
            if not repo_details:
                return self._generate_apology_prompt(criteria)

            # Recommended similar repositories. A falsy result (for example
            # None) is treated as "no recommendations" so the repository
            # that was found is still presented; list concatenation with
            # None would otherwise raise and end in the apology prompt.
            similar_repos = await self.github.get_repo_recommendations(criteria.repo_id, limit=5)
            all_repos = [repo_details] + list(similar_repos or [])

            # Add detail data to the main repository and the recommendations.
            enhanced_repos = await self._get_repo_details(all_repos)
            self.ranked_repos = enhanced_repos

            current_time = self._get_current_time()
            return self._build_repo_detail_prompt(enhanced_repos[0], enhanced_repos[1:], current_time)
        except Exception as e:
            print(f"处理特定仓库时出错: {str(e)}")
            return self._generate_apology_prompt(criteria)

    # General repository search.
    repos = await self._search_bilingual_repositories(
        english_query=criteria.github_params["query"],
        chinese_query=criteria.github_params["chinese_query"],
        language=criteria.language,
        min_stars=criteria.min_stars,
        per_page=search_params['max_repos'],
    )

    if not repos:
        return self._generate_apology_prompt(criteria)

    # Detail lookups are limited to the first ``max_details`` repositories.
    enhanced_repos = await self._get_repo_details(repos[:search_params['max_details']])
    self.ranked_repos = enhanced_repos

    if not enhanced_repos:
        return self._generate_apology_prompt(criteria)

    # Assemble the final prompt.
    current_time = self._get_current_time()
    final_prompt = f"""当前时间: {current_time}

基于用户对{criteria.main_topic}的兴趣,以下是相关的GitHub仓库。

可供推荐的GitHub仓库:
{self._format_repos(enhanced_repos)}

请提供:
1. 按功能、用途或成熟度对仓库进行分组

2. 对每个仓库:
- 简要描述其主要功能和用途
- 分析其技术特点和优势
- 说明其适用场景和使用难度
- 指出其与同类产品相比的独特优势
- 解释其星标数量和活跃度代表的意义

3. 使用建议:
- 新手最适合入门的仓库
- 生产环境中最稳定可靠的选择
- 最新技术栈或创新方案的代表
- 学习特定技术的最佳资源

4. 相关资源:
- 学习这些项目需要的前置知识
- 项目间的关联和技术栈兼容性
- 可能的使用组合方案

重要提示:
- 重点解释为什么每个仓库值得关注
- 突出项目间的关联性和差异性
- 考虑用户不同水平的需求(初学者vs专业人士)
- 在介绍项目时,使用<a href='链接' target='_blank'>文本</a>格式,确保链接在新窗口打开
- 根据仓库的活跃度、更新频率、维护状态提供使用建议
- 仅基于提供的信息,不要做无根据的猜测
- 在信息缺失或不明确时,坦诚说明

使用markdown格式提供清晰的分节回复。
"""

    return final_prompt
|
||||
|
||||
def _build_repo_detail_prompt(self, main_repo: Dict, similar_repos: List[Dict], current_time: str) -> str:
|
||||
"""构建仓库详情prompt"""
|
||||
|
||||
# 提取README摘要
|
||||
readme_content = "未提供"
|
||||
if main_repo.get('readme_excerpt'):
|
||||
readme_content = main_repo.get('readme_excerpt')
|
||||
|
||||
# 构建语言分布
|
||||
languages = main_repo.get('languages_detail', {})
|
||||
lang_distribution = []
|
||||
if languages:
|
||||
total = sum(languages.values())
|
||||
for lang, bytes_val in languages.items():
|
||||
percentage = (bytes_val / total) * 100
|
||||
lang_distribution.append(f"{lang}: {percentage:.1f}%")
|
||||
|
||||
lang_str = "未知"
|
||||
if lang_distribution:
|
||||
lang_str = ", ".join(lang_distribution)
|
||||
|
||||
# 构建最终prompt
|
||||
prompt = f"""当前时间: {current_time}
|
||||
|
||||
## 主要仓库信息
|
||||
|
||||
### {main_repo.get('full_name')}
|
||||
|
||||
- **描述**: {main_repo.get('description', '未提供')}
|
||||
- **星标数**: {main_repo.get('stargazers_count', 0)}
|
||||
- **Fork数**: {main_repo.get('forks_count', 0)}
|
||||
- **Watch数**: {main_repo.get('watchers_count', 0)}
|
||||
- **Issues数**: {main_repo.get('open_issues_count', 0)}
|
||||
- **语言分布**: {lang_str}
|
||||
- **许可证**: {main_repo.get('license', {}).get('name', '未指定') if main_repo.get('license') is not None else '未指定'}
|
||||
- **创建时间**: {main_repo.get('created_at', '')[:10]}
|
||||
- **最近更新**: {main_repo.get('updated_at', '')[:10]}
|
||||
- **主题标签**: {', '.join(main_repo.get('topics', ['无']))}
|
||||
- **GitHub链接**: <a href='{main_repo.get('html_url')}' target='_blank'>链接</a>
|
||||
|
||||
### README摘要:
|
||||
{readme_content}
|
||||
|
||||
## 类似仓库:
|
||||
{self._format_repos(similar_repos)}
|
||||
|
||||
请提供以下内容:
|
||||
|
||||
1. **项目概述**
|
||||
- 详细解释{main_repo.get('name', '')}项目的主要功能和用途
|
||||
- 分析其技术特点、架构和实现原理
|
||||
- 讨论其在所属领域的地位和影响力
|
||||
- 评估项目成熟度和稳定性
|
||||
|
||||
2. **优势与特点**
|
||||
- 与同类项目相比的独特优势
|
||||
- 显著的技术创新或设计模式
|
||||
- 值得学习或借鉴的代码实践
|
||||
|
||||
3. **使用场景**
|
||||
- 最适合的应用场景
|
||||
- 潜在的使用限制和注意事项
|
||||
- 入门门槛和学习曲线评估
|
||||
- 产品级应用的可行性分析
|
||||
|
||||
4. **资源与生态**
|
||||
- 相关学习资源推荐
|
||||
- 配套工具和库的建议
|
||||
- 社区支持和活跃度评估
|
||||
|
||||
5. **类似项目对比**
|
||||
- 与列出的类似项目的详细对比
|
||||
- 不同场景下的最佳选择建议
|
||||
- 潜在的互补使用方案
|
||||
|
||||
提示:所有链接请使用<a href='链接地址' target='_blank'>链接文本</a>格式,确保链接在新窗口打开。
|
||||
|
||||
请以专业、客观的技术分析角度回答,使用markdown格式提供结构化信息。
|
||||
"""
|
||||
return prompt
|
||||
@@ -0,0 +1,217 @@
|
||||
from typing import List, Dict, Any
|
||||
from .base_handler import BaseHandler
|
||||
from ..query_analyzer import SearchCriteria
|
||||
import asyncio
|
||||
|
||||
class TopicHandler(BaseHandler):
|
||||
"""主题搜索处理器"""
|
||||
|
||||
def __init__(self, github, llm_kwargs=None):
    """Create the topic handler; all setup is done by the base class."""
    super().__init__(github, llm_kwargs=llm_kwargs)
|
||||
|
||||
async def handle(
    self,
    criteria: SearchCriteria,
    chatbot: List[List[str]],
    history: List[List[str]],
    system_prompt: str,
    llm_kwargs: Dict[str, Any],
    plugin_kwargs: Dict[str, Any],
) -> str:
    """Handle a topic query and return the final prompt.

    A bilingual topic search is run first. When it finds topics, the
    repositories of the first five topics are collected and presented
    grouped by topic. When it finds none, the handler falls back to a
    repository search with a ``topic:`` qualifier.

    ``chatbot``, ``history``, ``system_prompt`` and ``llm_kwargs`` are part
    of the shared handler signature and are not read here.

    Returns:
        The prompt text, or the apology prompt when nothing was found.
    """
    limits = self._get_search_params(plugin_kwargs)

    # Search topics with both query variants.
    found_topics = await self._search_bilingual_topics(
        english_query=criteria.github_params["query"],
        chinese_query=criteria.github_params["chinese_query"],
        per_page=limits['max_repos'],
    )

    if not found_topics:
        # Fallback: search repositories by topic qualifier instead.
        english_search = criteria.github_params["query"]
        chinese_search = criteria.github_params["chinese_query"]
        if "topic:" not in english_search:
            english_search = english_search + " topic:" + criteria.main_topic.replace(" ", "-")
        if "topic:" not in chinese_search:
            chinese_search = chinese_search + " topic:" + criteria.main_topic.replace(" ", "-")

        fallback_repos = await self._search_bilingual_repositories(
            english_query=english_search,
            chinese_query=chinese_search,
            language=criteria.language,
            min_stars=criteria.min_stars,
            per_page=limits['max_repos'],
        )
        if not fallback_repos:
            return self._generate_apology_prompt(criteria)

        # NOTE(review): this branch uses a fixed limit of 10 rather than
        # ``max_details`` like the main path below - confirm that is intended.
        detailed_repos = await self._get_repo_details(fallback_repos[:10])
        self.ranked_repos = detailed_repos
        if not detailed_repos:
            return self._generate_apology_prompt(criteria)

        # Prompt for a topic-based repository list.
        today = self._get_current_time()
        return f"""当前时间: {today}

基于用户对主题"{criteria.main_topic}"的查询,我找到了以下相关GitHub仓库。

主题相关仓库:
{self._format_repos(detailed_repos)}

请提供:

1. 主题综述:
- "{criteria.main_topic}"主题的概述和重要性
- 该主题在技术领域中的应用和发展趋势
- 主题相关的主要技术栈和知识体系

2. 仓库分析:
- 按功能、技术栈或应用场景对仓库进行分类
- 每个仓库在该主题领域的定位和贡献
- 不同仓库间的技术路线对比

3. 学习路径建议:
- 初学者入门该主题的推荐仓库和学习顺序
- 进阶学习的关键仓库和技术要点
- 实际应用中的最佳实践选择

4. 技术生态分析:
- 该主题下的主流工具和库
- 社区活跃度和维护状况
- 与其他相关技术的集成方案

重要提示:
- 主题"{criteria.main_topic}"是用户查询的核心,请围绕此主题展开分析
- 注重仓库质量评估和使用建议
- 提供基于事实的客观技术分析
- 在介绍仓库时使用<a href='链接地址' target='_blank'>链接文本</a>格式,确保链接在新窗口打开
- 考虑不同技术水平用户的需求

使用markdown格式提供清晰的分节回复。
"""

    # Topics were found: collect repositories for the first five of them.
    collected = []
    for topic_entry in found_topics[:5]:
        label = topic_entry.get('name', '')
        if not label:
            continue

        # At most 20 repositories are requested per topic.
        topic_hits = await self._search_repositories(
            query=f"topic:{label}",
            language=criteria.language,
            min_stars=criteria.min_stars,
            per_page=20,
        )
        if not topic_hits:
            continue

        for hit in topic_hits:
            # Remember which topic produced the repository for grouping.
            hit['topic_source'] = label
            collected.append(hit)

    if not collected:
        return self._generate_apology_prompt(criteria)

    # Detail lookups are limited to the first ``max_details`` repositories.
    detailed_repos = await self._get_repo_details(collected[:limits['max_details']])
    self.ranked_repos = detailed_repos
    if not detailed_repos:
        return self._generate_apology_prompt(criteria)

    # Assemble the final prompt.
    today = self._get_current_time()
    return f"""当前时间: {today}

基于用户对"{criteria.main_topic}"主题的查询,我找到了以下相关GitHub主题和仓库。

主题相关仓库:
{self._format_topic_repos(detailed_repos)}

请提供:

1. 主题概述:
- 对"{criteria.main_topic}"相关主题的介绍和技术背景
- 这些主题在软件开发中的重要性和应用范围
- 主题间的关联性和技术演进路径

2. 精选仓库分析:
- 每个主题下最具代表性的仓库详解
- 仓库的技术亮点和创新点
- 使用场景和技术成熟度评估

3. 技术趋势分析:
- 基于主题和仓库活跃度的技术发展趋势
- 新兴解决方案和传统方案的对比
- 未来可能的技术方向预测

4. 实践建议:
- 不同应用场景下的最佳仓库选择
- 学习路径和资源推荐
- 实际项目中的应用策略

重要提示:
- 将分析重点放在主题的技术内涵和价值上
- 突出主题间的关联性和技术演进脉络
- 提供基于数据(星标数、更新频率等)的客观分析
- 考虑不同技术背景用户的需求
- 所有链接请使用<a href='链接地址' target='_blank'>链接文本</a>格式,确保链接在新窗口打开

使用markdown格式提供清晰的分节回复。
"""
|
||||
|
||||
def _format_topic_repos(self, repos: List[Dict]) -> str:
|
||||
"""按主题格式化仓库列表"""
|
||||
# 按主题分组
|
||||
topics_dict = {}
|
||||
for repo in repos:
|
||||
topic = repo.get('topic_source', '其他')
|
||||
if topic not in topics_dict:
|
||||
topics_dict[topic] = []
|
||||
topics_dict[topic].append(repo)
|
||||
|
||||
# 格式化输出
|
||||
formatted = []
|
||||
for topic, topic_repos in topics_dict.items():
|
||||
formatted.append(f"## 主题: {topic}\n")
|
||||
|
||||
for i, repo in enumerate(topic_repos, 1):
|
||||
# 构建仓库URL
|
||||
repo_url = repo.get('html_url', '')
|
||||
|
||||
# 构建引用
|
||||
reference = (
|
||||
f"{i}. **{repo.get('full_name', '')}**\n"
|
||||
f" - 描述: {repo.get('description', 'N/A')}\n"
|
||||
f" - 语言: {repo.get('language', 'N/A')}\n"
|
||||
f" - 星标: {repo.get('stargazers_count', 0)}\n"
|
||||
f" - Fork数: {repo.get('forks_count', 0)}\n"
|
||||
f" - 更新时间: {repo.get('updated_at', 'N/A')[:10]}\n"
|
||||
f" - URL: <a href='{repo_url}' target='_blank'>{repo_url}</a>\n"
|
||||
)
|
||||
|
||||
# 添加主题标签(如果有)
|
||||
if repo.get('topics'):
|
||||
topics_str = ", ".join(repo.get('topics'))
|
||||
reference += f" - 主题标签: {topics_str}\n"
|
||||
|
||||
# 添加README摘要(如果有)
|
||||
if repo.get('readme_excerpt'):
|
||||
# 截断README,只取前200个字符
|
||||
readme_short = repo.get('readme_excerpt')[:200].replace('\n', ' ')
|
||||
reference += f" - README摘要: {readme_short}...\n"
|
||||
|
||||
formatted.append(reference)
|
||||
|
||||
formatted.append("\n") # 主题之间添加空行
|
||||
|
||||
return "\n".join(formatted)
|
||||
@@ -0,0 +1,164 @@
|
||||
from typing import List, Dict, Any
|
||||
from .base_handler import BaseHandler
|
||||
from ..query_analyzer import SearchCriteria
|
||||
import asyncio
|
||||
|
||||
class UserSearchHandler(BaseHandler):
|
||||
"""用户搜索处理器"""
|
||||
|
||||
def __init__(self, github, llm_kwargs=None):
    """Create the user-search handler; all setup is done by the base class."""
    super().__init__(github, llm_kwargs=llm_kwargs)
|
||||
|
||||
async def handle(
    self,
    criteria: SearchCriteria,
    chatbot: List[List[str]],
    history: List[List[str]],
    system_prompt: str,
    llm_kwargs: Dict[str, Any],
    plugin_kwargs: Dict[str, Any],
) -> str:
    """Run a user search and return the final prompt built from the results.

    ``self.ranked_repos`` is filled with the ``top_repos`` of every user
    that has any. ``chatbot``, ``history``, ``system_prompt`` and
    ``llm_kwargs`` are part of the shared handler signature and are not
    read here.

    Returns:
        The analysis prompt, or the apology prompt when the search or the
        detail lookup yields nothing.
    """
    limits = self._get_search_params(plugin_kwargs)

    # Search users with both query variants.
    accounts = await self._search_bilingual_users(
        english_query=criteria.github_params["query"],
        chinese_query=criteria.github_params["chinese_query"],
        per_page=limits['max_repos'],
    )
    if not accounts:
        return self._generate_apology_prompt(criteria)

    # Fetch profile details and repositories for the leading users only.
    profiles = await self._get_user_details(accounts[:limits['max_details']])

    # Collect every user's top repositories for display.
    self.ranked_repos = []
    for profile in profiles:
        if not profile.get('top_repos'):
            continue
        self.ranked_repos.extend(profile.get('top_repos'))

    if not profiles:
        return self._generate_apology_prompt(criteria)

    # Assemble the final prompt.
    today = self._get_current_time()
    return f"""当前时间: {today}

基于用户对{criteria.main_topic}的查询,我找到了以下GitHub用户。

GitHub用户搜索结果:
{self._format_users(profiles)}

请提供:

1. 用户综合分析:
- 各开发者的专业领域和技术专长
- 他们在GitHub开源社区的影响力
- 技术实力和项目质量评估

2. 对每位开发者:
- 其主要贡献领域和技术栈
- 代表性项目及其价值
- 编程风格和技术特点
- 在相关领域的影响力

3. 项目推荐:
- 针对用户查询的最有价值项目
- 值得学习和借鉴的代码实践
- 不同用户项目的相互补充关系

4. 如何学习和使用:
- 如何从这些开发者项目中学习
- 最适合入门学习的项目
- 进阶学习的路径建议

重要提示:
- 关注开发者的技术专长和核心贡献
- 分析其开源项目的技术价值
- 根据用户的原始查询提供相关建议
- 避免过度赞美或主观评价
- 基于事实数据(项目数、星标数等)进行客观分析
- 所有链接请使用<a href='链接地址' target='_blank'>链接文本</a>格式,确保链接在新窗口打开

使用markdown格式提供清晰的分节回复。
"""
|
||||
|
||||
async def _get_user_details(self, users: List[Dict]) -> List[Dict]:
|
||||
"""获取用户详情和仓库"""
|
||||
enhanced_users = []
|
||||
|
||||
for user in users:
|
||||
try:
|
||||
username = user.get('login')
|
||||
|
||||
if username:
|
||||
# 获取用户详情
|
||||
user_details = await self.github.get_user(username)
|
||||
if user_details:
|
||||
user.update(user_details)
|
||||
|
||||
# 获取用户仓库
|
||||
repos = await self.github.get_user_repos(
|
||||
username,
|
||||
sort="stars",
|
||||
per_page=10 # 增加到10个仓库
|
||||
)
|
||||
if repos:
|
||||
user['top_repos'] = repos
|
||||
|
||||
enhanced_users.append(user)
|
||||
except Exception as e:
|
||||
print(f"获取用户 {user.get('login')} 详情时出错: {str(e)}")
|
||||
enhanced_users.append(user) # 添加原始信息
|
||||
|
||||
return enhanced_users
|
||||
|
||||
def _format_users(self, users: List[Dict]) -> str:
|
||||
"""格式化用户列表"""
|
||||
formatted = []
|
||||
|
||||
for i, user in enumerate(users, 1):
|
||||
# 构建用户信息
|
||||
username = user.get('login', 'N/A')
|
||||
name = user.get('name', username)
|
||||
profile_url = user.get('html_url', '')
|
||||
bio = user.get('bio', '无简介')
|
||||
followers = user.get('followers', 0)
|
||||
public_repos = user.get('public_repos', 0)
|
||||
company = user.get('company', '未指定')
|
||||
location = user.get('location', '未指定')
|
||||
blog = user.get('blog', '')
|
||||
|
||||
user_info = (
|
||||
f"### {i}. {name} (@{username})\n\n"
|
||||
f"- **简介**: {bio}\n"
|
||||
f"- **关注者**: {followers} | **公开仓库**: {public_repos}\n"
|
||||
f"- **公司**: {company} | **地点**: {location}\n"
|
||||
f"- **个人网站**: {blog}\n"
|
||||
f"- **GitHub**: <a href='{profile_url}' target='_blank'>{username}</a>\n\n"
|
||||
)
|
||||
|
||||
# 添加用户的热门仓库
|
||||
top_repos = user.get('top_repos', [])
|
||||
if top_repos:
|
||||
user_info += "**热门仓库**:\n\n"
|
||||
for repo in top_repos:
|
||||
repo_name = repo.get('name', '')
|
||||
repo_url = repo.get('html_url', '')
|
||||
repo_desc = repo.get('description', '无描述')
|
||||
repo_stars = repo.get('stargazers_count', 0)
|
||||
repo_language = repo.get('language', '未指定')
|
||||
|
||||
user_info += (
|
||||
f"- <a href='{repo_url}' target='_blank'>{repo_name}</a> - ⭐ {repo_stars}, {repo_language}\n"
|
||||
f" {repo_desc}\n\n"
|
||||
)
|
||||
|
||||
formatted.append(user_info)
|
||||
|
||||
return "\n".join(formatted)
|
||||
在新工单中引用
屏蔽一个用户