From c9762da447f5bd12df81f3bc41ddd787e40ac756 Mon Sep 17 00:00:00 2001 From: grassblock Date: Fri, 1 Aug 2025 14:27:55 +0800 Subject: [PATCH] feat: handle some links directly and add report links features --- adapters/tg.py | 11 ++++++----- config.example.yaml | 6 ++++++ config.py | 4 ++++ core/link.py | 32 ++++++++++++++++++++++---------- core/report_links.py | 33 +++++++++++++++++++++++++++++++++ 5 files changed, 71 insertions(+), 15 deletions(-) create mode 100644 core/report_links.py diff --git a/adapters/tg.py b/adapters/tg.py index 4d37db4..b8a8cc8 100644 --- a/adapters/tg.py +++ b/adapters/tg.py @@ -17,6 +17,7 @@ from core.link import handle_links from core.post_to_fedi import handle_auth, handle_post_to_fedi from core.promote import handle_promote_command from core.repeater import MessageRepeater +from core.report_links import report_broken_links from core.simple import handle_start_command, handle_baka, dummy_handler, handle_info_command from core.actions import handle_actions, handle_reverse_actions from core.stats import handle_stats_command @@ -55,15 +56,15 @@ class TelegramAdapter: router.message(F.chat.type.in_({'group', 'supergroup'}) & F.sender_chat & ( F.sender_chat.type == 'channel') & F.is_automatic_forward)( handle_unpin_channel_message) + # link 模块 + router.message(Command('report_broken_links'))(report_broken_links) + router.message(F.text.contains('http') & ~F.text.contains('/report_broken_links'))(handle_links) + # repeater 模块 + router.message(F.chat.type.in_({'group', 'supergroup'}))(MessageRepeater().handle_message) # actions 模块 router.message(F.text.startswith('/'))(handle_actions) router.message(F.text.startswith('\\'))(handle_reverse_actions) router.message(F.text == '我是笨蛋')(handle_baka) - # link 模块 - router.message(F.text.contains('http'))(handle_links) - # repeater 模块 - router.message(F.chat.type.in_({'group', 'supergroup'}))(MessageRepeater().handle_message) - # 捕获所有其他消息 router.message(F.chat.type.in_({'group', 'supergroup'}))(dummy_handler) diff --git a/config.example.yaml b/config.example.yaml index e19b0ff..e6d17a5 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -1,6 +1,12 @@ # 管理员对应的 Telegram 用户 ID # 你可以通过 /info 命令获取你的用户 ID admin: 123456789 +# 开发者对应的 Telegram 用户 ID +# 如果你希望 fork 这个项目并进行开发,你可以在这里更改成你的用户 ID,用户不推荐更改这部分 +# 这部分仅限于汇报链接跟踪参数去除的问题 +dev: 616760897 + + # global features settings features: diff --git a/config.py b/config.py index 98d46a8..1b4b184 100644 --- a/config.py +++ b/config.py @@ -23,6 +23,10 @@ class Config: """Get admin user ID""" return self.config_data.get('admin') + def get_developer_id(self) -> Optional[int]: + """Get developer user ID""" + return self.config_data.get('dev') + def is_feature_enabled(self, feature_name: str, chat_id: Optional[int] = None) -> bool: """ Check if a feature is enabled for a specific chat or globally diff --git a/core/link.py b/core/link.py index eb0ab41..21ddcf5 100644 --- a/core/link.py +++ b/core/link.py @@ -1,5 +1,4 @@ import aiohttp -import requests import re import html import asyncio @@ -44,7 +43,6 @@ async def extend_short_urls(url): """ 扩展短链接 """ async with aiohttp.ClientSession() as session: async with session.get(url,allow_redirects=False) as r: - if 'tb.cn' in urlparse(url).hostname: # 淘宝短链接特殊处理 html_content = await r.text() @@ -52,7 +50,7 @@ async def extend_short_urls(url): if not url: return url if r.status in [301, 302, 304, 307, 308] and 'Location' in r.headers: - if 'http' in r.headers['Location']: + if r.headers['Location'].startswith(('http://', 'https://')): return r.headers['Location'] else: # 如果 Location 头部没有 http 前缀,可能是相对路径 @@ -131,7 +129,7 @@ def reserve_whitelisted_params(url): # 重新构建URL cleaned_query = urlencode(new_query_params, doseq=True) return urlunparse(parsed_url._replace(query=cleaned_query)) - elif parsed_url.hostname in ['www.bilibili.com','m.bilibili.com','bilibili.com','mall.bilibili.com','space.bilibili.com','live.bilibili.com']: + elif parsed_url.hostname in ['www.iesdouyin.com','www.bilibili.com','m.bilibili.com','bilibili.com','mall.bilibili.com','space.bilibili.com','live.bilibili.com']: # 不保留任何参数 new_query_params = {} # 重新构建URL @@ -149,21 +147,33 @@ def transform_into_fixed_url(url): if parsed_url.hostname in ['bilibili.com', 'm.bilibili.com']: # 把 bilibili 的链接转换为桌面端的 www.bilibili.com return urlunparse(parsed_url._replace(netloc='www.bilibili.com')) + if parsed_url.hostname in ['www.iesdouyin.com']: + # 把抖音分享链接转换为正常的 www.douyin.com + return urlunparse(parsed_url._replace(netloc='www.douyin.com')) return url async def process_url(url): - # 首先清理跟踪参数 + # 对于适配的网站,直接保留白名单参数并返回 + if urlparse(url).hostname in ['www.iesdouyin.com','item.taobao.com', 'detail.tmall.com', 'h5.m.goofish.com', 'music.163.com', + 'www.bilibili.com', 'm.bilibili.com', 'bilibili.com', 'mall.bilibili.com', + 'space.bilibili.com', 'live.bilibili.com']: + final_url = reserve_whitelisted_params(url) + if urlparse(final_url).hostname in ['www.iesdouyin.com','bilibili.com', 'm.bilibili.com']: + final_url = transform_into_fixed_url(final_url) + return final_url + # 对于其它的网站,首先清理跟踪参数 cleaned_url = remove_tracking_params(url) # 扩展短链接 extended_url = await extend_short_urls(cleaned_url) - # 对于一些网站,只保留白名单中的参数 - if urlparse(extended_url).hostname in ['item.taobao.com', 'detail.tmall.com', 'h5.m.goofish.com', 'music.163.com', + # 对于扩展短链接之后的适配的网站,直接保留白名单参数并返回 + if urlparse(extended_url).hostname in ['www.iesdouyin.com','item.taobao.com', 'detail.tmall.com', 'h5.m.goofish.com', 'music.163.com', 'www.bilibili.com', 'm.bilibili.com', 'bilibili.com', 'mall.bilibili.com', 'space.bilibili.com', 'live.bilibili.com']: final_url = reserve_whitelisted_params(extended_url) - if urlparse(extended_url).hostname in ['bilibili.com', 'm.bilibili.com']: + if urlparse(final_url).hostname in ['www.iesdouyin.com','bilibili.com', 'm.bilibili.com']: final_url = transform_into_fixed_url(final_url) - elif urlparse(extended_url).hostname in ['x.com', 'twitter.com']: + return final_url + if urlparse(extended_url).hostname in ['x.com', 'twitter.com']: # 对于 Twitter 链接,转换为 fixupx.com removed_tracking_url = remove_tracking_params(extended_url) final_url = transform_into_fixed_url(removed_tracking_url) @@ -192,4 +202,6 @@ async def handle_links(message: Message): # 回复处理后的链接 if final_urls: - await message.reply(f"{"\n".join(final_urls)}\n消息里有包含跟踪参数的链接,已经帮你转换了哦") \ No newline at end of file + await message.reply(f"{"\n".join(final_urls)}\n消息里有包含跟踪参数的链接,已经帮你转换了哦~\n\n注意:" + f"这个功能是试验性的,可能会出现链接无法访问等问题,如果出现链接没有清理干净的情况," + f"可以将返回的结果再次发送给bot,或者尝试手动清理。\n如果你找到了这个工具的问题,欢迎把它通过 `/report_broken_links 链接` 报告给开发者!") \ No newline at end of file diff --git a/core/report_links.py b/core/report_links.py new file mode 100644 index 0000000..c5df9ee --- /dev/null +++ b/core/report_links.py @@ -0,0 +1,33 @@ +import re + +from aiogram.types import Message + +from config import config + + +async def report_broken_links(message: Message): + if not config.is_feature_enabled('link', message.chat.id): + return + # 获取被回复的消息中的链接 + links = [] + # 链接正则表达式 + url_pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+' + text = message.text or message.caption + # Extract URLs from message text + if text: + links = re.findall(url_pattern, text) + + if not links: + await message.reply("没有找到链接。请提供链接以及希望得到的清理结果。格式最好是 `/report_broken_links 链接 描述文本`。") + return + + # 处理报告逻辑(例如,保存到数据库或发送给开发者) + report_content = f"用户 {message.from_user.full_name} ({message.from_user.id}) 报告了以下链接的问题:\n" + report_content += "\n".join(links) + "\n" + report_content += f"描述:{text.split(' ')[2] if ' ' in text else text}\n" + + # 将 report_content 发送到开发者 + developer_id = config.get_developer_id() # 从配置获取开发者ID + await message.bot.send_message(chat_id=developer_id, text=report_content) + + await message.reply("感谢您的报告,我们会尽快处理!") \ No newline at end of file