Compare commits
2 commits
4225b6e8a3
...
c8bf401603
Author | SHA1 | Date | |
---|---|---|---|
c8bf401603 | |||
37d477de2e |
1 changed file with 11 additions and 3 deletions
14
core/link.py
14
core/link.py
|
@ -14,7 +14,8 @@ from config import config
|
||||||
whitelist_param_links = ['www.iesdouyin.com','item.taobao.com', 'detail.tmall.com', 'h5.m.goofish.com', 'music.163.com',
|
whitelist_param_links = ['www.iesdouyin.com','item.taobao.com', 'detail.tmall.com', 'h5.m.goofish.com', 'music.163.com',
|
||||||
'www.bilibili.com', 'm.bilibili.com', 'bilibili.com', 'mall.bilibili.com',
|
'www.bilibili.com', 'm.bilibili.com', 'bilibili.com', 'mall.bilibili.com',
|
||||||
'space.bilibili.com', 'live.bilibili.com','item.m.jd.com','item.jd.com',
|
'space.bilibili.com', 'live.bilibili.com','item.m.jd.com','item.jd.com',
|
||||||
'www.xiaohongshu.com','zhuanlan.zhihu.com','www.baidu.com','www.youtube.com']
|
'www.xiaohongshu.com','zhuanlan.zhihu.com','www.baidu.com','www.youtube.com',
|
||||||
|
'music.youtube.com','youtu.be']
|
||||||
|
|
||||||
has_self_redirection_links = ['www.cnbeta.com.tw','m.cnbeta.com.tw','www.landiannews.com', 'www.bilibili.com']
|
has_self_redirection_links = ['www.cnbeta.com.tw','m.cnbeta.com.tw','www.landiannews.com', 'www.bilibili.com']
|
||||||
|
|
||||||
|
@ -161,6 +162,13 @@ def reserve_whitelisted_params(url):
|
||||||
elif parsed_url.hostname in ['www.iesdouyin.com','www.bilibili.com','m.bilibili.com','bilibili.com','mall.bilibili.com','space.bilibili.com','live.bilibili.com','item.m.jd.com','item.jd.com','www.xiaohongshu.com', 'zhuanlan.zhihu.com']:
|
elif parsed_url.hostname in ['www.iesdouyin.com','www.bilibili.com','m.bilibili.com','bilibili.com','mall.bilibili.com','space.bilibili.com','live.bilibili.com','item.m.jd.com','item.jd.com','www.xiaohongshu.com', 'zhuanlan.zhihu.com']:
|
||||||
# 不保留任何参数
|
# 不保留任何参数
|
||||||
new_query_params = {}
|
new_query_params = {}
|
||||||
|
if parsed_url.hostname == 'mall.bilibili.com' and query_params.get('itemsId'):
|
||||||
|
# 处理bilibili工房的商品链接,保留 itemsId 和 page 参数
|
||||||
|
new_query_params['itemsId'] = query_params['itemsId']
|
||||||
|
new_query_params['page'] = query_params.get('page')
|
||||||
|
if parsed_url.hostname == 'gf.bilibili.com' and 'item/detail' in parsed_url.path:
|
||||||
|
# TODO: fix
|
||||||
|
new_query_params = {}
|
||||||
if 'bilibili.com' in parsed_url.hostname and 'video' in parsed_url.path and query_params:
|
if 'bilibili.com' in parsed_url.hostname and 'video' in parsed_url.path and query_params:
|
||||||
# 对于 bilibili 的视频链接,保留一些必要的参数
|
# 对于 bilibili 的视频链接,保留一些必要的参数
|
||||||
if 't' in query_params:
|
if 't' in query_params:
|
||||||
|
@ -174,12 +182,12 @@ def reserve_whitelisted_params(url):
|
||||||
# 重新构建URL
|
# 重新构建URL
|
||||||
cleaned_query = urlencode(new_query_params, doseq=True)
|
cleaned_query = urlencode(new_query_params, doseq=True)
|
||||||
return urlunparse(parsed_url._replace(query=cleaned_query))
|
return urlunparse(parsed_url._replace(query=cleaned_query))
|
||||||
elif parsed_url.hostname in ['www.baidu.com','www.youtube.com']:
|
elif parsed_url.hostname in ['www.baidu.com','www.youtube.com','music.youtube.com','youtu.be']:
|
||||||
new_query_params = {}
|
new_query_params = {}
|
||||||
if parsed_url.hostname == 'www.baidu.com' and 'wd' in query_params:
|
if parsed_url.hostname == 'www.baidu.com' and 'wd' in query_params:
|
||||||
# 百度搜索链接保留 wd 参数
|
# 百度搜索链接保留 wd 参数
|
||||||
new_query_params['wd'] = query_params['wd']
|
new_query_params['wd'] = query_params['wd']
|
||||||
if parsed_url.hostname == 'www.youtube.com':
|
if 'youtube.com' in parsed_url.hostname and query_params:
|
||||||
# YouTube 视频链接保留 v 参数
|
# YouTube 视频链接保留 v 参数
|
||||||
if 'v' in query_params:
|
if 'v' in query_params:
|
||||||
new_query_params['v'] = query_params['v'] # 保留 v 参数
|
new_query_params['v'] = query_params['v'] # 保留 v 参数
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue