-
Notifications
You must be signed in to change notification settings - Fork 228
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[讨论] 对底层请求功能模块重构 #878
Comments
如果能同时替换 同步请求用处不是很大,可以不用。 希望重构时能把这个问题也解决了 #870 (comment) 本质就是因为这行代码引用了
顺带一提现在的 |
Edited on Jan 18th, 2025. 最终确定下来的统一请求客户端标准长这样子: @dataclass
class BiliAPIResponse:
    """
    Response object returned by a request client.

    The payload is kept as raw bytes; use `utf8_text()` or `json()` to
    decode it on demand.
    """

    # Numeric response code (presumably the HTTP status code - confirm
    # against the client implementations).
    code: int
    # Response headers.
    headers: dict
    # Cookies carried by the response; iterated by `get_cookie()`.
    cookies: CookieJar
    # Undecoded response body.
    raw: bytes
    # URL associated with the response (presumably the requested/final
    # URL - confirm against the client implementations).
    url: str

    def utf8_text(self) -> str:
        """
        Decode the raw body as UTF-8.

        Returns:
            str: The decoded body.

        Raises:
            UnicodeDecodeError: If the body is not valid UTF-8.
        """
        return self.raw.decode("utf-8")

    def json(self):
        """
        Parse the UTF-8 decoded body as JSON.

        Returns:
            The deserialized JSON value.

        Raises:
            json.JSONDecodeError: If the body is not valid JSON.
        """
        return json.loads(self.utf8_text())

    def get_cookie(self, name: str) -> Optional[str]:
        """
        Look up a response cookie by name.

        Args:
            name (str): Cookie name to search for.

        Returns:
            Optional[str]: Value of the first cookie whose name matches,
            or None when no cookie matches.
        """
        for cookie in self.cookies:
            if cookie.name == name:
                return cookie.value
        return None
class BiliWsMsgType(Enum):
    """
    WebSocket message/state types.

    NOTE(review): the values 0x0-0xA look like the RFC 6455 frame opcodes,
    while CLOSING/CLOSED use values above the opcode range and appear to be
    client-side connection states rather than wire frames - confirm.
    """

    CONTINUATION = 0x0
    TEXT = 0x1
    BINARY = 0x2
    PING = 0x9
    PONG = 0xA
    CLOSE = 0x8

    CLOSING = 0x100
    CLOSED = 0x101
class BiliAPIClient(ABC):
    """
    Abstract request client.

    Wraps a third-party HTTP/WebSocket client so that the module can drive
    it through one uniform interface.

    NOTE: several methods declare `{}` as a default argument. Those dicts
    are shared between calls, so implementations must treat them as
    read-only and never mutate them in place.
    """

    @abstractmethod
    def __init__(
        self,
        proxy: str = "",
        timeout: float = 0.0,
        verify_ssl: bool = True,
        trust_env: bool = True,
        session: Optional[object] = None,
    ) -> None:
        """
        Args:
            proxy (str, optional): Proxy address. Defaults to "".
            timeout (float, optional): Request timeout. Defaults to 0.0.
            verify_ssl (bool, optional): Whether to verify SSL. Defaults to True.
            trust_env (bool, optional): `trust_env`. Defaults to True.
            session (object, optional): Session object. Defaults to None.

        Note: once `session` is given, `proxy` and `timeout` are ignored.
        """
        raise NotImplementedError

    @abstractmethod
    def get_wrapped_session(self) -> object:
        """
        Get the wrapped third-party session object.

        Returns:
            object: The third-party session object.
        """
        raise NotImplementedError

    @abstractmethod
    def set_timeout(self, timeout: float = 0.0) -> None:
        """
        Set the request timeout.

        Args:
            timeout (float, optional): Request timeout. Defaults to 0.0.
        """
        raise NotImplementedError

    @abstractmethod
    def set_proxy(self, proxy: str = "") -> None:
        """
        Set the proxy address.

        Args:
            proxy (str, optional): Proxy address. Defaults to "".
        """
        raise NotImplementedError

    @abstractmethod
    def set_verify_ssl(self, verify_ssl: bool = True) -> None:
        """
        Set whether SSL is verified.

        Args:
            verify_ssl (bool, optional): Whether to verify SSL. Defaults to True.
        """
        raise NotImplementedError

    @abstractmethod
    def set_trust_env(self, trust_env: bool = True) -> None:
        """
        Set `trust_env`.

        Args:
            trust_env (bool, optional): `trust_env`. Defaults to True.
        """
        raise NotImplementedError

    @abstractmethod
    async def request(
        self,
        method: str = "",
        url: str = "",
        params: dict = {},
        data: Union[dict, str] = {},
        files: dict = {},
        headers: dict = {},
        cookies: dict = {},
        allow_redirects: bool = False,
    ) -> BiliAPIResponse:
        """
        Perform an HTTP request.

        Args:
            method (str, optional): Request method. Defaults to "".
            url (str, optional): Request URL. Defaults to "".
            params (dict, optional): Query parameters. Defaults to {}.
            data (Union[dict, str], optional): Request data. Defaults to {}.
            files (dict, optional): Files to send. Defaults to {}.
            headers (dict, optional): Request headers. Defaults to {}.
            cookies (dict, optional): Request cookies. Defaults to {}.
            allow_redirects (bool, optional): Whether to follow redirects.
                Defaults to False.

        Returns:
            BiliAPIResponse: The response object.

        Note: implementations need not support `data` being a str while
        `files` is non-empty.
        """
        raise NotImplementedError

    @abstractmethod
    async def ws_create(
        self, url: str = "", params: dict = {}, headers: dict = {}, cookies: dict = {}
    ) -> int:
        """
        Create a WebSocket connection.

        Args:
            url (str, optional): WebSocket URL. Defaults to "".
            params (dict, optional): WebSocket parameters. Defaults to {}.
            headers (dict, optional): WebSocket headers. Defaults to {}.
            cookies (dict, optional): WebSocket cookies. Defaults to {}.

        Returns:
            int: Connection number used by the other `ws_*` methods.
        """
        raise NotImplementedError

    @abstractmethod
    async def ws_send(self, cnt: int, data: bytes) -> None:
        """
        Send data over a WebSocket connection.

        Args:
            cnt (int): WebSocket connection number.
            data (bytes): Data to send.
        """
        raise NotImplementedError

    @abstractmethod
    async def ws_recv(self, cnt: int) -> Tuple[bytes, BiliWsMsgType]:
        """
        Receive data from a WebSocket connection.

        Args:
            cnt (int): WebSocket connection number.

        Returns:
            Tuple[bytes, BiliWsMsgType]: The received data and its type/state.

        Note: implementations are advised not to block when the connection
        is closed from another thread, and to report CLOSE, CLOSING and
        CLOSED in addition to the basic message types.
        """
        raise NotImplementedError

    @abstractmethod
    async def ws_close(self, cnt: int) -> None:
        """
        Close a WebSocket connection.

        Args:
            cnt (int): WebSocket connection number.
        """
        raise NotImplementedError

    @abstractmethod
    async def close(self) -> None:
        """
        Close the request client, i.e. close the wrapped third-party session.
        """
        raise NotImplementedError


# 翻看整个项目的用法,应该是没逃出这个框架。 另外,其实整个模块已经重写完了,缝合了原来 |
check https://github.com/Nemo2011/bilibili-api/blob/dev/bilibili_api/utils/network.py 有些需要改动的还没改,有的改动了但缺乏测试( 另外还得考虑是否在模块内主动提供 |
提醒注意这个 bug, 可能会导致整个程序崩掉, 不是抛异常那种. 我也在想办法尽快解决.
|
初步猜测:此 bug 无影响 原因:见下 from curl_cffi import requests
from bilibili_api import sync
URL = "https://xy59x47x230x25xy.mcdn.bilivideo.cn:8082/v1/resource/62131_da3-1-100023.m4s?agrr=0&build=0&buvid=6468D7D3-3D12-9C38-99F9-E33230E1693D19648infoc&bvc=vod&bw=25963&deadline=1739457219&e=ig8euxZM2rNcNbdlhoNvNC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5JZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5tZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZlqNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0eN0B599M%3D&f=u_0_0&gen=playurlv2&logo=A0020000&mcdnid=50017743&mid=0&nbs=1&nettype=0&og=cos&oi=0&orderid=0%2C3&os=mcdn&platform=pc&sign=a58b55&traceid=trKaaVowiDJHxJ_0_e_N&uipk=5&uparams=e%2Cuipk%2Cnbs%2Cdeadline%2Cgen%2Cos%2Coi%2Ctrid%2Cmid%2Cplatform%2Cog&upsig=980f3f4b9dfd1d960066e70fbc088a5d"
HEADERS = {"User-Agent": "Mozilla/5.0", "Referer": "https://www.bilibili.com"}
def sync_main():
    """
    Stream URL ten times through one synchronous curl_cffi session.

    Each attempt prints its index, then a running byte count while the
    body is consumed chunk by chunk. The response is closed after every
    attempt and the session at the end, including when streaming raises.
    """
    sess = requests.Session()
    try:
        for attempt in range(10):
            print(f"#{attempt}")
            response = sess.request("GET", URL, headers=HEADERS, stream=True)
            try:
                leng = 0
                for chunk in response.iter_content():
                    leng += len(chunk)
                    # "\r" keeps the counter on one terminal line.
                    print(leng, end="\r")
                print()
            finally:
                response.close()
    finally:
        sess.close()
async def main():
    """
    Stream URL ten times through one asynchronous curl_cffi session.

    Async counterpart of the synchronous run: each attempt prints its
    index, then a running byte count while the body is consumed chunk by
    chunk. The response is closed after every attempt and the session at
    the end, including when streaming raises.
    """
    sess = requests.AsyncSession()
    try:
        for attempt in range(10):
            print(f"#{attempt}")
            response = await sess.request("GET", URL, headers=HEADERS, stream=True)
            try:
                leng = 0
                async for chunk in response.aiter_content():
                    leng += len(chunk)
                    # "\r" keeps the counter on one terminal line.
                    print(leng, end="\r")
                print()
            finally:
                await response.aclose()
    finally:
        await sess.close()
# Run the synchronous variant. The commented-out line below is the
# asynchronous variant; swap the two to test it instead.
sync_main()
# sync(main())
# Results observed over repeated runs (pasted terminal output). The
# failures are native malloc double-free aborts that kill the interpreter
# process, not Python exceptions.
# - sync failed on #1
# Python(71983,0x1e9010f40) malloc: double free for ptr 0x1589dda00
# Python(71983,0x1e9010f40) malloc: *** set a breakpoint in malloc_error_break to debug
# [1] 71983 abort python test-run.py
# - async success
# - sync success
# - async success
# - async success
# - async success
# - async success
# - async success
# - sync success
# - sync failed on #6
# Python(75338,0x1e9010f40) malloc: Double free of object 0x1253c8760
# Python(75338,0x1e9010f40) malloc: *** set a breakpoint in malloc_error_break to debug
# [1] 75338 abort python test-run.py
# - sync success
# - sync success
# - sync failed on #3
# Python(76169,0x1e9010f40) malloc: Double free of object 0x10d60c810
# Python(76169,0x1e9010f40) malloc: *** set a breakpoint in malloc_error_break to debug
# [1] 76169 abort python test-run.py 可能需要更多测试以有力地证明异步下不会有问题,但目前可以肯定的是异步下出错概率比同步下小得多。 原因可能是 |
即
utils/network.py
。动机主要在
curl_cffi
库上,其可以模仿浏览器的指纹绕过某些反爬虫措施。详见 #877。而且这东西同时支持同步和异步,支持
WebSocket
,速度也比aiohttp
和httpx
快。所以模块支持
curl_cffi
还是有必要的,只是network.py
本来就很乱,现在还要往上加东西会更乱。所以考虑趁此机会对
network.py
进行重构,同时加上支持自定义网络请求模块的功能。计划是这样子的:
然后就是想到了两个问题:
第一,上面的方案改动绝对不会少,但是有改动少的方法:把 httpx 批量替换成
curl_cffi
就可以了,甚至再大胆一点,把aiohttp
也换成curl_cffi
,不仅实现了大一统,还在几乎每个方面得到了最优化。所以要不要干脆放弃aiohttp
和httpx
?第二,同步请求还要保留吗?
The text was updated successfully, but these errors were encountered: