diff --git a/DrissionPage/_pages/chromium_base.py b/DrissionPage/_pages/chromium_base.py index 35d93c5..d8df4e9 100644 --- a/DrissionPage/_pages/chromium_base.py +++ b/DrissionPage/_pages/chromium_base.py @@ -93,14 +93,15 @@ class ChromiumBase(BasePage): self._get_document() self._first_run = False - def _driver_init(self, tab_id): + def _driver_init(self, tab_id, is_init=True): """新建页面、页面刷新、切换标签页后要进行的cdp参数初始化 :param tab_id: 要跳转到的标签页id + :param is_init: 是否初始化时执行本方法,用于判断是否to_tab()调用 :return: None """ self._is_loading = True - if hasattr(self, '_driver'): - return + if is_init and hasattr(self, '_driver'): + return # ChromiumPage接收ChromiumDriver方式启动时 self._driver = ChromiumDriver(tab_id=tab_id, tab_type='page', address=self.address) self._driver.call_method('DOM.enable') @@ -379,6 +380,7 @@ class ChromiumBase(BasePage): self.wait.load_complete() if self._scroll is None: self._scroll = ChromiumPageScroll(self) + self.set.scroll.smooth(False) return self._scroll @property diff --git a/DrissionPage/_pages/chromium_base.pyi b/DrissionPage/_pages/chromium_base.pyi index 447e86c..f036256 100644 --- a/DrissionPage/_pages/chromium_base.pyi +++ b/DrissionPage/_pages/chromium_base.pyi @@ -50,7 +50,7 @@ class ChromiumBase(BasePage): def _connect_browser(self, tab_id: str = None) -> None: ... - def _driver_init(self, tab_id: str) -> None: ... + def _driver_init(self, tab_id: str, is_init:bool=True) -> None: ... def _get_document(self) -> None: ... diff --git a/DrissionPage/_pages/chromium_frame.py b/DrissionPage/_pages/chromium_frame.py index f83f322..d7fb0ed 100644 --- a/DrissionPage/_pages/chromium_frame.py +++ b/DrissionPage/_pages/chromium_frame.py @@ -81,9 +81,8 @@ class ChromiumFrame(ChromiumBase): self.retry_interval = self._target_page.retry_interval self._page_load_strategy = self._target_page.page_load_strategy self._download_path = self._target_page.download_path - # self._when_download_file_exists = self._target_page._when_download_file_exists - def _driver_init(self, tab_id): + def _driver_init(self, tab_id, is_init=True): """避免出现服务器500错误 :param tab_id: 要跳转到的标签页id :return: None diff --git a/DrissionPage/_pages/chromium_page.py b/DrissionPage/_pages/chromium_page.py index 6c6ba37..6fd1026 100644 --- a/DrissionPage/_pages/chromium_page.py +++ b/DrissionPage/_pages/chromium_page.py @@ -17,6 +17,7 @@ from .._pages.chromium_tab import ChromiumTab from .._units.setter import ChromiumPageSetter from .._units.tab_rect import ChromiumTabRect from .._units.waiter import ChromiumPageWaiter +from ..errors import BrowserConnectError class ChromiumPage(ChromiumBase): @@ -65,8 +66,11 @@ class ChromiumPage(ChromiumBase): """连接浏览器""" connect_browser(self._driver_options) ws = get(f'http://{self._driver_options.debugger_address}/json/version', - headers={'Connection': 'close'}).json()['webSocketDebuggerUrl'] - self._browser = Browser(self._driver_options.debugger_address, ws.split('/')[-1], self) + headers={'Connection': 'close'}) + if not ws: + raise BrowserConnectError('\n浏览器连接失败,请检查是否启用全局代理。如有,须开放127.0.0.1地址。') + ws = ws.json()['webSocketDebuggerUrl'].split('/')[-1] + self._browser = Browser(self._driver_options.debugger_address, ws, self) def _d_set_runtime_settings(self): """设置运行时用到的属性""" @@ -230,7 +234,7 @@ class ChromiumPage(ChromiumBase): return self.driver.stop() - self._driver_init(tab_id) + self._driver_init(tab_id, False) if read_doc and self.ready_state in ('complete', None): self._get_document() diff --git a/DrissionPage/_units/network_listener.py b/DrissionPage/_units/network_listener.py index 6376ee1..e36954c 100644 --- a/DrissionPage/_units/network_listener.py +++ b/DrissionPage/_units/network_listener.py @@ -23,8 +23,7 @@ class NetworkListener(object): :param page: ChromiumBase对象 """ self._page = page - self._driver = ChromiumDriver(page.tab_id, 'page', page.address) - self._driver.call_method('Network.enable') + self._driver = None self._caught = None # 临存捕捉到的数据 self._request_ids = None # 暂存须要拦截的请求id @@ -75,6 +74,11 @@ class NetworkListener(object): """ if targets: self.set_targets(targets, is_regex, method) + if self.listening: + return + + self._driver = ChromiumDriver(self._page.tab_id, 'page', self._page.address) + self._driver.call_method('Network.enable') self.listening = True self._request_ids = {} @@ -118,10 +122,10 @@ class NetworkListener(object): return [self._caught.get_nowait() for _ in range(count)] def steps(self, count=None, timeout=None, gap=1): - """用于单步操作,可实现没收到若干个数据包执行一步操作(如翻页) + """用于单步操作,可实现每收到若干个数据包执行一步操作(如翻页) :param count: 需捕获的数据包总数,为None表示无限 :param timeout: 每个数据包等待时间,为None表示无限 - :param gap: 每接收到多少个数据包触发 + :param gap: 每接收到多少个数据包返回一次数据 :return: 用于在接收到监听目标时触发动作的可迭代对象 """ caught = 0 @@ -144,6 +148,8 @@ class NetworkListener(object): if self.listening: self.pause() self.clear() + self._driver.stop() + self._driver = None def pause(self, clear=True): """暂停监听 @@ -181,7 +187,7 @@ class NetworkListener(object): def _requestWillBeSent(self, **kwargs): """接收到请求时的回调函数""" if not self._targets: - self._request_ids[kwargs['requestId']] = DataPacket(self._driver.id, None, kwargs) + self._request_ids[kwargs['requestId']] = DataPacket(self._page.tab_id, None, kwargs) if kwargs['request'].get('hasPostData', None) and not kwargs['request'].get('postData', None): self._request_ids[kwargs['requestId']]._raw_post_data = \ self._driver.call_method('Network.getRequestPostData', requestId=kwargs['requestId'])['postData'] @@ -192,7 +198,7 @@ class NetworkListener(object): if ((self._is_regex and search(target, kwargs['request']['url'])) or (not self._is_regex and target in kwargs['request']['url'])) and ( not self._method or kwargs['request']['method'] in self._method): - self._request_ids[kwargs['requestId']] = DataPacket(self._driver.id, target, kwargs) + self._request_ids[kwargs['requestId']] = DataPacket(self._page.tab_id, target, kwargs) if kwargs['request'].get('hasPostData', None) and not kwargs['request'].get('postData', None): self._request_ids[kwargs['requestId']]._raw_post_data = \ @@ -253,7 +259,7 @@ class DataPacket(object): :param target: 监听目标 :param raw_request: 原始request数据,从cdp获得 """ - self.tab = tab_id + self.tab_id = tab_id self.target = target self._raw_request = raw_request @@ -353,6 +359,11 @@ class Response(object): self._headers = CaseInsensitiveDict(self._response['headers']) return self._headers + @property + def raw_body(self): + """返回未被处理的body文本""" + return self._raw_body + @property def body(self): """返回body内容,如果是json格式,自动进行转换,如果时图片格式,进行base64转换,其它格式直接返回文本""" diff --git a/DrissionPage/_units/network_listener.pyi b/DrissionPage/_units/network_listener.pyi index 3b85ad4..bb26f99 100644 --- a/DrissionPage/_units/network_listener.pyi +++ b/DrissionPage/_units/network_listener.pyi @@ -100,14 +100,15 @@ class Request(object): _headers: Union[CaseInsensitiveDict, None] = ... method: str = ... - # urlFragment: str = ... - # postDataEntries: list = ... - # mixedContentType: str = ... - # initialPriority: str = ... - # referrerPolicy: str = ... - # isLinkPreload: bool = ... - # trustTokenParams: dict = ... - # isSameSite: bool = ... + urlFragment = ... + hasPostData = ... + postDataEntries = ... + mixedContentType = ... + initialPriority = ... + referrerPolicy = ... + isLinkPreload = ... + trustTokenParams = ... + isSameSite = ... def __init__(self, raw_request: dict, post_data: str): self._request: dict = ... @@ -122,9 +123,29 @@ class Request(object): class Response(object): - status: str = ... - statusText: int = ... - mimeType: str = ... + url = ... + status = ... + statusText = ... + headersText = ... + mimeType = ... + requestHeaders = ... + requestHeadersText = ... + connectionReused = ... + connectionId = ... + remoteIPAddress = ... + remotePort = ... + fromDiskCache = ... + fromServiceWorker = ... + fromPrefetchCache = ... + encodedDataLength = ... + timing = ... + serviceWorkerResponseSource = ... + responseTime = ... + cacheStorageCacheName = ... + protocol = ... + alternateProtocolUsage = ... + securityState = ... + securityDetails = ... def __init__(self, raw_response: dict, raw_body: str, base64_body: bool): self._response: dict = ... @@ -136,5 +157,8 @@ class Response(object): @property def headers(self) -> CaseInsensitiveDict: ... + @property + def raw_body(self) -> str: ... + @property def body(self) -> Union[str, dict, bool]: ... diff --git a/README.md b/README.md index 0c1b297..8d5fe9e 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,8 @@ python 版本:3.6 及以上 --- +# 🛠 如何使用 + **📖 使用文档:** [点击查看](http://g1879.gitee.io/drissionpagedocs) **交流 QQ 群:** 897838127[已满]、558778073 @@ -36,7 +38,7 @@ python 版本:3.6 及以上 # 🔥 新版预告 -查看下一步开发计划:[新版预告](http://g1879.gitee.io/drissionpagedocs/whatsnew/3_3/) +查看下一步开发计划:[新版预告](https://g1879.gitee.io/drissionpagedocs/whatsnew/3_3/) --- @@ -110,15 +112,9 @@ python 版本:3.6 及以上 --- -# 🛠 使用文档 - -[点击跳转到使用文档](http://g1879.gitee.io/drissionpage) - ---- - # 🔖 版本历史 -[点击查看版本历史](http://g1879.gitee.io/drissionpagedocs/history/3.x/) +[点击查看版本历史](https://g1879.gitee.io/drissionpagedocs/history/) ---