From cefb94515e9fdda426e5491e34b391073485add6 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 24 Apr 2023 19:29:26 +0800 Subject: [PATCH] =?UTF-8?q?=E7=BB=A7=E7=BB=AD=E4=BF=AE=E6=94=B9=E6=8A=93?= =?UTF-8?q?=E5=8C=85=E5=8A=9F=E8=83=BD=EF=BC=8C=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_base.py | 59 ++++++++++++++++++++-------------- DrissionPage/chromium_base.pyi | 3 +- DrissionPage/commons/web.py | 48 +++++++++++++++++++-------- 3 files changed, 72 insertions(+), 38 deletions(-) diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index 77f399a..c3d6bb9 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -1116,7 +1116,7 @@ class NetworkListener(object): break sleep(.1) - if not self._results: + if self._caught == 0: r = False else: # todo @@ -1127,10 +1127,23 @@ class NetworkListener(object): self._caught = 0 return r + def _requestWillBeSent(self, **kwargs): + """接收到请求时的回调函数""" + for target in self._targets: + if (self._is_regex and search(target, kwargs['request']['url'])) or ( + not self._is_regex and target in kwargs['request']['url']): + self._requests[kwargs['requestId']] = DataPacket(kwargs['requestId'], self._page.tab_id, target, kwargs) + + if kwargs['request'].get('hasPostData', None) and not kwargs['request'].get('postData', None): + self._requests[kwargs['requestId']]._rawPostData \ + = self._page.run_cdp('Network.getRequestPostData', requestId=kwargs['requestId'])['postData'] + + break + def _response_received(self, **kwargs): """接收到返回信息时处理方法""" if kwargs['requestId'] in self._requests: - self._requests[kwargs['requestId']]['response'] = kwargs['response'] + self._requests[kwargs['requestId']]._raw_response = kwargs def _loading_finished(self, **kwargs): """请求完成时处理方法""" @@ -1144,31 +1157,29 @@ class NetworkListener(object): body = '' is_base64 = False - request = self._requests[request_id] - target = request['target'] - rd = ResponseData(request_id, request['response'], body, self._page.tab_id, target) - rd.postData = request['post_data'] - rd._base64_body = is_base64 - rd.requestHeaders = request['request_headers'] - rd.method = request['method'] - self._results[target] = rd + data_packet = self._requests[request_id] + data_packet._rowBody = body + data_packet._base64_body = is_base64 + + if data_packet.target in self._results: + self._results[data_packet.target].append(data_packet) + else: + self._results[data_packet.target] = [data_packet] self._caught += 1 - def _requestWillBeSent(self, **kwargs): - """接收到请求时的回调函数""" - for target in self._targets: - if (self._is_regex and search(target, kwargs['request']['url'])) or ( - not self._is_regex and target in kwargs['request']['url']): - # self._requests[kwargs['requestId']] = {'target': target, - # 'post_data': kwargs['request'].get('postData', None), - # 'request_headers': kwargs['request']['headers'], - # 'method': kwargs['request']['method']} - self._requests[kwargs['requestId']] = DataPacket(kwargs['requestId'], self._page.tab_id, target, kwargs) - if kwargs['request'].get('hasPostData', None) and not kwargs['request'].get('postData', None): - pd = self._page.run_cdp('Network.getRequestPostData', requestId=kwargs['requestId'])['postData'] - self._requests[kwargs['requestId']]. - break + def _loading_failed(self, **kwargs): + """请求失败时的处理方法""" + if kwargs['requestId'] in self._requests: + data_packet = self._requests[kwargs['requestId']] + data_packet._raw_fail_info = kwargs + + if data_packet.target in self._results: + self._results[data_packet.target].append(data_packet) + else: + self._results[data_packet.target] = [data_packet] + + self._caught += 1 class ChromiumPageScroll(ChromiumScroll): diff --git a/DrissionPage/chromium_base.pyi b/DrissionPage/chromium_base.pyi index 4c65f0a..ee85f58 100644 --- a/DrissionPage/chromium_base.pyi +++ b/DrissionPage/chromium_base.pyi @@ -16,6 +16,7 @@ from .chromium_driver import ChromiumDriver from .chromium_element import ChromiumElement, ChromiumScroll from .chromium_frame import ChromiumFrame from .commons.constants import NoneElement +from .commons.web import DataPacket from .session_element import SessionElement @@ -248,7 +249,7 @@ class NetworkListener(object): self._caught: int = ... self._targets: Union[str, dict] = ... self._single: bool = ... - self._results: Union[ResponseData, Dict[str, ResponseData], False] = ... + self._results: Union[ResponseData, Dict[str, DataPacket], False] = ... self._is_regex: bool = ... self._requests: dict = ... diff --git a/DrissionPage/commons/web.py b/DrissionPage/commons/web.py index 3e68386..2ff6d53 100644 --- a/DrissionPage/commons/web.py +++ b/DrissionPage/commons/web.py @@ -18,7 +18,9 @@ from tldextract import extract class DataPacket(object): """返回的数据包管理类""" __slots__ = ('requestId', 'request', 'response', 'rawBody', 'tab', 'target', '_requestHeaders', '_body', - '_base64_body', '_rawPostData', '_postData', + '_postData', + # cdp 原始数据 + '_raw_request', '_raw_response', '_raw_fail_info', '_rawPostData', '_rawBody', '_base64_body', 'url', 'urlFragment', 'method', 'postDataEntries', 'mixedContentType', 'initialPriority', 'referrerPolicy', 'isLinkPreload', 'trustTokenParams', 'isSameSite', @@ -35,24 +37,40 @@ class DataPacket(object): :param request_id: request id :param tab: 产生这个数据包的tab的id :param target: 监听目标 - :param raw_request: 原始request数据 + :param raw_request: 原始request数据,从cdp获得 """ self.requestId = request_id - self._raw_request = raw_request self.tab = tab self.target = target + + self._raw_request = raw_request + self._raw_response = None + self._raw_fail_info = None + self._rawPostData = None + self._rawBody = None + self._base64_body = False + self._requestHeaders = None self._postData = None self._body = None - self._base64_body = False - self._rawPostData = None - - def __getattr__(self, item): - return self.response.get(item, None) def __repr__(self): - return f'' + return f'' + @property + def reuqest(self): + pass + + @property + def response(self): + pass + + @property + def fail_info(self): + pass + + +class RequestData(object): @property def responseHeaders(self): """以大小写不敏感字典返回headers数据""" @@ -77,10 +95,6 @@ class DataPacket(object): self._postData = self._rawPostData return self._postData - def set_postData(self, val): - """设置postData,当hasPostData为True但数据太长时使用""" - self._rawPostData = val - @property def body(self): """返回body内容,如果是json格式,自动进行转换,如果时图片格式,进行base64转换,其它格式直接返回文本""" @@ -97,6 +111,14 @@ class DataPacket(object): return self._body +class ResponseData(object): + pass + + +class FailData(object): + pass + + def get_ele_txt(e): """获取元素内所有文本 :param e: 元素对象