diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index 1ddc43d..f42ace7 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -41,6 +41,7 @@ class ChromiumBase(BasePage): self._tab_obj = None self._set = None self._screencast = None + self._listener = None if isinstance(address, int) or (isinstance(address, str) and address.isdigit()): address = f'127.0.0.1:{address}' @@ -360,6 +361,13 @@ class ChromiumBase(BasePage): self._screencast = Screencast(self) return self._screencast + @property + def listener(self): + """返回用于聆听数据包的对象""" + if self._listener is None: + self._listener = NetworkListener(self) + return self._listener + def run_cdp(self, cmd, **cmd_args): """执行Chrome DevTools Protocol语句 :param cmd: 协议项目 @@ -1024,34 +1032,6 @@ class ChromiumBaseWaiter(object): sleep(gap) return False - def set_targets(self, targets=None, is_regex=False, count=None): - """指定要等待的数据包 - :param targets: 要匹配的数据包url特征,可用list等传入多个,为None时获取所有 - :param is_regex: 设置的target是否正则表达式 - :param count: 设置总共等待多少个数据包,为None时每个目标等待1个 - :return: None - """ - if not self._listener: - self._listener = NetworkListener(self._driver) - self._listener.set_targets(targets, is_regex, count=count) - self._listener.start() - - def data_packets(self, timeout=None, any_one=False): - """等待指定数据包加载完成 - :param timeout: 超时时间,为None则使用页面对象timeout - :param any_one: 多个target时,是否全部监听到才结束,为True时监听到一个目标就结束 - :return: ResponseData对象或监听结果字典 - """ - if not self._listener: - self._listener = NetworkListener(self._driver) - return self._listener.listen(timeout, any_one) - - def stop_listening(self): - """停止监听数据包""" - if not self._listener: - self._listener = NetworkListener(self._driver) - self._listener.stop() - class NetworkListener(object): def __init__(self, page): @@ -1060,17 +1040,19 @@ class NetworkListener(object): self._is_regex = False self._results = {} self._single = False + self._method = None self._requests = {} self._count = None self._caught = 0 # 已获取到的数量 self._driver = self._page.driver - def set_targets(self, targets=None, is_regex=False, count=None): + def set_targets(self, targets=None, is_regex=False, count=None, method=None): """指定要等待的数据包 :param targets: 要匹配的数据包url特征,可用list等传入多个,为None时获取所有 :param is_regex: 设置的target是否正则表达式 :param count: 设置总共等待多少个数据包,为None时每个目标等待1个 + :param method: 设置监听的请求类型,可用list等指定多个,为None时监听全部 :return: None """ if not isinstance(targets, (str, list, tuple, set)) and targets is not None: @@ -1083,26 +1065,36 @@ class NetworkListener(object): self._targets = {targets} else: self._targets = set(targets) - if count is None: - self._count = len(self._targets) + + self._count = len(self._targets) if not count else count self._single = self._count == 1 + if method is not None: + if isinstance(method, str): + self._method = {method.upper()} + elif isinstance(method, (list, tuple, set)): + self._method = set(i.upper() for i in method) + else: + raise TypeError('method参数只能是str、list、tuple、set类型。') + self.start() def start(self): - self._driver.set_listener('Fetch.requestPaused', self._request_paused) self._driver.set_listener('Network.requestWillBeSent', self._requestWillBeSent) self._driver.set_listener('Network.responseReceived', self._response_received) self._driver.set_listener('Network.loadingFinished', self._loading_finished) + self._driver.set_listener('Network.loadingFailed', self._loading_failed) self._driver.call_method('Network.enable') - self._driver.call_method('Fetch.enable', patterns=[{'requestStage': 'Request'}, {'requestStage': 'Response'}]) + # self._driver.set_listener('Fetch.requestPaused', self._request_paused) + # self._driver.call_method('Fetch.enable', patterns=[{'requestStage': 'Request'}, {'requestStage': 'Response'}]) def stop(self): """停止监听数据包""" - self._driver.set_listener('Fetch.requestPaused', None) + self._driver.call_method('Network.disable') self._driver.set_listener('Network.requestWillBeSent', None) self._driver.set_listener('Network.responseReceived', None) self._driver.set_listener('Network.loadingFinished', None) - self._driver.call_method('Fetch.disable') - self._driver.call_method('Network.disable') + self._driver.set_listener('Network.loadingFailed', None) + # self._driver.call_method('Fetch.disable') + # self._driver.set_listener('Fetch.requestPaused', None) def listen(self, timeout=None, any_one=False): """等待指定数据包加载完成 @@ -1115,9 +1107,7 @@ class NetworkListener(object): timeout = timeout if timeout is not None else self._page.timeout end_time = perf_counter() + timeout - while perf_counter() < end_time: - if self._results and (any_one or set(self._results) == self._targets): - break + while perf_counter() < end_time and not ((any_one and self._caught) or self._caught >= self._count): sleep(.1) self._requests = {} @@ -1130,8 +1120,9 @@ class NetworkListener(object): def _requestWillBeSent(self, **kwargs): """接收到请求时的回调函数""" for target in self._targets: - if (self._is_regex and search(target, kwargs['request']['url'])) or ( - not self._is_regex and target in kwargs['request']['url']): + if ((self._is_regex and search(target, kwargs['request']['url'])) or + (not self._is_regex and target in kwargs['request']['url'])) and ( + not self._method or kwargs['request']['method'] in self._method): self._requests[kwargs['requestId']] = DataPacket(self._page.tab_id, target, kwargs) if kwargs['request'].get('hasPostData', None) and not kwargs['request'].get('postData', None): @@ -1142,8 +1133,10 @@ class NetworkListener(object): def _response_received(self, **kwargs): """接收到返回信息时处理方法""" - if kwargs['requestId'] in self._requests: - self._requests[kwargs['requestId']]._raw_response = kwargs['response'] + request_id = kwargs['requestId'] + if request_id in self._requests: + self._requests[request_id]._raw_response = kwargs['response'] + self._requests[request_id]._resource_type = kwargs['type'] def _loading_finished(self, **kwargs): """请求完成时处理方法""" @@ -1169,6 +1162,22 @@ class NetworkListener(object): self._caught += 1 + def _loading_failed(self, **kwargs): + """请求失败时的回调方法""" + request_id = kwargs['requestId'] + if request_id in self._requests: + dp = self._requests[request_id] + target = dp.target + dp.errorText = kwargs['errorText'] + dp._resource_type = kwargs['type'] + + if target in self._results: + self._results[target].append(dp) + else: + self._results[target] = [dp] + + self._caught += 1 + def _request_paused(self, **kwargs): i = kwargs['requestId'] if 'networkId' not in kwargs: diff --git a/DrissionPage/chromium_base.pyi b/DrissionPage/chromium_base.pyi index 97ed3bd..f7b5fdd 100644 --- a/DrissionPage/chromium_base.pyi +++ b/DrissionPage/chromium_base.pyi @@ -42,6 +42,7 @@ class ChromiumBase(BasePage): self._wait: ChromiumBaseWaiter = ... self._set: ChromiumBaseSetter = ... self._screencast: Screencast = ... + self._listener: NetworkListener = ... def _connect_browser(self, tab_id: str = None) -> None: ... @@ -129,6 +130,9 @@ class ChromiumBase(BasePage): @property def screencast(self) -> Screencast: ... + @property + def listener(self) -> NetworkListener: ... + def run_js(self, script: str, *args: Any, as_expr: bool = False) -> Any: ... def run_js_loaded(self, script: str, *args: Any, as_expr: bool = False) -> Any: ... @@ -226,14 +230,6 @@ class ChromiumBaseWaiter(object): def upload_paths_inputted(self) -> None: ... - def set_targets(self, targets: Union[str, list, tuple, set, None] = None, is_regex: bool = False, - count: int = None) -> None: ... - - def stop_listening(self) -> None: ... - - def data_packets(self, timeout: float = None, - any_one: bool = False) -> Union[DataPacket, Dict[str, List[DataPacket]], False]: ... - class NetworkListener(object): def __init__(self, page: ChromiumBase): @@ -242,13 +238,14 @@ class NetworkListener(object): self._caught: int = ... self._targets: Union[str, dict] = ... self._single: bool = ... + self._method: set = ... self._results: Union[DataPacket, Dict[str, List[DataPacket]], False] = ... self._is_regex: bool = ... self._driver: ChromiumDriver = ... self._requests: dict = ... def set_targets(self, targets: Union[str, list, tuple, set, None] = None, is_regex: bool = False, - count: int = None) -> None: ... + count: int = None, method: Union[str, list, tuple, set] = None) -> None: ... def start(self) -> None: ... @@ -257,11 +254,13 @@ class NetworkListener(object): def listen(self, timeout: float = None, any_one: bool = False) -> Union[DataPacket, Dict[str, List[DataPacket]], False]: ... + def _requestWillBeSent(self, **kwargs) -> None: ... + def _response_received(self, **kwargs) -> None: ... def _loading_finished(self, **kwargs) -> None: ... - def _requestWillBeSent(self, **kwargs) -> None: ... + def _loading_failed(self, **kwargs) -> None: ... def _request_paused(self, **kwargs) -> None: ... diff --git a/DrissionPage/commons/browser.py b/DrissionPage/commons/browser.py index 2e0e440..7af4fd1 100644 --- a/DrissionPage/commons/browser.py +++ b/DrissionPage/commons/browser.py @@ -175,7 +175,10 @@ def _run_browser(port, path: str, args) -> Popen: p = str(p / 'chrome') if p.is_dir() else str(path) arguments = [p, f'--remote-debugging-port={port}'] arguments.extend(args) - return Popen(arguments, shell=False) + try: + return Popen(arguments, shell=False) + except FileNotFoundError: + raise FileNotFoundError('未找到浏览器,请手动指定浏览器可执行文件路径。') def _make_leave_in_dict(target_dict: dict, src: list, num: int, end: int) -> None: diff --git a/DrissionPage/commons/web.py b/DrissionPage/commons/web.py index 771c98f..9a7a9de 100644 --- a/DrissionPage/commons/web.py +++ b/DrissionPage/commons/web.py @@ -28,21 +28,16 @@ class DataPacket(object): self.target = target self._raw_request = raw_request - self._raw_response = None - self._raw_post_data = None + + self._raw_response = None self._raw_body = None self._base64_body = False self._request = None self._response = None - - # def __repr__(self): - # return f'' - # - # @property - # def requestId(self): - # return self._raw_info['requestId'] + self.errorText = None + self._resource_type = None @property def url(self): @@ -54,11 +49,11 @@ class DataPacket(object): @property def frameId(self): - return self._raw_request['frameId'] + return self._raw_request.get('frameId') - # @property - # def resourceType(self): - # return self._raw_request['resourceType'] + @property + def resourceType(self): + return self._resource_type @property def request(self): @@ -66,22 +61,24 @@ class DataPacket(object): self._request = Request(self._raw_request['request'], self._raw_post_data) return self._request - # @property - # def response(self): - # if self._response is None: - # self._response = Response(self._raw_info, self._raw_body, self._base64_body) - # return self._response + @property + def response(self): + if self._response is None: + self._response = Response(self._raw_response, self._raw_body, self._base64_body) + return self._response class Request(object): - __slots__ = ('url', 'urlFragment', 'postDataEntries', 'mixedContentType', 'initialPriority', - 'referrerPolicy', 'isLinkPreload', 'trustTokenParams', 'isSameSite', 'method', - '_request', '_raw_post_data', '_postData') + __slots__ = ('url', 'method', + # 'urlFragment', 'postDataEntries', 'mixedContentType', 'initialPriority', + # 'referrerPolicy', 'isLinkPreload', 'trustTokenParams', 'isSameSite', + '_request', '_raw_post_data', '_postData', '_headers') def __init__(self, raw_request, post_data): self._request = raw_request self._raw_post_data = post_data self._postData = None + self._headers = None def __getattr__(self, item): return self._request.get(item, None) @@ -89,7 +86,9 @@ class Request(object): @property def headers(self): """以大小写不敏感字典返回headers数据""" - return CaseInsensitiveDict(self._request['request']['headers']) + if self._headers is None: + self._headers = CaseInsensitiveDict(self._request['headers']) + return self._headers @property def postData(self): @@ -103,13 +102,13 @@ class Request(object): postData = False try: self._postData = loads(postData) - except JSONDecodeError: + except (JSONDecodeError, TypeError): self._postData = postData return self._postData class Response(object): - __slots__ = ('responseErrorReason', 'responseStatusCode', 'responseStatusText', + __slots__ = ('status', 'statusText', 'mimeType', '_response', '_raw_body', '_is_base64_body', '_body', '_headers') def __init__(self, raw_response, raw_body, base64_body): @@ -124,12 +123,9 @@ class Response(object): @property def headers(self): + """以大小写不敏感字典返回headers数据""" if self._headers is None: - if 'responseHeaders' in self._response: - headers = {i['name']: i['value'] for i in self._response['responseHeaders']} - self._headers = CaseInsensitiveDict(headers) - else: - self._headers = False + self._headers = CaseInsensitiveDict(self._response['headers']) return self._headers @property diff --git a/DrissionPage/commons/web.pyi b/DrissionPage/commons/web.pyi index 2598d90..f8b45bb 100644 --- a/DrissionPage/commons/web.pyi +++ b/DrissionPage/commons/web.pyi @@ -22,16 +22,14 @@ class DataPacket(object): self.tab: str = ... self.target: str = ... self._raw_request: dict = ... + self._raw_response: dict = ... self._raw_post_data: str = ... self._raw_body: str = ... self._base64_body: bool = ... self._request: Request = ... self._response: Response = ... - - def __repr__(self): ... - - @property - def requestId(self) -> str: ... + self.errorText: str = ... + self._resource_type: str = ... @property def url(self) -> str: ... @@ -54,15 +52,16 @@ class DataPacket(object): class Request(object): url: str = ... - urlFragment: str = ... - method:str = ... - postDataEntries: list = ... - mixedContentType: str = ... - initialPriority: str = ... - referrerPolicy: str = ... - isLinkPreload: bool = ... - trustTokenParams: dict = ... - isSameSite: bool = ... + _headers: Union[CaseInsensitiveDict, None] = ... + method: str = ... + # urlFragment: str = ... + # postDataEntries: list = ... + # mixedContentType: str = ... + # initialPriority: str = ... + # referrerPolicy: str = ... + # isLinkPreload: bool = ... + # trustTokenParams: dict = ... + # isSameSite: bool = ... def __init__(self, raw_request: dict, post_data: str): self._request: dict = ... @@ -77,9 +76,9 @@ class Request(object): class Response(object): - responseErrorReason: str = ... - responseStatusCod: int = ... - responseStatusText: str = ... + status: str = ... + statusText: int = ... + mimeType: str = ... def __init__(self, raw_response: dict, raw_body: str, base64_body: bool): self._response: dict = ...