修改抓包功能,未完成

This commit is contained in:
g1879 2023-05-04 18:20:12 +08:00
parent 13c3cf0101
commit 84bce2c7cd
5 changed files with 65 additions and 58 deletions

View File

@@ -1024,15 +1024,17 @@ class ChromiumBaseWaiter(object):
sleep(gap) sleep(gap)
return False return False
def set_targets(self, targets, is_regex=False): def set_targets(self, targets=None, is_regex=False, count=None):
"""指定要等待的数据包 """指定要等待的数据包
:param targets: 要匹配的数据包url特征,可用list等传入多个 :param targets: 要匹配的数据包url特征,可用list等传入多个,为None时获取所有
:param is_regex: 设置的target是否正则表达式 :param is_regex: 设置的target是否正则表达式
:param count: 设置总共等待多少个数据包,为None时每个目标等待1个
:return: None :return: None
""" """
if not self._listener: if not self._listener:
self._listener = NetworkListener(self._driver) self._listener = NetworkListener(self._driver)
self._listener.set_targets(targets, is_regex) self._listener.set_targets(targets, is_regex, count=count)
self._listener.start()
def data_packets(self, timeout=None, any_one=False): def data_packets(self, timeout=None, any_one=False):
"""等待指定数据包加载完成 """等待指定数据包加载完成
@@ -1064,25 +1066,26 @@ class NetworkListener(object):
self._caught = 0 # 已获取到的数量 self._caught = 0 # 已获取到的数量
self._driver = self._page.driver self._driver = self._page.driver
def set_targets(self, targets, is_regex=False, count=None): def set_targets(self, targets=None, is_regex=False, count=None):
"""指定要等待的数据包 """指定要等待的数据包
:param targets: 要匹配的数据包url特征,可用list等传入多个 :param targets: 要匹配的数据包url特征,可用list等传入多个,为None时获取所有
:param is_regex: 设置的target是否正则表达式 :param is_regex: 设置的target是否正则表达式
:param count: 设置总共等待多少个数据包,为None时每个目标等待1个 :param count: 设置总共等待多少个数据包,为None时每个目标等待1个
:return: None :return: None
""" """
if not isinstance(targets, (str, list, tuple, set)): if not isinstance(targets, (str, list, tuple, set)) and targets is not None:
raise TypeError('targets只能是str、list、tuple、set。') raise TypeError('targets只能是str、list、tuple、set、None。')
if targets is None:
targets = ''
self._is_regex = is_regex self._is_regex = is_regex
if isinstance(targets, str): if isinstance(targets, str):
self._targets = {targets} self._targets = {targets}
self._single = True
else: else:
self._targets = set(targets) self._targets = set(targets)
self._single = False
if count is None: if count is None:
self._count = len(self._targets) self._count = len(self._targets)
self._single = self._count == 1
def start(self): def start(self):
self._driver.set_listener('Fetch.requestPaused', self._request_paused) self._driver.set_listener('Fetch.requestPaused', self._request_paused)
@@ -1120,14 +1123,27 @@ class NetworkListener(object):
self._requests = {} self._requests = {}
if not self._results: if not self._results:
return False return False
r = list(self._results.values())[0] if self._single else self._results r = list(self._results.values())[0][0] if self._single else self._results
self._results = {} self._results = {}
return r return r
def _requestWillBeSent(self, **kwargs):
"""接收到请求时的回调函数"""
for target in self._targets:
if (self._is_regex and search(target, kwargs['request']['url'])) or (
not self._is_regex and target in kwargs['request']['url']):
self._requests[kwargs['requestId']] = DataPacket(self._page.tab_id, target, kwargs)
if kwargs['request'].get('hasPostData', None) and not kwargs['request'].get('postData', None):
self._requests[kwargs['requestId']]._raw_post_data = \
self._page.run_cdp('Network.getRequestPostData', requestId=kwargs['requestId'])['postData']
break
def _response_received(self, **kwargs): def _response_received(self, **kwargs):
"""接收到返回信息时处理方法""" """接收到返回信息时处理方法"""
if kwargs['requestId'] in self._requests: if kwargs['requestId'] in self._requests:
self._requests[kwargs['requestId']]['response'] = kwargs['response'] self._requests[kwargs['requestId']]._raw_response = kwargs['response']
def _loading_finished(self, **kwargs): def _loading_finished(self, **kwargs):
"""请求完成时处理方法""" """请求完成时处理方法"""
@@ -1141,23 +1157,17 @@ class NetworkListener(object):
body = '' body = ''
is_base64 = False is_base64 = False
request = self._requests[request_id] dp = self._requests[request_id]
target = request['target'] target = dp.target
rd = ResponseData(request_id, request['response'], body, self._page.tab_id, target) dp._raw_body = body
rd.postData = request['post_data'] dp._base64_body = is_base64
rd._base64_body = is_base64
rd.requestHeaders = request['request_headers']
self._results[target] = rd
def _requestWillBeSent(self, **kwargs): if target in self._results:
"""接收到请求时的回调函数""" self._results[target].append(dp)
for target in self._targets: else:
if (self._is_regex and search(target, kwargs['request']['url'])) or ( self._results[target] = [dp]
not self._is_regex and target in kwargs['request']['url']):
self._requests[kwargs['requestId']] = {'target': target, self._caught += 1
'post_data': kwargs['request'].get('postData', None),
'request_headers': kwargs['request']['headers']}
break
def _request_paused(self, **kwargs): def _request_paused(self, **kwargs):
i = kwargs['requestId'] i = kwargs['requestId']

View File

@@ -226,7 +226,8 @@ class ChromiumBaseWaiter(object):
def upload_paths_inputted(self) -> None: ... def upload_paths_inputted(self) -> None: ...
def set_targets(self, targets: Union[str, list, tuple, set], is_regex: bool = False) -> None: ... def set_targets(self, targets: Union[str, list, tuple, set, None] = None, is_regex: bool = False,
count: int = None) -> None: ...
def stop_listening(self) -> None: ... def stop_listening(self) -> None: ...
@@ -246,7 +247,8 @@ class NetworkListener(object):
self._driver: ChromiumDriver = ... self._driver: ChromiumDriver = ...
self._requests: dict = ... self._requests: dict = ...
def set_targets(self, targets: Union[str, list, tuple, set], is_regex: bool = False, count: int = None) -> None: ... def set_targets(self, targets: Union[str, list, tuple, set, None] = None, is_regex: bool = False,
count: int = None) -> None: ...
def start(self) -> None: ... def start(self) -> None: ...

View File

@@ -118,13 +118,6 @@ class ChromiumPageWaiter(ChromiumBaseWaiter):
def new_tab(self, timeout: float = None) -> bool: ... def new_tab(self, timeout: float = None) -> bool: ...
def set_targets(self, targets: Union[str, list, tuple, set], is_regex: bool = False) -> None: ...
def stop_listening(self) -> None: ...
def data_packets(self, timeout: float = None,
any_one: bool = False) -> Union[DataPacket, Dict[str, List[DataPacket]], False]: ...
class ChromiumTabRect(object): class ChromiumTabRect(object):
def __init__(self, page: ChromiumPage): def __init__(self, page: ChromiumPage):

View File

@@ -18,30 +18,31 @@ from tldextract import extract
class DataPacket(object): class DataPacket(object):
"""返回的数据包管理类""" """返回的数据包管理类"""
def __init__(self, tab, target, raw_info): def __init__(self, tab, target, raw_request):
""" """
:param tab: 产生这个数据包的tab的id :param tab: 产生这个数据包的tab的id
:param target: 监听目标 :param target: 监听目标
:param raw_info: 原始request数据,从cdp获得 :param raw_request: 原始request数据,从cdp获得
""" """
self.tab = tab self.tab = tab
self.target = target self.target = target
self._raw_info = raw_info self._raw_request = raw_request
self._raw_post_data = None self._raw_response = None
self._raw_post_data = None
self._raw_body = None self._raw_body = None
self._base64_body = False self._base64_body = False
self._request = None self._request = None
self._response = None self._response = None
def __repr__(self): # def __repr__(self):
return f'<DataPacket target={self.target} request_id={self.requestId}>' # return f'<DataPacket target={self.target} request_id={self.requestId}>'
#
@property # @property
def requestId(self): # def requestId(self):
return self._raw_info['requestId'] # return self._raw_info['requestId']
@property @property
def url(self): def url(self):
@@ -53,28 +54,28 @@ class DataPacket(object):
@property @property
def frameId(self): def frameId(self):
return self._raw_info['frameId'] return self._raw_request['frameId']
@property # @property
def resourceType(self): # def resourceType(self):
return self._raw_info['resourceType'] # return self._raw_request['resourceType']
@property @property
def request(self): def request(self):
if self._request is None: if self._request is None:
self._request = Request(self._raw_info['request'], self._raw_post_data) self._request = Request(self._raw_request['request'], self._raw_post_data)
return self._request return self._request
@property # @property
def response(self): # def response(self):
if self._response is None: # if self._response is None:
self._response = Response(self._raw_info, self._raw_body, self._base64_body) # self._response = Response(self._raw_info, self._raw_body, self._base64_body)
return self._response # return self._response
class Request(object): class Request(object):
__slots__ = ('url', 'urlFragment', 'postDataEntries', 'mixedContentType', 'initialPriority', __slots__ = ('url', 'urlFragment', 'postDataEntries', 'mixedContentType', 'initialPriority',
'referrerPolicy', 'isLinkPreload', 'trustTokenParams', 'isSameSite', 'referrerPolicy', 'isLinkPreload', 'trustTokenParams', 'isSameSite', 'method',
'_request', '_raw_post_data', '_postData') '_request', '_raw_post_data', '_postData')
def __init__(self, raw_request, post_data): def __init__(self, raw_request, post_data):

View File

@@ -21,7 +21,7 @@ class DataPacket(object):
def __init__(self, tab: str, target: str, raw_info: dict): def __init__(self, tab: str, target: str, raw_info: dict):
self.tab: str = ... self.tab: str = ...
self.target: str = ... self.target: str = ...
self._raw_info: dict = ... self._raw_request: dict = ...
self._raw_post_data: str = ... self._raw_post_data: str = ...
self._raw_body: str = ... self._raw_body: str = ...
self._base64_body: bool = ... self._base64_body: bool = ...
@@ -55,6 +55,7 @@ class DataPacket(object):
class Request(object): class Request(object):
url: str = ... url: str = ...
urlFragment: str = ... urlFragment: str = ...
method:str = ...
postDataEntries: list = ... postDataEntries: list = ...
mixedContentType: str = ... mixedContentType: str = ...
initialPriority: str = ... initialPriority: str = ...