继续修改抓包功能,未完成

This commit is contained in:
g1879 2023-04-24 19:29:26 +08:00
parent 9d24bf908b
commit cefb94515e
3 changed files with 72 additions and 38 deletions

View File

@ -1116,7 +1116,7 @@ class NetworkListener(object):
break
sleep(.1)
if not self._results:
if self._caught == 0:
r = False
else:
# todo
@ -1127,10 +1127,23 @@ class NetworkListener(object):
self._caught = 0
return r
def _requestWillBeSent(self, **kwargs):
"""接收到请求时的回调函数"""
for target in self._targets:
if (self._is_regex and search(target, kwargs['request']['url'])) or (
not self._is_regex and target in kwargs['request']['url']):
self._requests[kwargs['requestId']] = DataPacket(kwargs['requestId'], self._page.tab_id, target, kwargs)
if kwargs['request'].get('hasPostData', None) and not kwargs['request'].get('postData', None):
self._requests[kwargs['requestId']]._rawPostData \
= self._page.run_cdp('Network.getRequestPostData', requestId=kwargs['requestId'])['postData']
break
def _response_received(self, **kwargs):
"""接收到返回信息时处理方法"""
if kwargs['requestId'] in self._requests:
self._requests[kwargs['requestId']]['response'] = kwargs['response']
self._requests[kwargs['requestId']]._raw_response = kwargs
def _loading_finished(self, **kwargs):
"""请求完成时处理方法"""
@ -1144,31 +1157,29 @@ class NetworkListener(object):
body = ''
is_base64 = False
request = self._requests[request_id]
target = request['target']
rd = ResponseData(request_id, request['response'], body, self._page.tab_id, target)
rd.postData = request['post_data']
rd._base64_body = is_base64
rd.requestHeaders = request['request_headers']
rd.method = request['method']
self._results[target] = rd
data_packet = self._requests[request_id]
data_packet._rowBody = body
data_packet._base64_body = is_base64
if data_packet.target in self._results:
self._results[data_packet.target].append(data_packet)
else:
self._results[data_packet.target] = [data_packet]
self._caught += 1
def _requestWillBeSent(self, **kwargs):
"""接收到请求时的回调函数"""
for target in self._targets:
if (self._is_regex and search(target, kwargs['request']['url'])) or (
not self._is_regex and target in kwargs['request']['url']):
# self._requests[kwargs['requestId']] = {'target': target,
# 'post_data': kwargs['request'].get('postData', None),
# 'request_headers': kwargs['request']['headers'],
# 'method': kwargs['request']['method']}
self._requests[kwargs['requestId']] = DataPacket(kwargs['requestId'], self._page.tab_id, target, kwargs)
if kwargs['request'].get('hasPostData', None) and not kwargs['request'].get('postData', None):
pd = self._page.run_cdp('Network.getRequestPostData', requestId=kwargs['requestId'])['postData']
self._requests[kwargs['requestId']].
break
def _loading_failed(self, **kwargs):
"""请求失败时的处理方法"""
if kwargs['requestId'] in self._requests:
data_packet = self._requests[kwargs['requestId']]
data_packet._raw_fail_info = kwargs
if data_packet.target in self._results:
self._results[data_packet.target].append(data_packet)
else:
self._results[data_packet.target] = [data_packet]
self._caught += 1
class ChromiumPageScroll(ChromiumScroll):

View File

@ -16,6 +16,7 @@ from .chromium_driver import ChromiumDriver
from .chromium_element import ChromiumElement, ChromiumScroll
from .chromium_frame import ChromiumFrame
from .commons.constants import NoneElement
from .commons.web import DataPacket
from .session_element import SessionElement
@ -248,7 +249,7 @@ class NetworkListener(object):
self._caught: int = ...
self._targets: Union[str, dict] = ...
self._single: bool = ...
self._results: Union[ResponseData, Dict[str, ResponseData], False] = ...
self._results: Union[ResponseData, Dict[str, DataPacket], False] = ...
self._is_regex: bool = ...
self._requests: dict = ...

View File

@ -18,7 +18,9 @@ from tldextract import extract
class DataPacket(object):
"""返回的数据包管理类"""
__slots__ = ('requestId', 'request', 'response', 'rawBody', 'tab', 'target', '_requestHeaders', '_body',
'_base64_body', '_rawPostData', '_postData',
'_postData',
# cdp 原始数据
'_raw_request', '_raw_response', '_raw_fail_info', '_rawPostData', '_rawBody', '_base64_body',
'url', 'urlFragment', 'method', 'postDataEntries', 'mixedContentType', 'initialPriority',
'referrerPolicy', 'isLinkPreload', 'trustTokenParams', 'isSameSite',
@ -35,24 +37,40 @@ class DataPacket(object):
:param request_id: request id
:param tab: 产生这个数据包的tab的id
:param target: 监听目标
:param raw_request: 原始request数据
:param raw_request: 原始request数据从cdp获得
"""
self.requestId = request_id
self._raw_request = raw_request
self.tab = tab
self.target = target
self._raw_request = raw_request
self._raw_response = None
self._raw_fail_info = None
self._rawPostData = None
self._rawBody = None
self._base64_body = False
self._requestHeaders = None
self._postData = None
self._body = None
self._base64_body = False
self._rawPostData = None
def __getattr__(self, item):
return self.response.get(item, None)
def __repr__(self):
return f'<ResponseData target={self.target} request_id={self.requestId}>'
return f'<DataPacket target={self.target} request_id={self.requestId}>'
@property
def reuqest(self):
    """Placeholder property; currently returns None.

    NOTE(review): the name looks like a misspelling of ``request`` - confirm
    before renaming, since callers may already use this spelling.
    """
    return None
@property
def response(self):
    """Placeholder property; currently returns None."""
    return None
@property
def fail_info(self):
    """Placeholder property; currently returns None."""
    return None
class RequestData(object):
@property
def responseHeaders(self):
"""以大小写不敏感字典返回headers数据"""
@ -77,10 +95,6 @@ class DataPacket(object):
self._postData = self._rawPostData
return self._postData
def set_postData(self, val):
"""设置postData当hasPostData为True但数据太长时使用"""
self._rawPostData = val
@property
def body(self):
"""返回body内容如果是json格式自动进行转换如果时图片格式进行base64转换其它格式直接返回文本"""
@ -97,6 +111,14 @@ class DataPacket(object):
return self._body
class ResponseData:
    """Placeholder class; no attributes or methods are defined yet."""
class FailData:
    """Placeholder class; no attributes or methods are defined yet."""
def get_ele_txt(e):
"""获取元素内所有文本
:param e: 元素对象