4.0.0b4:修复抓包问题;创建ChromiumPage可接收int表示端口号;Frame _reload()时关闭旧连接;修复driver阻断问题;优化连接读取doc逻辑

This commit is contained in:
g1879 2023-10-31 15:20:45 +08:00
parent b5f2e28e32
commit b8382e3e5f
7 changed files with 57 additions and 57 deletions

View File

@ -74,12 +74,12 @@ class ChromiumDriver(object):
while not self._stopped.is_set():
try:
return self.method_results[message['id']].get(.2)
return self.method_results[message['id']].get(timeout=.2)
except Empty:
if self.alert_flag:
self.alert_flag = False
return {'error': {'message': 'alert exists.'}}
return {'result': {'message': 'alert exists.'}}
if timeout is not None and perf_counter() > timeout:
return {'error': {'message': 'timeout'}}
@ -155,7 +155,7 @@ class ChromiumDriver(object):
if self._stopped.is_set():
return {'error': 'tab closed', 'type': 'tab_closed'}
timeout = kwargs.pop("_timeout", 30)
timeout = kwargs.pop("_timeout", 20)
result = self._send({"method": _method, "params": kwargs}, timeout=timeout)
if result is None:
return {'error': 'tab closed', 'type': 'tab_closed'}

View File

@ -85,7 +85,6 @@ class ChromiumBase(BasePage):
if not tab_id:
json = get(f'http://{self.address}/json', headers={'Connection': 'close'}).json()
tab_id = [i['id'] for i in json if i['type'] == 'page']
if not tab_id:
raise BrowserConnectError('浏览器连接失败,可能是浏览器版本原因。')
@ -159,6 +158,7 @@ class ChromiumBase(BasePage):
"""页面开始加载时执行"""
self.browser._frames[kwargs['frameId']] = self.tab_id
if kwargs['frameId'] == self._frame_id:
self._doc_got = False
self._ready_state = 'loading'
self._is_loading = True
if self.page_load_strategy == 'eager':
@ -171,6 +171,7 @@ class ChromiumBase(BasePage):
def _onFrameNavigated(self, **kwargs):
"""页面跳转时执行"""
if kwargs['frame']['id'] == self._frame_id:
self._doc_got = False
self._ready_state = 'loading'
self._is_loading = True
if self._debug:
@ -200,7 +201,6 @@ class ChromiumBase(BasePage):
if self._debug:
print(f'FrameStoppedLoading {kwargs}')
self._get_document()
self._doc_got = False
def _onFileChooserOpened(self, **kwargs):
"""文件选择框打开时执行"""

View File

@ -98,6 +98,7 @@ class ChromiumFrame(ChromiumBase):
"""重新获取document"""
debug = self._debug
d_debug = self.driver._debug
old_driver = self.driver
if debug:
print('重新获取document')
@ -131,6 +132,7 @@ class ChromiumFrame(ChromiumBase):
else:
raise GetDocumentError
old_driver.stop()
self.wait.load_complete()
def _check_ok(self):
@ -186,14 +188,8 @@ class ChromiumFrame(ChromiumBase):
def _onInspectorDetached(self, **kwargs):
self._is_loading = True
# print('reload')
self._reload()
# def _onFrameDetached(self, **kwargs):
# if kwargs['frameId'] == self.frame_id:
# self._is_loading = True
# self._reload()
@property
def page(self):
return self._page

View File

@ -24,7 +24,7 @@ class ChromiumPage(ChromiumBase):
def __init__(self, addr_or_opts=None, tab_id=None, timeout=None, addr_driver_opts=None):
"""
:param addr_or_opts: 浏览器地址:端口或ChromiumOptions对象
:param addr_or_opts: 浏览器地址:端口、ChromiumOptions对象,或端口数字(int)
:param tab_id: 要控制的标签页id,不指定默认为激活的
:param timeout: 超时时间
"""
@ -48,11 +48,14 @@ class ChromiumPage(ChromiumBase):
elif isinstance(addr_or_opts, ChromiumOptions):
self._driver_options = addr_or_opts
# 接收浏览器地址和端口
elif isinstance(addr_or_opts, str):
self._driver_options = ChromiumOptions()
self._driver_options.set_debugger_address(addr_or_opts)
elif isinstance(addr_or_opts, int):
self._driver_options = ChromiumOptions()
self._driver_options.set_local_port(addr_or_opts)
else:
raise TypeError('只能接收ip:port格式或ChromiumOptions类型参数。')
@ -186,7 +189,8 @@ class ChromiumPage(ChromiumBase):
:param switch_to: 新建标签页后是否把焦点移过去
:return: switch_to为False时返回新标签页对象,否则返回当前对象
"""
return self if switch_to else ChromiumTab(self, self._new_tab(url, switch_to))
tid = self._new_tab(url, switch_to)
return self if switch_to else ChromiumTab(self, tid)
def to_main_tab(self):
"""跳转到主标签页"""

View File

@ -26,6 +26,7 @@ class NetworkListener(object):
self._caught = None # 临存捕捉到的数据
self._request_ids = None # 暂存须要拦截的请求id
self._extra_info_ids = None
self.listening = False
self._targets = None # 默认监听所有
@ -81,6 +82,7 @@ class NetworkListener(object):
self.listening = True
self._request_ids = {}
self._extra_info_ids = {}
self._caught = Queue(maxsize=0)
self._set_callback()
@ -174,6 +176,7 @@ class NetworkListener(object):
def clear(self):
"""清空结果"""
self._request_ids = {}
self._extra_info_ids = {}
self._caught.queue.clear()
def _set_callback(self):
@ -188,59 +191,43 @@ class NetworkListener(object):
def _requestWillBeSent(self, **kwargs):
"""接收到请求时的回调函数"""
if not self._targets:
packet = self._request_ids.setdefault(kwargs['requestId'], DataPacket(self._page.tab_id, None))
packet._raw_request = kwargs
rid = kwargs['requestId']
p = self._request_ids.setdefault(rid, DataPacket(self._page.tab_id, None))
p._raw_request = kwargs
if kwargs['request'].get('hasPostData', None) and not kwargs['request'].get('postData', None):
packet._raw_post_data = self._driver.call_method('Network.getRequestPostData',
requestId=kwargs['requestId'])['postData']
p._raw_post_data = self._driver.call_method('Network.getRequestPostData', requestId=rid)['postData']
return
rid = kwargs['requestId']
for target in self._targets:
if ((self._is_regex and search(target, kwargs['request']['url'])) or
(not self._is_regex and target in kwargs['request']['url'])) and (
not self._method or kwargs['request']['method'] in self._method):
packet = self._request_ids.setdefault(kwargs['requestId'], DataPacket(self._page.tab_id, None))
packet._raw__request = kwargs
p = self._request_ids.setdefault(rid, DataPacket(self._page.tab_id, None))
p._raw_request = kwargs
if kwargs['request'].get('hasPostData', None) and not kwargs['request'].get('postData', None):
packet._raw_post_data = self._driver.call_method('Network.getRequestPostData',
requestId=kwargs['requestId'])['postData']
p._raw_post_data = self._driver.call_method('Network.getRequestPostData', requestId=rid)['postData']
break
def _requestWillBeSentExtraInfo(self, **kwargs):
if not self._targets:
packet = self._request_ids.setdefault(kwargs['requestId'], DataPacket(self._page.tab_id, None))
packet._requestExtraInfo = kwargs
return
for target in self._targets:
if ((self._is_regex and search(target, kwargs['request']['url'])) or
(not self._is_regex and target in kwargs['request']['url'])) and (
not self._method or kwargs['request']['method'] in self._method):
packet = self._request_ids.setdefault(kwargs['requestId'], DataPacket(self._page.tab_id, None))
packet._requestExtraInfo = kwargs
break
self._extra_info_ids.setdefault(kwargs['requestId'], {})['request'] = kwargs
def _response_received(self, **kwargs):
"""接收到返回信息时处理方法"""
request = self._request_ids.get(kwargs['requestId'])
request = self._request_ids.get(kwargs['requestId'], None)
if request:
request._raw_response = kwargs['response']
request._resource_type = kwargs['type']
def _responseReceivedExtraInfo(self, **kwargs):
request = self._request_ids.get(kwargs['requestId'])
if request:
request._responseExtraInfo = kwargs
self._extra_info_ids[kwargs['requestId']]['response'] = kwargs
def _loading_finished(self, **kwargs):
"""请求完成时处理方法"""
request_id = kwargs['requestId']
dp = self._request_ids.get(request_id)
r_id = kwargs['requestId']
dp = self._request_ids.get(r_id)
if dp:
r = self._driver.call_method('Network.getResponseBody', requestId=request_id)
r = self._driver.call_method('Network.getResponseBody', requestId=r_id)
if 'body' in r:
dp._raw_body = r['body']
dp._base64_body = r['base64Encoded']
@ -248,25 +235,37 @@ class NetworkListener(object):
dp._raw_body = ''
dp._base64_body = False
ei = self._extra_info_ids.get(r_id, None)
if ei:
dp._requestExtraInfo = ei.get('request', None)
dp._responseExtraInfo = ei.get('response', None)
self._caught.put(dp)
try:
self._request_ids.pop(request_id)
except:
pass
try:
self._request_ids.pop(r_id)
self._extra_info_ids.pop(r_id)
except:
pass
def _loading_failed(self, **kwargs):
"""请求失败时的回调方法"""
request_id = kwargs['requestId']
if request_id in self._request_ids:
dp = self._request_ids[request_id]
r_id = kwargs['requestId']
dp = self._request_ids.get(r_id, None)
if dp:
dp.errorText = kwargs['errorText']
dp._resource_type = kwargs['type']
ei = self._extra_info_ids.get(r_id, None)
if ei:
dp._requestExtraInfo = ei.get('request', None)
dp._responseExtraInfo = ei.get('response', None)
self._caught.put(dp)
try:
self._request_ids.pop(request_id)
except:
pass
try:
self._request_ids.pop(r_id)
self._extra_info_ids.pop(r_id)
except:
pass
class DataPacket(object):

View File

@ -21,6 +21,7 @@ class NetworkListener(object):
self._is_regex: bool = ...
self._driver: ChromiumDriver = ...
self._request_ids: dict = ...
self._extra_info_ids: dict = ...
self.listening: bool = ...
@property

View File

@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh:
setup(
name="DrissionPage",
version="4.0.0b3",
version="4.0.0b4",
author="g1879",
author_email="g1879@qq.com",
description="Python based web automation tool. It can control the browser and send and receive data packets.",