4.0.0b1对连接逻辑进行完全重构,'none'模式不主动停止网页且无视timeout;wait.new_tab()成功时返回id

This commit is contained in:
g1879 2023-10-30 01:57:18 +08:00
parent 341591edc6
commit 368665df57
11 changed files with 227 additions and 262 deletions

View File

@ -367,7 +367,7 @@ class BasePage(BaseParser):
self.retry_times = 3 self.retry_times = 3
self.retry_interval = 2 self.retry_interval = 2
self._DownloadKit = None self._DownloadKit = None
self._download_path = str(Path('../..').absolute()) self._download_path = str(Path('.').absolute())
@property @property
def title(self): def title(self):

View File

@ -6,7 +6,7 @@
from json import dumps, loads from json import dumps, loads
from queue import Queue, Empty from queue import Queue, Empty
from threading import Thread, Event from threading import Thread, Event
from time import perf_counter, sleep from time import perf_counter
from requests import get from requests import get
from websocket import WebSocketTimeoutException, WebSocketException, WebSocketConnectionClosedException, \ from websocket import WebSocketTimeoutException, WebSocketException, WebSocketConnectionClosedException, \
@ -56,8 +56,8 @@ class ChromiumDriver(object):
message_json = dumps(message) message_json = dumps(message)
if self._debug: if self._debug:
if self._debug is True or ( if self._debug is True or (isinstance(self._debug, str) and
isinstance(self._debug, str) and message.get('method', '').startswith(self._debug)): message.get('method', '').startswith(self._debug)):
print(f'发> {message_json}') print(f'发> {message_json}')
elif isinstance(self._debug, (list, tuple, set)): elif isinstance(self._debug, (list, tuple, set)):
for m in self._debug: for m in self._debug:
@ -74,17 +74,16 @@ class ChromiumDriver(object):
while not self._stopped.is_set(): while not self._stopped.is_set():
try: try:
return self.method_results[message['id']].get_nowait() return self.method_results[message['id']].get(.2)
except Empty: except Empty:
if self.alert_flag: if self.alert_flag:
self.alert_flag = False self.alert_flag = False
return {'result': []} return {'error': {'message': 'alert exists.'}}
if timeout is not None and perf_counter() > timeout: if timeout is not None and perf_counter() > timeout:
return {'error': {'message': 'timeout'}} return {'error': {'message': 'timeout'}}
sleep(.02)
continue continue
except Exception: except Exception:
@ -138,7 +137,11 @@ class ChromiumDriver(object):
function = self.event_handlers.get(event['method']) function = self.event_handlers.get(event['method'])
if function: if function:
if self._debug:
print(f'开始执行 {function.__name__}')
function(**event['params']) function(**event['params'])
if self._debug:
print(f'执行 {function.__name__}完毕')
self.event_queue.task_done() self.event_queue.task_done()

View File

@ -376,7 +376,7 @@ class ChromiumElement(DrissionElement):
def run_js(self, script, *args, as_expr=False): def run_js(self, script, *args, as_expr=False):
"""对本元素执行javascript代码 """对本元素执行javascript代码
:param script: js文本 :param script: js文本
:param args: 参数按顺序在js文本中对应argument[0]argument[1]... :param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效 :param as_expr: 是否作为表达式运行为True时args无效
:return: 运行的结果 :return: 运行的结果
""" """
@ -385,7 +385,7 @@ class ChromiumElement(DrissionElement):
def run_async_js(self, script, *args, as_expr=False): def run_async_js(self, script, *args, as_expr=False):
"""以异步方式对本元素执行javascript代码 """以异步方式对本元素执行javascript代码
:param script: js文本 :param script: js文本
:param args: 参数按顺序在js文本中对应argument[0]argument[1]... :param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效 :param as_expr: 是否作为表达式运行为True时args无效
:return: None :return: None
""" """
@ -841,7 +841,7 @@ class ChromiumShadowRoot(BaseElement):
def run_js(self, script, *args, as_expr=False): def run_js(self, script, *args, as_expr=False):
"""运行javascript代码 """运行javascript代码
:param script: js文本 :param script: js文本
:param args: 参数按顺序在js文本中对应argument[0]argument[1]... :param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效 :param as_expr: 是否作为表达式运行为True时args无效
:return: 运行的结果 :return: 运行的结果
""" """
@ -850,7 +850,7 @@ class ChromiumShadowRoot(BaseElement):
def run_async_js(self, script, *args, as_expr=False): def run_async_js(self, script, *args, as_expr=False):
"""以异步方式执行js代码 """以异步方式执行js代码
:param script: js文本 :param script: js文本
:param args: 参数按顺序在js文本中对应argument[0]argument[1]... :param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效 :param as_expr: 是否作为表达式运行为True时args无效
:return: None :return: None
""" """
@ -1042,9 +1042,9 @@ class ChromiumShadowRoot(BaseElement):
loc = loc[0], loc[1][5:] loc = loc[0], loc[1][5:]
timeout = timeout if timeout is not None else self.page.timeout timeout = timeout if timeout is not None else self.page.timeout
t1 = perf_counter() end_time = perf_counter() + timeout
eles = make_session_ele(self.html).eles(loc) eles = make_session_ele(self.html).eles(loc)
while not eles and perf_counter() - t1 <= timeout: while not eles and perf_counter() <= end_time:
eles = make_session_ele(self.html).eles(loc) eles = make_session_ele(self.html).eles(loc)
if not eles: if not eles:
@ -1299,7 +1299,7 @@ def run_js(page_or_ele, script, as_expr=False, timeout=None, args=None):
:param script: js文本 :param script: js文本
:param as_expr: 是否作为表达式运行为True时args无效 :param as_expr: 是否作为表达式运行为True时args无效
:param timeout: 超时时间 :param timeout: 超时时间
:param args: 参数按顺序在js文本中对应argument[0]argument[1]... :param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:return: js执行结果 :return: js执行结果
""" """
if isinstance(page_or_ele, (ChromiumElement, ChromiumShadowRoot)): if isinstance(page_or_ele, (ChromiumElement, ChromiumShadowRoot)):
@ -1701,7 +1701,8 @@ class ChromiumScroll(object):
x = r['layoutViewport']['pageX'] x = r['layoutViewport']['pageX']
y = r['layoutViewport']['pageY'] y = r['layoutViewport']['pageY']
while True: end_time = perf_counter() + self._driver.page.timeout
while perf_counter() < end_time:
sleep(.1) sleep(.1)
r = page.run_cdp('Page.getLayoutMetrics') r = page.run_cdp('Page.getLayoutMetrics')
x1 = r['layoutViewport']['pageX'] x1 = r['layoutViewport']['pageX']

View File

@ -24,8 +24,8 @@ from .._units.network_listener import NetworkListener
from .._units.screencast import Screencast from .._units.screencast import Screencast
from .._units.setter import ChromiumBaseSetter from .._units.setter import ChromiumBaseSetter
from .._units.waiter import ChromiumBaseWaiter from .._units.waiter import ChromiumBaseWaiter
from ..errors import ContextLossError, ElementLossError, AlertExistsError, CDPError, TabClosedError, \ from ..errors import (ContextLossError, ElementLossError, CDPError, TabClosedError, NoRectError, BrowserConnectError,
NoRectError, BrowserConnectError, GetDocumentError AlertExistsError)
class ChromiumBase(BasePage): class ChromiumBase(BasePage):
@ -41,14 +41,14 @@ class ChromiumBase(BasePage):
self._is_loading = None self._is_loading = None
self._root_id = None # object id self._root_id = None # object id
self._debug = False self._debug = False
self._debug_recorder = None
self._set = None self._set = None
self._screencast = None self._screencast = None
self._actions = None self._actions = None
self._listener = None self._listener = None
self._has_alert = False self._has_alert = False
self._ready_state = None
self._download_path = str(Path('../..').absolute()) self._download_path = str(Path('.').absolute())
if isinstance(address, int) or (isinstance(address, str) and address.isdigit()): if isinstance(address, int) or (isinstance(address, str) and address.isdigit()):
address = f'127.0.0.1:{address}' address = f'127.0.0.1:{address}'
@ -76,7 +76,6 @@ class ChromiumBase(BasePage):
:param tab_id: 要控制的标签页id不指定默认为激活的 :param tab_id: 要控制的标签页id不指定默认为激活的
:return: None :return: None
""" """
self._first_run = True
self._is_reading = False self._is_reading = False
self._upload_list = None self._upload_list = None
self._wait = None self._wait = None
@ -89,9 +88,15 @@ class ChromiumBase(BasePage):
if not tab_id: if not tab_id:
raise BrowserConnectError('浏览器连接失败,可能是浏览器版本原因。') raise BrowserConnectError('浏览器连接失败,可能是浏览器版本原因。')
tab_id = tab_id[0] tab_id = tab_id[0]
self._driver_init(tab_id) self._driver_init(tab_id)
if self.ready_state == 'complete' and self._ready_state is None:
self._get_document() self._get_document()
self._first_run = False self._ready_state = 'complete'
r = self.run_cdp('Page.getFrameTree')
for i in findall(r"'id': '(.*?)'", str(r)):
self.browser._frames[i] = self.tab_id
def _driver_init(self, tab_id): def _driver_init(self, tab_id):
"""新建页面、页面刷新、切换标签页后要进行的cdp参数初始化 """新建页面、页面刷新、切换标签页后要进行的cdp参数初始化
@ -99,6 +104,7 @@ class ChromiumBase(BasePage):
:return: None :return: None
""" """
self._is_loading = True self._is_loading = True
self._frame_id = tab_id
self._driver = ChromiumDriver(tab_id=tab_id, tab_type='page', address=self.address) self._driver = ChromiumDriver(tab_id=tab_id, tab_type='page', address=self.address)
self._alert = Alert() self._alert = Alert()
self._driver.set_listener('Page.javascriptDialogOpening', self._on_alert_open) self._driver.set_listener('Page.javascriptDialogOpening', self._on_alert_open)
@ -108,59 +114,20 @@ class ChromiumBase(BasePage):
self._driver.call_method('Page.enable') self._driver.call_method('Page.enable')
self._driver.call_method('Emulation.setFocusEmulationEnabled', enabled=True) self._driver.call_method('Emulation.setFocusEmulationEnabled', enabled=True)
self._driver.set_listener('Page.frameStoppedLoading', self._onFrameStoppedLoading)
self._driver.set_listener('Page.frameStartedLoading', self._onFrameStartedLoading) self._driver.set_listener('Page.frameStartedLoading', self._onFrameStartedLoading)
self._driver.set_listener('DOM.documentUpdated', self._onDocumentUpdated)
self._driver.set_listener('Page.loadEventFired', self._onLoadEventFired)
self._driver.set_listener('Page.frameNavigated', self._onFrameNavigated) self._driver.set_listener('Page.frameNavigated', self._onFrameNavigated)
self._driver.set_listener('Page.domContentEventFired', self._onDomContentEventFired)
self._driver.set_listener('Page.loadEventFired', self._onLoadEventFired)
self._driver.set_listener('Page.frameStoppedLoading', self._onFrameStoppedLoading)
self._driver.set_listener('Page.frameAttached', self._onFrameAttached) self._driver.set_listener('Page.frameAttached', self._onFrameAttached)
self._driver.set_listener('Page.frameDetached', self._onFrameDetached) self._driver.set_listener('Page.frameDetached', self._onFrameDetached)
def _get_document(self): def _get_document(self):
"""刷新cdp使用的document数据"""
if self._is_reading: if self._is_reading:
return return
self._is_reading = True self._is_reading = True
if self._debug:
print('获取document')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '获取document', '开始'))
try: # 遇到过网站在标签页关闭时触发读取文档导致错误,屏蔽掉
self._wait_loaded()
except TabClosedError:
return
end_time = perf_counter() + 10
while perf_counter() < end_time:
try:
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId'] b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id)['object']['objectId'] self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id)['object']['objectId']
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '信息', f'root_id{self._root_id}'))
break
except CDPError as e:
err = e
if self._debug:
print('重试获取document')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), 'err', '读取root_id出错'))
sleep(.1)
else:
txt = f'请检查是否创建了过多页面对象同时操作浏览器。\n如无法解决,请把以下信息报告作者。\n{err._info}\n' \
f'报告网址https://gitee.com/g1879/DrissionPage/issues'
raise GetDocumentError(txt)
if self._debug:
print('获取document结束')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '获取document', '结束'))
r = self.run_cdp('Page.getFrameTree') r = self.run_cdp('Page.getFrameTree')
for i in findall(r"'id': '(.*?)'", str(r)): for i in findall(r"'id': '(.*?)'", str(r)):
self.browser._frames[i] = self.tab_id self.browser._frames[i] = self.tab_id
@ -173,25 +140,18 @@ class ChromiumBase(BasePage):
:param timeout: 超时时间 :param timeout: 超时时间
:return: 是否成功超时返回False :return: 是否成功超时返回False
""" """
timeout = timeout if timeout is not None else self.timeouts.page_load if self.page_load_strategy == 'none':
return True
timeout = timeout if timeout is not None else self.timeouts.page_load
end_time = perf_counter() + timeout end_time = perf_counter() + timeout
while perf_counter() < end_time: while perf_counter() < end_time:
state = self.ready_state if self._ready_state == 'complete':
if state is None: # 存在alert的情况
return None
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), 'waiting', state))
if state == 'complete':
return True return True
elif self.page_load_strategy == 'eager' and state in ('interactive', 'complete'): elif self.page_load_strategy == 'eager' and self._ready_state in ('interactive', 'complete'):
self.stop_loading()
return True
elif self.page_load_strategy == 'none':
self.stop_loading() self.stop_loading()
return True return True
sleep(.1) sleep(.1)
self.stop_loading() self.stop_loading()
@ -209,50 +169,44 @@ class ChromiumBase(BasePage):
def _onFrameStartedLoading(self, **kwargs): def _onFrameStartedLoading(self, **kwargs):
"""页面开始加载时执行""" """页面开始加载时执行"""
self.browser._frames[kwargs['frameId']] = self.tab_id self.browser._frames[kwargs['frameId']] = self.tab_id
if kwargs['frameId'] == self._target_id: if kwargs['frameId'] == self._frame_id:
self._ready_state = 'loading'
self._is_loading = True self._is_loading = True
if self._debug: if self._debug:
print('页面开始加载 FrameStartedLoading') print(f'frameStartedLoading {kwargs}')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '加载流程', 'FrameStartedLoading')) def _onFrameNavigated(self, **kwargs):
"""页面跳转时执行"""
if kwargs['frame']['id'] == self._frame_id:
self._ready_state = 'loading'
self._is_loading = True
if self._debug:
print(f'FrameNavigated {kwargs}')
def _onDomContentEventFired(self, **kwargs):
"""在页面刷新、变化后重新读取页面内容"""
self._ready_state = 'interactive'
if self.page_load_strategy == 'eager':
self.run_cdp('Page.stopLoading')
if self._debug:
print(f'DomContentEventFired {kwargs}')
def _onLoadEventFired(self, **kwargs):
"""在页面刷新、变化后重新读取页面内容"""
self._ready_state = 'complete'
if self._debug:
print(f'LoadEventFired {kwargs}')
# self._get_document()
def _onFrameStoppedLoading(self, **kwargs): def _onFrameStoppedLoading(self, **kwargs):
"""页面加载完成后执行""" """页面加载完成后执行"""
self.browser._frames[kwargs['frameId']] = self.tab_id self.browser._frames[kwargs['frameId']] = self.tab_id
if kwargs['frameId'] == self._target_id and self._first_run is False and self._is_loading: if kwargs['frameId'] == self._frame_id:
self._ready_state = 'complete'
if self._debug: if self._debug:
print('页面停止加载 FrameStoppedLoading') print(f'FrameStoppedLoading {kwargs}')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '加载流程', 'FrameStoppedLoading'))
self._get_document() self._get_document()
def _onLoadEventFired(self, **kwargs):
"""在页面刷新、变化后重新读取页面内容"""
if self._debug:
print('loadEventFired')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '加载流程', 'loadEventFired'))
self._get_document()
def _onDocumentUpdated(self, **kwargs):
"""页面跳转时执行"""
if self._debug:
print('documentUpdated')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '加载流程', 'documentUpdated'))
def _onFrameNavigated(self, **kwargs):
"""页面跳转时执行"""
if kwargs['frame'].get('parentId', None) == self._target_id and self._first_run is False and self._is_loading:
self._is_loading = True
if self._debug:
print('navigated')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '加载流程', 'navigated'))
def _onFileChooserOpened(self, **kwargs): def _onFileChooserOpened(self, **kwargs):
"""文件选择框打开时执行""" """文件选择框打开时执行"""
if self._upload_list: if self._upload_list:
@ -344,14 +298,13 @@ class ChromiumBase(BasePage):
@property @property
def ready_state(self): def ready_state(self):
"""返回当前页面加载状态,'loading' 'interactive' 'complete'有弹出框时返回None""" """返回当前页面加载状态,'loading' 'interactive' 'complete''timeout' 表示可能有弹出框"""
while True:
try: try:
return self.run_cdp('Runtime.evaluate', expression='document.readyState;')['result']['value'] return self.run_cdp('Runtime.evaluate', expression='document.readyState;', _timeout=3)['result']['value']
except (AlertExistsError, TypeError):
return None
except ContextLossError: except ContextLossError:
continue return None
except TimeoutError:
return 'timeout'
@property @property
def size(self): def size(self):
@ -439,9 +392,6 @@ class ChromiumBase(BasePage):
:param cmd_args: 参数 :param cmd_args: 参数
:return: 执行的结果 :return: 执行的结果
""" """
# if self.driver.has_alert and cmd != HANDLE_ALERT_METHOD:
# raise AlertExistsError
r = self.driver.call_method(cmd, **cmd_args) r = self.driver.call_method(cmd, **cmd_args)
if ERROR not in r: if ERROR not in r:
return r return r
@ -455,8 +405,10 @@ class ChromiumBase(BasePage):
raise ElementLossError raise ElementLossError
elif error == 'tab closed': elif error == 'tab closed':
raise TabClosedError raise TabClosedError
elif error == 'alert exists': elif error == 'timeout':
pass raise TimeoutError
elif error == 'alert exists.':
raise AlertExistsError
elif error in ('Node does not have a layout object', 'Could not compute box model.'): elif error in ('Node does not have a layout object', 'Could not compute box model.'):
raise NoRectError raise NoRectError
elif r['type'] == 'call_method_error': elif r['type'] == 'call_method_error':
@ -476,7 +428,7 @@ class ChromiumBase(BasePage):
def run_js(self, script, *args, as_expr=False): def run_js(self, script, *args, as_expr=False):
"""运行javascript代码 """运行javascript代码
:param script: js文本 :param script: js文本
:param args: 参数按顺序在js文本中对应argument[0]argument[1]... :param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效 :param as_expr: 是否作为表达式运行为True时args无效
:return: 运行的结果 :return: 运行的结果
""" """
@ -485,7 +437,7 @@ class ChromiumBase(BasePage):
def run_js_loaded(self, script, *args, as_expr=False): def run_js_loaded(self, script, *args, as_expr=False):
"""运行javascript代码执行前等待页面加载完毕 """运行javascript代码执行前等待页面加载完毕
:param script: js文本 :param script: js文本
:param args: 参数按顺序在js文本中对应argument[0]argument[1]... :param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效 :param as_expr: 是否作为表达式运行为True时args无效
:return: 运行的结果 :return: 运行的结果
""" """
@ -495,7 +447,7 @@ class ChromiumBase(BasePage):
def run_async_js(self, script, *args, as_expr=False): def run_async_js(self, script, *args, as_expr=False):
"""以异步方式执行js代码 """以异步方式执行js代码
:param script: js文本 :param script: js文本
:param args: 参数按顺序在js文本中对应argument[0]argument[1]... :param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效 :param as_expr: 是否作为表达式运行为True时args无效
:return: None :return: None
""" """
@ -691,11 +643,12 @@ class ChromiumBase(BasePage):
"""页面停止加载""" """页面停止加载"""
if self._debug: if self._debug:
print('停止页面加载') print('停止页面加载')
if self._debug_recorder: try:
self._debug_recorder.add_data((perf_counter(), '操作', '停止页面加载'))
self.run_cdp('Page.stopLoading') self.run_cdp('Page.stopLoading')
while self.ready_state not in ('complete', None): except TabClosedError:
pass
end_time = perf_counter() + self.timeouts.page_load
while self._ready_state != 'complete' and perf_counter() < end_time:
sleep(.1) sleep(.1)
def remove_ele(self, loc_or_ele): def remove_ele(self, loc_or_ele):
@ -881,32 +834,39 @@ class ChromiumBase(BasePage):
""" """
err = None err = None
timeout = timeout if timeout is not None else self.timeouts.page_load timeout = timeout if timeout is not None else self.timeouts.page_load
for t in range(times + 1): for t in range(times + 1):
err = None err = None
result = self.run_cdp('Page.navigate', url=to_url) end_time = perf_counter() + timeout
result = self.run_cdp('Page.navigate', url=to_url, _timeout=timeout)
is_timeout = self._wait_loaded(timeout) if result.get('error') == 'timeout':
if is_timeout is None:
return None
is_timeout = not is_timeout
self.wait.load_complete()
if is_timeout:
err = TimeoutError('页面连接超时。') err = TimeoutError('页面连接超时。')
if 'errorText' in result:
elif 'errorText' in result:
err = ConnectionError(result['errorText']) err = ConnectionError(result['errorText'])
if err:
sleep(interval)
if self._debug or show_errmsg:
print(f'重试{t + 1} {to_url}')
self.stop_loading()
continue
if self.page_load_strategy == 'none':
return True
yu = end_time - perf_counter()
ok = self._wait_loaded(1 if yu <= 0 else yu)
if not ok:
err = TimeoutError('页面连接超时。')
sleep(interval)
if self._debug or show_errmsg:
print(f'重试{t + 1} {to_url}')
self.stop_loading()
continue
if not err: if not err:
break break
if t < times:
sleep(interval)
while self.ready_state not in ('complete', None):
sleep(.1)
if self._debug or show_errmsg:
print(f'重试{t + 1} {to_url}')
if err: if err:
if show_errmsg: if show_errmsg:
raise err if err is not None else ConnectionError('连接异常。') raise err if err is not None else ConnectionError('连接异常。')

View File

@ -4,9 +4,7 @@
@Contact : g1879@qq.com @Contact : g1879@qq.com
""" """
from pathlib import Path from pathlib import Path
from typing import Union, Tuple, List, Any from typing import Union, Tuple, List, Any, Optional
from DataRecorder import Recorder
from .._base.base import BasePage from .._base.base import BasePage
from .._base.browser import Browser from .._base.browser import Browser
@ -32,6 +30,7 @@ class ChromiumBase(BasePage):
self._page: ChromiumPage = ... self._page: ChromiumPage = ...
self.address: str = ... self.address: str = ...
self._driver: ChromiumDriver = ... self._driver: ChromiumDriver = ...
self._frame_id: str = ...
self._is_reading: bool = ... self._is_reading: bool = ...
self._timeouts: Timeout = ... self._timeouts: Timeout = ...
self._first_run: bool = ... self._first_run: bool = ...
@ -41,7 +40,6 @@ class ChromiumBase(BasePage):
self._url: str = ... self._url: str = ...
self._root_id: str = ... self._root_id: str = ...
self._debug: bool = ... self._debug: bool = ...
self._debug_recorder: Recorder = ...
self._upload_list: list = ... self._upload_list: list = ...
self._wait: ChromiumBaseWaiter = ... self._wait: ChromiumBaseWaiter = ...
self._set: ChromiumBaseSetter = ... self._set: ChromiumBaseSetter = ...
@ -50,6 +48,7 @@ class ChromiumBase(BasePage):
self._listener: NetworkListener = ... self._listener: NetworkListener = ...
self._alert: Alert = ... self._alert: Alert = ...
self._has_alert: bool = ... self._has_alert: bool = ...
self._ready_state: Optional[str] = ...
def _connect_browser(self, tab_id: str = None) -> None: ... def _connect_browser(self, tab_id: str = None) -> None: ...
@ -65,13 +64,13 @@ class ChromiumBase(BasePage):
def _onFrameStartedLoading(self, **kwargs): ... def _onFrameStartedLoading(self, **kwargs): ...
def _onFrameStoppedLoading(self, **kwargs): ... def _onFrameNavigated(self, **kwargs): ...
def _onDomContentEventFired(self, **kwargs): ...
def _onLoadEventFired(self, **kwargs): ... def _onLoadEventFired(self, **kwargs): ...
def _onDocumentUpdated(self, **kwargs): ... def _onFrameStoppedLoading(self, **kwargs): ...
def _onFrameNavigated(self, **kwargs): ...
def _onFileChooserOpened(self, **kwargs): ... def _onFileChooserOpened(self, **kwargs): ...

View File

@ -4,7 +4,7 @@
@Contact : g1879@qq.com @Contact : g1879@qq.com
""" """
from copy import copy from copy import copy
from re import search from re import search, findall
from threading import Thread from threading import Thread
from time import sleep, perf_counter from time import sleep, perf_counter
@ -14,7 +14,7 @@ from .._elements.chromium_element import ChromiumElement
from .._pages.chromium_base import ChromiumBase, ChromiumPageScroll from .._pages.chromium_base import ChromiumBase, ChromiumPageScroll
from .._units.setter import ChromiumFrameSetter from .._units.setter import ChromiumFrameSetter
from .._units.waiter import FrameWaiter from .._units.waiter import FrameWaiter
from ..errors import ContextLossError from ..errors import ContextLossError, ElementLossError, GetDocumentError
class ChromiumFrame(ChromiumBase): class ChromiumFrame(ChromiumBase):
@ -40,6 +40,7 @@ class ChromiumFrame(ChromiumBase):
self._backend_id = ele.ids.backend_id self._backend_id = ele.ids.backend_id
self._frame_ele = ele self._frame_ele = ele
self._states = None self._states = None
self._ids = ChromiumFrameIds(self)
if self._is_inner_frame(): if self._is_inner_frame():
self._is_diff_domain = False self._is_diff_domain = False
@ -50,9 +51,8 @@ class ChromiumFrame(ChromiumBase):
super().__init__(page.address, self.frame_id, page.timeout) super().__init__(page.address, self.frame_id, page.timeout)
obj_id = super().run_js('document;', as_expr=True)['objectId'] obj_id = super().run_js('document;', as_expr=True)['objectId']
self.doc_ele = ChromiumElement(self, obj_id=obj_id) self.doc_ele = ChromiumElement(self, obj_id=obj_id)
self._ids = ChromiumFrameIds(self)
end_time = perf_counter() + 2 end_time = perf_counter() + 5
while perf_counter() < end_time and self.url == 'about:blank': while perf_counter() < end_time and self.url == 'about:blank':
sleep(.1) sleep(.1)
@ -92,21 +92,28 @@ class ChromiumFrame(ChromiumBase):
except: except:
get(f'http://{self.address}/json', headers={'Connection': 'close'}) get(f'http://{self.address}/json', headers={'Connection': 'close'})
super()._driver_init(tab_id) super()._driver_init(tab_id)
self.driver.set_listener('Inspector.detached', self._onInspectorDetached)
def _reload(self): def _reload(self):
"""重新获取document""" """重新获取document"""
debug = self._debug debug = self._debug
d_debug = self.driver._debug
if debug: if debug:
print('重新获取document') print('重新获取document')
self._frame_ele = ChromiumElement(self._target_page, backend_id=self._backend_id) self._frame_ele = ChromiumElement(self._target_page, backend_id=self._backend_id)
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._frame_ele.ids.backend_id)['node'] node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._frame_ele.ids.backend_id)['node']
end_time = perf_counter() + self.timeout
while perf_counter() < end_time:
try:
if self._is_inner_frame(): if self._is_inner_frame():
self._is_diff_domain = False self._is_diff_domain = False
self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId']) self.doc_ele = ChromiumElement(self._target_page,
backend_id=node['contentDocument']['backendNodeId'])
super().__init__(self.address, self._target_page.tab_id, self._target_page.timeout) super().__init__(self.address, self._target_page.tab_id, self._target_page.timeout)
self._debug = debug self._debug = debug
self.driver._debug = d_debug
else: else:
self._is_diff_domain = True self._is_diff_domain = True
self._driver.stop() self._driver.stop()
@ -114,6 +121,17 @@ class ChromiumFrame(ChromiumBase):
obj_id = super().run_js('document;', as_expr=True)['objectId'] obj_id = super().run_js('document;', as_expr=True)['objectId']
self.doc_ele = ChromiumElement(self, obj_id=obj_id) self.doc_ele = ChromiumElement(self, obj_id=obj_id)
self._debug = debug self._debug = debug
self.driver._debug = d_debug
break
except:
pass
sleep(.1)
else:
raise GetDocumentError
self.wait.load_complete()
def _check_ok(self): def _check_ok(self):
"""用于应付同域异域之间跳转导致元素丢失问题""" """用于应付同域异域之间跳转导致元素丢失问题"""
@ -122,7 +140,7 @@ class ChromiumFrame(ChromiumBase):
try: try:
self._target_page.run_cdp('DOM.describeNode', nodeId=self.ids.node_id) self._target_page.run_cdp('DOM.describeNode', nodeId=self.ids.node_id)
except Exception: except ElementLossError:
self._reload() self._reload()
# sleep(2) # sleep(2)
@ -130,15 +148,7 @@ class ChromiumFrame(ChromiumBase):
"""刷新cdp使用的document数据""" """刷新cdp使用的document数据"""
if self._is_reading: if self._is_reading:
return return
self._is_reading = True self._is_reading = True
if self._debug:
print('---获取document')
end_time = perf_counter() + 3
while self.is_alive and perf_counter() < end_time:
try:
if self._is_diff_domain is False: if self._is_diff_domain is False:
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self.ids.backend_id)['node'] node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self.ids.backend_id)['node']
self.doc_ele = ChromiumElement(self._target_page, self.doc_ele = ChromiumElement(self._target_page,
@ -148,54 +158,32 @@ class ChromiumFrame(ChromiumBase):
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId'] b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
self.doc_ele = ChromiumElement(self, backend_id=b_id) self.doc_ele = ChromiumElement(self, backend_id=b_id)
break r = self.run_cdp('Page.getFrameTree')
for i in findall(r"'id': '(.*?)'", str(r)):
except Exception: self.browser._frames[i] = self.tab_id
sleep(.1)
# else:
# raise RuntimeError('获取document失败。')
if self._debug:
print('---获取document结束')
self._is_loading = False self._is_loading = False
self._is_reading = False self._is_reading = False
def _onFrameNavigated(self, **kwargs):
"""页面跳转时触发"""
if kwargs['frame']['id'] == self.frame_id and self._first_run is False and self._is_loading:
self._is_loading = True
if self._debug:
print('navigated')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '加载流程', 'navigated'))
def _onLoadEventFired(self, **kwargs):
"""在页面刷新、变化后重新读取页面内容"""
# 用于覆盖父类方法,不能删
self._get_new_document()
if self._debug:
print('loadEventFired')
if self._debug_recorder:
self._debug_recorder.add_data((perf_counter(), '加载流程', 'loadEventFired'))
def _onFrameStartedLoading(self, **kwargs):
"""页面开始加载时触发"""
if kwargs['frameId'] == self.frame_id:
self._is_loading = True
if self._debug:
print('页面开始加载 FrameStartedLoading')
def _onFrameStoppedLoading(self, **kwargs): def _onFrameStoppedLoading(self, **kwargs):
"""页面加载完成后触发""" """页面加载完成后触发"""
if kwargs['frameId'] == self.frame_id and self._first_run is False and self._is_loading: self.browser._frames[kwargs['frameId']] = self.tab_id
if kwargs['frameId'] == self.frame_id:
self._ready_state = 'complete'
if self._debug: if self._debug:
print('页面停止加载 FrameStoppedLoading') print(f'FrameStoppedLoading {kwargs}')
self._get_new_document() self._get_new_document()
def _onInspectorDetached(self, **kwargs):
self._is_loading = True
# print('reload')
self._reload()
# def _onFrameDetached(self, **kwargs):
# if kwargs['frameId'] == self.frame_id:
# self._is_loading = True
# self._reload()
@property @property
def page(self): def page(self):
return self._page return self._page
@ -387,7 +375,7 @@ class ChromiumFrame(ChromiumBase):
def run_js(self, script, *args, as_expr=False): def run_js(self, script, *args, as_expr=False):
"""运行javascript代码 """运行javascript代码
:param script: js文本 :param script: js文本
:param args: 参数按顺序在js文本中对应argument[0]argument[1]... :param args: 参数按顺序在js文本中对应arguments[0]arguments[1]...
:param as_expr: 是否作为表达式运行为True时args无效 :param as_expr: 是否作为表达式运行为True时args无效
:return: 运行的结果 :return: 运行的结果
""" """
@@ -614,34 +602,43 @@ class ChromiumFrame(ChromiumBase):
for t in range(times + 1): for t in range(times + 1):
err = None err = None
result = self.driver.call_method('Page.navigate', url=to_url, frameId=self.frame_id) end_time = perf_counter() + timeout
result = self.driver.call_method('Page.navigate', url=to_url, frameId=self.frame_id, _timeout=timeout)
is_timeout = not self._wait_loaded(timeout) if result.get('error') == 'timeout':
sleep(.5)
self.wait.load_complete()
if is_timeout:
err = TimeoutError('页面连接超时。') err = TimeoutError('页面连接超时。')
if 'errorText' in result:
elif 'errorText' in result:
err = ConnectionError(result['errorText']) err = ConnectionError(result['errorText'])
if err:
sleep(interval)
if self._debug or show_errmsg:
print(f'重试{t + 1} {to_url}')
self.stop_loading()
continue
if self.page_load_strategy == 'none':
return True
yu = end_time - perf_counter()
ok = self._wait_loaded(1 if yu <= 0 else yu)
if not ok:
err = TimeoutError('页面连接超时。')
sleep(interval)
if self._debug or show_errmsg:
print(f'重试{t + 1} {to_url}')
self.stop_loading()
continue
if not err: if not err:
break break
if t < times:
sleep(interval)
while self.ready_state not in ('complete', None):
sleep(.1)
if self._debug:
print('重试')
if show_errmsg:
print(f'重试 {to_url}')
if err: if err:
if show_errmsg: if show_errmsg:
raise err if err is not None else ConnectionError('连接异常。') raise err if err is not None else ConnectionError('连接异常。')
return False return False
self._check_ok()
return True return True
def _is_inner_frame(self): def _is_inner_frame(self):

View File

@@ -49,9 +49,9 @@ class ChromiumFrame(ChromiumBase):
def _get_new_document(self) -> None: ... def _get_new_document(self) -> None: ...
def _onFrameAttached(self, **kwargs): ... def _onFrameStoppedLoading(self, **kwargs): ...
def _onFrameDetached(self, **kwargs): ... def _onInspectorDetached(self, **kwargs): ...
@property @property
def page(self) -> Union[ChromiumPage, WebPage]: ... def page(self) -> Union[ChromiumPage, WebPage]: ...

View File

@@ -266,8 +266,8 @@ class DownloadMission(object):
""" """
if show: if show:
print(f'url{self.url}') print(f'url{self.url}')
t2 = perf_counter() end_time = perf_counter()
while self.name is None and perf_counter() - t2 < 4: while self.name is None and perf_counter() < end_time:
sleep(0.01) sleep(0.01)
print(f'文件名:{self.name}') print(f'文件名:{self.name}')
print(f'目标路径:{self.path}') print(f'目标路径:{self.path}')

View File

@@ -82,8 +82,12 @@ class ChromiumBaseWaiter(object):
def upload_paths_inputted(self): def upload_paths_inputted(self):
"""等待自动填写上传文件路径""" """等待自动填写上传文件路径"""
while self._driver._upload_list: end_time = perf_counter() + self._driver.timeout
while perf_counter() < end_time:
if not self._driver._upload_list:
return True
sleep(.01) sleep(.01)
return False
def download_begin(self, timeout=None, cancel_it=False): def download_begin(self, timeout=None, cancel_it=False):
"""等待浏览器下载开始,可将其拦截 """等待浏览器下载开始,可将其拦截
@@ -201,7 +205,7 @@ class ChromiumTabWaiter(ChromiumBaseWaiter):
else: else:
end_time = perf_counter() + timeout end_time = perf_counter() + timeout
while end_time > perf_counter(): while perf_counter() < end_time:
if not self._driver.browser._dl_mgr.get_tab_missions(self._driver.tab_id): if not self._driver.browser._dl_mgr.get_tab_missions(self._driver.tab_id):
return True return True
sleep(.5) sleep(.5)
@@ -224,13 +228,14 @@ class ChromiumPageWaiter(ChromiumTabWaiter):
"""等待新标签页出现 """等待新标签页出现
:param timeout: 等待超时时间为None则使用页面对象timeout属性 :param timeout: 等待超时时间为None则使用页面对象timeout属性
:param raise_err: 等待失败时是否报错为None时根据Settings设置 :param raise_err: 等待失败时是否报错为None时根据Settings设置
:return: 是否等到新标签页出现 :return: 等到新标签页返回其id否则返回False
""" """
timeout = timeout if timeout is not None else self._driver.timeout timeout = timeout if timeout is not None else self._driver.timeout
end_time = perf_counter() + timeout end_time = perf_counter() + timeout
while perf_counter() < end_time: while perf_counter() < end_time:
if self._driver.tab_id != self._driver.latest_tab: latest_tab = self._driver.latest_tab
return True if self._driver.tab_id != latest_tab:
return latest_tab
sleep(.01) sleep(.01)
if raise_err is True or Settings.raise_when_wait_failed is True: if raise_err is True or Settings.raise_when_wait_failed is True:
@@ -251,7 +256,7 @@ class ChromiumPageWaiter(ChromiumTabWaiter):
else: else:
end_time = perf_counter() + timeout end_time = perf_counter() + timeout
while end_time > perf_counter(): while perf_counter() < end_time:
if not self._driver.browser._dl_mgr._missions: if not self._driver.browser._dl_mgr._missions:
return True return True
sleep(.5) sleep(.5)

View File

@@ -37,7 +37,7 @@ class ChromiumBaseWaiter(object):
def load_complete(self, timeout: float = None, raise_err: bool = None) -> bool: ... def load_complete(self, timeout: float = None, raise_err: bool = None) -> bool: ...
def upload_paths_inputted(self) -> None: ... def upload_paths_inputted(self) -> bool: ...
def download_begin(self, timeout: float = None, cancel_it: bool = False) -> Union[DownloadMission, bool]: ... def download_begin(self, timeout: float = None, cancel_it: bool = False) -> Union[DownloadMission, bool]: ...
@@ -62,7 +62,7 @@ class ChromiumTabWaiter(ChromiumBaseWaiter):
class ChromiumPageWaiter(ChromiumTabWaiter): class ChromiumPageWaiter(ChromiumTabWaiter):
_driver: ChromiumPage = ... _driver: ChromiumPage = ...
def new_tab(self, timeout: float = None, raise_err: bool = None) -> bool: ... def new_tab(self, timeout: float = None, raise_err: bool = None) -> Union[str, bool]: ...
def all_downloads_done(self, timeout: float = None, cancel_if_timeout: bool = True) -> bool: ... def all_downloads_done(self, timeout: float = None, cancel_if_timeout: bool = True) -> bool: ...

View File

@@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh:
setup( setup(
name="DrissionPage", name="DrissionPage",
version="4.0.0b0", version="4.0.0b1",
author="g1879", author="g1879",
author_email="g1879@qq.com", author_email="g1879@qq.com",
description="Python based web automation tool. It can control the browser and send and receive data packets.", description="Python based web automation tool. It can control the browser and send and receive data packets.",