4.0.0b14尝试解决获取文档失败问题

This commit is contained in:
g1879 2023-11-27 18:06:14 +08:00
parent acfd774d1f
commit 893a8e4957
7 changed files with 79 additions and 73 deletions

View File

@ -162,10 +162,8 @@ class ChromiumDriver(object):
if result is None: if result is None:
return {'error': 'tab closed', 'type': 'tab_closed'} return {'error': 'tab closed', 'type': 'tab_closed'}
if 'result' not in result and 'error' in result: if 'result' not in result and 'error' in result:
return {'error': result['error']['message'], return {'error': result['error']['message'], 'type': result.get('type', 'call_method_error'),
'type': result.get('type', 'call_method_error'), 'method': _method, 'args': kwargs}
'method': _method,
'args': kwargs}
return result['result'] return result['result']

View File

@ -26,7 +26,7 @@ class OptionsManager(object):
self.ini_path = str(path) self.ini_path = str(path)
if not Path(self.ini_path).exists(): if not Path(self.ini_path).exists():
raise FileNotFoundError('\nini文件不存在。\n如果是打包使用,请查看打包注意事项\nhttps://g1879.gitee.io/drission' input('\nini文件不存在。\n如果是打包使用,请查看打包注意事项\nhttps://g1879.gitee.io/drission'
'pagedocs/advance/packaging/') 'pagedocs/advance/packaging/')
self._conf = RawConfigParser() self._conf = RawConfigParser()
self._conf.read(self.ini_path, encoding='utf-8') self._conf.read(self.ini_path, encoding='utf-8')

View File

@ -25,8 +25,7 @@ from .._units.scroller import PageScroller
from .._units.setter import ChromiumBaseSetter from .._units.setter import ChromiumBaseSetter
from .._units.states import PageStates from .._units.states import PageStates
from .._units.waiter import BaseWaiter from .._units.waiter import BaseWaiter
from ..errors import (ContextLostError, ElementLostError, CDPError, PageClosedError, GetDocumentError, from ..errors import ContextLostError, ElementLostError, CDPError, PageClosedError, ElementNotFoundError
ElementNotFoundError)
__ERROR__ = 'error' __ERROR__ = 'error'
@ -57,6 +56,7 @@ class ChromiumBase(BasePage):
self._upload_list = None self._upload_list = None
self._doc_got = False # 用于在LoadEventFired和FrameStoppedLoading间标记是否已获取doc self._doc_got = False # 用于在LoadEventFired和FrameStoppedLoading间标记是否已获取doc
self._download_path = None self._download_path = None
self._load_end_time = 0
if isinstance(address, int) or (isinstance(address, str) and address.isdigit()): if isinstance(address, int) or (isinstance(address, str) and address.isdigit()):
address = f'127.0.0.1:{address}' address = f'127.0.0.1:{address}'
@ -142,31 +142,37 @@ class ChromiumBase(BasePage):
self._driver.set_callback('Page.frameAttached', self._onFrameAttached) self._driver.set_callback('Page.frameAttached', self._onFrameAttached)
self._driver.set_callback('Page.frameDetached', self._onFrameDetached) self._driver.set_callback('Page.frameDetached', self._onFrameDetached)
def _get_document(self): def _get_document(self, timeout=10):
"""获取页面文档
:param timeout: 超时时间
:return: 是否获取成功
"""
if self._debug: if self._debug:
print('获取文档开始') print('获取文档开始')
if self._is_reading: if self._is_reading:
return return
timeout = timeout if timeout >= .5 else .5
self._is_reading = True self._is_reading = True
end_time = perf_counter() + 10
while perf_counter() < end_time:
try: try:
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId'] b_id = self.run_cdp('DOM.getDocument', _timeout=timeout)['root']['backendNodeId']
self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id)['object']['objectId'] self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id, _timeout=1)['object']['objectId']
break
except:
continue
else:
raise GetDocumentError
r = self.run_cdp('Page.getFrameTree') r = self.run_cdp('Page.getFrameTree')
for i in findall(r"'id': '(.*?)'", str(r)): for i in findall(r"'id': '(.*?)'", str(r)):
self.browser._frames[i] = self.tab_id self.browser._frames[i] = self.tab_id
self._is_loading = False
self._is_reading = False
if self._debug: if self._debug:
print('获取文档结束') print('获取文档结束')
return True
except:
print('获取文档失败')
if self._debug:
print('获取文档失败')
return False
finally:
self._is_loading = False
self._is_reading = False
def _onFrameDetached(self, **kwargs): def _onFrameDetached(self, **kwargs):
self.browser._frames.pop(kwargs['frameId'], None) self.browser._frames.pop(kwargs['frameId'], None)
@ -185,6 +191,7 @@ class ChromiumBase(BasePage):
self._doc_got = False self._doc_got = False
self._ready_state = 'loading' self._ready_state = 'loading'
self._is_loading = True self._is_loading = True
self._load_end_time = perf_counter() + self.timeouts.page_load
if self._load_mode == 'eager': if self._load_mode == 'eager':
t = Thread(target=self._wait_to_stop) t = Thread(target=self._wait_to_stop)
t.daemon = True t.daemon = True
@ -215,7 +222,7 @@ class ChromiumBase(BasePage):
if self._load_mode == 'eager': if self._load_mode == 'eager':
self.run_cdp('Page.stopLoading') self.run_cdp('Page.stopLoading')
self._get_document() self._get_document(self._load_end_time - perf_counter() - .1)
self._doc_got = True self._doc_got = True
self._ready_state = 'interactive' self._ready_state = 'interactive'
@ -229,7 +236,7 @@ class ChromiumBase(BasePage):
print('在LoadEventFired变成complete') print('在LoadEventFired变成complete')
if self._doc_got is False: if self._doc_got is False:
self._get_document() self._get_document(self._load_end_time - perf_counter() - .1)
self._doc_got = True self._doc_got = True
self._ready_state = 'complete' self._ready_state = 'complete'
@ -245,7 +252,7 @@ class ChromiumBase(BasePage):
print('在FrameStoppedLoading变成complete') print('在FrameStoppedLoading变成complete')
if self._doc_got is False: if self._doc_got is False:
self._get_document() self._get_document(self._load_end_time - perf_counter() - .1)
self._ready_state = 'complete' self._ready_state = 'complete'
if self._debug: if self._debug:
@ -680,7 +687,7 @@ class ChromiumBase(BasePage):
print('停止页面加载') print('停止页面加载')
try: try:
self.run_cdp('Page.stopLoading') self.run_cdp('Page.stopLoading')
except PageClosedError: except (PageClosedError, CDPError):
pass pass
end_time = perf_counter() + self.timeouts.page_load end_time = perf_counter() + self.timeouts.page_load
while self._ready_state != 'complete' and perf_counter() < end_time: while self._ready_state != 'complete' and perf_counter() < end_time:
@ -910,6 +917,7 @@ class ChromiumBase(BasePage):
err = TimeoutError('页面连接超时。') err = TimeoutError('页面连接超时。')
if err: if err:
if t < times:
sleep(interval) sleep(interval)
if self._debug or show_errmsg: if self._debug or show_errmsg:
print(f'重试{t + 1} {to_url}') print(f'重试{t + 1} {to_url}')
@ -923,6 +931,7 @@ class ChromiumBase(BasePage):
ok = self._wait_loaded(1 if yu <= 0 else yu) ok = self._wait_loaded(1 if yu <= 0 else yu)
if not ok: if not ok:
err = TimeoutError('页面连接超时。') err = TimeoutError('页面连接超时。')
if t < times:
sleep(interval) sleep(interval)
if self._debug or show_errmsg: if self._debug or show_errmsg:
print(f'重试{t + 1} {to_url}') print(f'重试{t + 1} {to_url}')

View File

@ -54,6 +54,7 @@ class ChromiumBase(BasePage):
self._alert: Alert = ... self._alert: Alert = ...
self._has_alert: bool = ... self._has_alert: bool = ...
self._doc_got: bool = ... self._doc_got: bool = ...
self._load_end_time: float = ...
self._ready_state: Optional[str] = ... self._ready_state: Optional[str] = ...
self._rect: TabRect = ... self._rect: TabRect = ...
@ -61,7 +62,7 @@ class ChromiumBase(BasePage):
def _driver_init(self, tab_id: str) -> None: ... def _driver_init(self, tab_id: str) -> None: ...
def _get_document(self) -> None: ... def _get_document(self, timeout: float = 10) -> bool: ...
def _wait_loaded(self, timeout: float = None) -> bool: ... def _wait_loaded(self, timeout: float = None) -> bool: ...

View File

@ -108,8 +108,7 @@ class ChromiumFrame(ChromiumBase):
self._frame_ele = ChromiumElement(self._target_page, backend_id=self._backend_id) self._frame_ele = ChromiumElement(self._target_page, backend_id=self._backend_id)
end_time = perf_counter() + 2 end_time = perf_counter() + 2
while perf_counter() < end_time: while perf_counter() < end_time:
node = self._target_page.run_cdp('DOM.describeNode', node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._frame_ele._backend_id)['node']
backendNodeId=self._frame_ele._backend_id)['node']
if 'frameId' in node: if 'frameId' in node:
break break
@ -121,8 +120,7 @@ class ChromiumFrame(ChromiumBase):
if self._is_inner_frame(): if self._is_inner_frame():
self._is_diff_domain = False self._is_diff_domain = False
self.doc_ele = ChromiumElement(self._target_page, self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId'])
backend_id=node['contentDocument']['backendNodeId'])
self._frame_id = node['frameId'] self._frame_id = node['frameId']
super().__init__(self.address, self._target_page.tab_id, self._target_page.timeout) super().__init__(self.address, self._target_page.tab_id, self._target_page.timeout)
self._debug = debug self._debug = debug
@ -152,48 +150,47 @@ class ChromiumFrame(ChromiumBase):
if self._debug: if self._debug:
print(f'{self._frame_id} reload 完毕') print(f'{self._frame_id} reload 完毕')
def _get_document(self): def _get_document(self, timeout=10):
"""刷新cdp使用的document数据""" """刷新cdp使用的document数据
:param timeout: 超时时间
:return: 是否获取成功
"""
if self._is_reading: if self._is_reading:
return return
if self._debug: if self._debug:
print('>>> get new doc') print('获取文档开始')
self._is_reading = True self._is_reading = True
end_time = perf_counter() + 10
while perf_counter() < end_time:
try: try:
if self._is_diff_domain is False: if self._is_diff_domain is False:
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node'] node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node']
self.doc_ele = ChromiumElement(self._target_page, self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId'])
backend_id=node['contentDocument']['backendNodeId'])
else: else:
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId'] timeout = timeout if timeout >= .5 else .5
b_id = self.run_cdp('DOM.getDocument', _timeout=timeout)['root']['backendNodeId']
self.doc_ele = ChromiumElement(self, backend_id=b_id) self.doc_ele = ChromiumElement(self, backend_id=b_id)
self._root_id = self.doc_ele._obj_id self._root_id = self.doc_ele._obj_id
break
except:
continue
else:
raise GetDocumentError
r = self.run_cdp('Page.getFrameTree') r = self.run_cdp('Page.getFrameTree')
for i in findall(r"'id': '(.*?)'", str(r)): for i in findall(r"'id': '(.*?)'", str(r)):
self.browser._frames[i] = self.tab_id self.browser._frames[i] = self.tab_id
if self._debug:
print('获取文档结束')
return True
except:
if self._debug:
print('获取文档失败')
return False
finally:
if not self._reloading: # 阻止reload时标识 if not self._reloading: # 阻止reload时标识
self._is_loading = False self._is_loading = False
self._is_reading = False self._is_reading = False
if self._debug:
print('>>> new doc got')
def _onInspectorDetached(self, **kwargs): def _onInspectorDetached(self, **kwargs):
"""异域转同域或退出""" """异域转同域或退出"""
if self._debug: if self._debug:

View File

@ -47,7 +47,7 @@ class ChromiumFrame(ChromiumBase):
def _reload(self) -> None: ... def _reload(self) -> None: ...
def _get_document(self) -> None: ... def _get_document(self, timeout: float = 10) -> bool: ...
def _onFrameStoppedLoading(self, **kwargs): ... def _onFrameStoppedLoading(self, **kwargs): ...

View File

@ -199,7 +199,8 @@ class NetworkListener(object):
p = self._request_ids.setdefault(rid, DataPacket(self._page.tab_id, None)) p = self._request_ids.setdefault(rid, DataPacket(self._page.tab_id, None))
p._raw_request = kwargs p._raw_request = kwargs
if kwargs['request'].get('hasPostData', None) and not kwargs['request'].get('postData', None): if kwargs['request'].get('hasPostData', None) and not kwargs['request'].get('postData', None):
p._raw_post_data = self._driver.run('Network.getRequestPostData', requestId=rid)['postData'] p._raw_post_data = self._driver.run('Network.getRequestPostData',
requestId=rid).get('postData', None)
else: else:
rid = kwargs['requestId'] rid = kwargs['requestId']