4.0.0b14尝试解决获取文档失败问题

This commit is contained in:
g1879 2023-11-27 18:06:14 +08:00
parent acfd774d1f
commit 893a8e4957
7 changed files with 79 additions and 73 deletions

View File

@@ -162,10 +162,8 @@ class ChromiumDriver(object):
if result is None:
return {'error': 'tab closed', 'type': 'tab_closed'}
if 'result' not in result and 'error' in result:
return {'error': result['error']['message'],
'type': result.get('type', 'call_method_error'),
'method': _method,
'args': kwargs}
return {'error': result['error']['message'], 'type': result.get('type', 'call_method_error'),
'method': _method, 'args': kwargs}
return result['result']

View File

@@ -26,8 +26,8 @@ class OptionsManager(object):
self.ini_path = str(path)
if not Path(self.ini_path).exists():
raise FileNotFoundError('\nini文件不存在。\n如果是打包使用,请查看打包注意事项\nhttps://g1879.gitee.io/drission'
'pagedocs/advance/packaging/')
input('\nini文件不存在。\n如果是打包使用,请查看打包注意事项\nhttps://g1879.gitee.io/drission'
'pagedocs/advance/packaging/')
self._conf = RawConfigParser()
self._conf.read(self.ini_path, encoding='utf-8')

View File

@@ -25,8 +25,7 @@ from .._units.scroller import PageScroller
from .._units.setter import ChromiumBaseSetter
from .._units.states import PageStates
from .._units.waiter import BaseWaiter
from ..errors import (ContextLostError, ElementLostError, CDPError, PageClosedError, GetDocumentError,
ElementNotFoundError)
from ..errors import ContextLostError, ElementLostError, CDPError, PageClosedError, ElementNotFoundError
__ERROR__ = 'error'
@@ -57,6 +56,7 @@ class ChromiumBase(BasePage):
self._upload_list = None
self._doc_got = False # 用于在LoadEventFired和FrameStoppedLoading间标记是否已获取doc
self._download_path = None
self._load_end_time = 0
if isinstance(address, int) or (isinstance(address, str) and address.isdigit()):
address = f'127.0.0.1:{address}'
@@ -142,31 +142,37 @@ class ChromiumBase(BasePage):
self._driver.set_callback('Page.frameAttached', self._onFrameAttached)
self._driver.set_callback('Page.frameDetached', self._onFrameDetached)
def _get_document(self):
def _get_document(self, timeout=10):
"""获取页面文档
:param timeout: 超时时间
:return: 是否获取成功
"""
if self._debug:
print('获取文档开始')
if self._is_reading:
return
timeout = timeout if timeout >= .5 else .5
self._is_reading = True
end_time = perf_counter() + 10
while perf_counter() < end_time:
try:
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id)['object']['objectId']
break
except:
continue
else:
raise GetDocumentError
try:
b_id = self.run_cdp('DOM.getDocument', _timeout=timeout)['root']['backendNodeId']
self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id, _timeout=1)['object']['objectId']
r = self.run_cdp('Page.getFrameTree')
for i in findall(r"'id': '(.*?)'", str(r)):
self.browser._frames[i] = self.tab_id
r = self.run_cdp('Page.getFrameTree')
for i in findall(r"'id': '(.*?)'", str(r)):
self.browser._frames[i] = self.tab_id
if self._debug:
print('获取文档结束')
return True
self._is_loading = False
self._is_reading = False
if self._debug:
print('获取文档结束')
except:
print('获取文档失败')
if self._debug:
print('获取文档失败')
return False
finally:
self._is_loading = False
self._is_reading = False
def _onFrameDetached(self, **kwargs):
self.browser._frames.pop(kwargs['frameId'], None)
@@ -185,6 +191,7 @@ class ChromiumBase(BasePage):
self._doc_got = False
self._ready_state = 'loading'
self._is_loading = True
self._load_end_time = perf_counter() + self.timeouts.page_load
if self._load_mode == 'eager':
t = Thread(target=self._wait_to_stop)
t.daemon = True
@@ -215,7 +222,7 @@ class ChromiumBase(BasePage):
if self._load_mode == 'eager':
self.run_cdp('Page.stopLoading')
self._get_document()
self._get_document(self._load_end_time - perf_counter() - .1)
self._doc_got = True
self._ready_state = 'interactive'
@@ -229,7 +236,7 @@ class ChromiumBase(BasePage):
print('在LoadEventFired变成complete')
if self._doc_got is False:
self._get_document()
self._get_document(self._load_end_time - perf_counter() - .1)
self._doc_got = True
self._ready_state = 'complete'
@@ -245,7 +252,7 @@ class ChromiumBase(BasePage):
print('在FrameStoppedLoading变成complete')
if self._doc_got is False:
self._get_document()
self._get_document(self._load_end_time - perf_counter() - .1)
self._ready_state = 'complete'
if self._debug:
@@ -680,7 +687,7 @@ class ChromiumBase(BasePage):
print('停止页面加载')
try:
self.run_cdp('Page.stopLoading')
except PageClosedError:
except (PageClosedError, CDPError):
pass
end_time = perf_counter() + self.timeouts.page_load
while self._ready_state != 'complete' and perf_counter() < end_time:
@@ -910,9 +917,10 @@ class ChromiumBase(BasePage):
err = TimeoutError('页面连接超时。')
if err:
sleep(interval)
if self._debug or show_errmsg:
print(f'重试{t + 1} {to_url}')
if t < times:
sleep(interval)
if self._debug or show_errmsg:
print(f'重试{t + 1} {to_url}')
self.stop_loading()
continue
@@ -923,9 +931,10 @@ class ChromiumBase(BasePage):
ok = self._wait_loaded(1 if yu <= 0 else yu)
if not ok:
err = TimeoutError('页面连接超时。')
sleep(interval)
if self._debug or show_errmsg:
print(f'重试{t + 1} {to_url}')
if t < times:
sleep(interval)
if self._debug or show_errmsg:
print(f'重试{t + 1} {to_url}')
continue
if not err:

View File

@@ -54,6 +54,7 @@ class ChromiumBase(BasePage):
self._alert: Alert = ...
self._has_alert: bool = ...
self._doc_got: bool = ...
self._load_end_time: float = ...
self._ready_state: Optional[str] = ...
self._rect: TabRect = ...
@@ -61,7 +62,7 @@ class ChromiumBase(BasePage):
def _driver_init(self, tab_id: str) -> None: ...
def _get_document(self) -> None: ...
def _get_document(self, timeout: float = 10) -> bool: ...
def _wait_loaded(self, timeout: float = None) -> bool: ...

View File

@@ -108,8 +108,7 @@ class ChromiumFrame(ChromiumBase):
self._frame_ele = ChromiumElement(self._target_page, backend_id=self._backend_id)
end_time = perf_counter() + 2
while perf_counter() < end_time:
node = self._target_page.run_cdp('DOM.describeNode',
backendNodeId=self._frame_ele._backend_id)['node']
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._frame_ele._backend_id)['node']
if 'frameId' in node:
break
@@ -121,8 +120,7 @@ class ChromiumFrame(ChromiumBase):
if self._is_inner_frame():
self._is_diff_domain = False
self.doc_ele = ChromiumElement(self._target_page,
backend_id=node['contentDocument']['backendNodeId'])
self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId'])
self._frame_id = node['frameId']
super().__init__(self.address, self._target_page.tab_id, self._target_page.timeout)
self._debug = debug
@@ -152,47 +150,46 @@ class ChromiumFrame(ChromiumBase):
if self._debug:
print(f'{self._frame_id} reload 完毕')
def _get_document(self):
"""刷新cdp使用的document数据"""
def _get_document(self, timeout=10):
"""刷新cdp使用的document数据
:param timeout: 超时时间
:return: 是否获取成功
"""
if self._is_reading:
return
if self._debug:
print('>>> get new doc')
print('获取文档开始')
self._is_reading = True
end_time = perf_counter() + 10
while perf_counter() < end_time:
try:
if self._is_diff_domain is False:
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node']
self.doc_ele = ChromiumElement(self._target_page,
backend_id=node['contentDocument']['backendNodeId'])
try:
if self._is_diff_domain is False:
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node']
self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId'])
else:
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
self.doc_ele = ChromiumElement(self, backend_id=b_id)
else:
timeout = timeout if timeout >= .5 else .5
b_id = self.run_cdp('DOM.getDocument', _timeout=timeout)['root']['backendNodeId']
self.doc_ele = ChromiumElement(self, backend_id=b_id)
self._root_id = self.doc_ele._obj_id
self._root_id = self.doc_ele._obj_id
break
r = self.run_cdp('Page.getFrameTree')
for i in findall(r"'id': '(.*?)'", str(r)):
self.browser._frames[i] = self.tab_id
if self._debug:
print('获取文档结束')
return True
except:
continue
except:
if self._debug:
print('获取文档失败')
return False
else:
raise GetDocumentError
r = self.run_cdp('Page.getFrameTree')
for i in findall(r"'id': '(.*?)'", str(r)):
self.browser._frames[i] = self.tab_id
if not self._reloading: # 阻止reload时标识
self._is_loading = False
self._is_reading = False
if self._debug:
print('>>> new doc got')
finally:
if not self._reloading: # 阻止reload时标识
self._is_loading = False
self._is_reading = False
def _onInspectorDetached(self, **kwargs):
"""异域转同域或退出"""

View File

@@ -47,7 +47,7 @@ class ChromiumFrame(ChromiumBase):
def _reload(self) -> None: ...
def _get_document(self) -> None: ...
def _get_document(self, timeout: float = 10) -> bool: ...
def _onFrameStoppedLoading(self, **kwargs): ...

View File

@@ -199,7 +199,8 @@ class NetworkListener(object):
p = self._request_ids.setdefault(rid, DataPacket(self._page.tab_id, None))
p._raw_request = kwargs
if kwargs['request'].get('hasPostData', None) and not kwargs['request'].get('postData', None):
p._raw_post_data = self._driver.run('Network.getRequestPostData', requestId=rid)['postData']
p._raw_post_data = self._driver.run('Network.getRequestPostData',
requestId=rid).get('postData', None)
else:
rid = kwargs['requestId']