mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
4.0.0b14尝试解决获取文档失败问题
This commit is contained in:
parent
acfd774d1f
commit
893a8e4957
@ -162,10 +162,8 @@ class ChromiumDriver(object):
|
|||||||
if result is None:
|
if result is None:
|
||||||
return {'error': 'tab closed', 'type': 'tab_closed'}
|
return {'error': 'tab closed', 'type': 'tab_closed'}
|
||||||
if 'result' not in result and 'error' in result:
|
if 'result' not in result and 'error' in result:
|
||||||
return {'error': result['error']['message'],
|
return {'error': result['error']['message'], 'type': result.get('type', 'call_method_error'),
|
||||||
'type': result.get('type', 'call_method_error'),
|
'method': _method, 'args': kwargs}
|
||||||
'method': _method,
|
|
||||||
'args': kwargs}
|
|
||||||
|
|
||||||
return result['result']
|
return result['result']
|
||||||
|
|
||||||
|
@ -26,7 +26,7 @@ class OptionsManager(object):
|
|||||||
self.ini_path = str(path)
|
self.ini_path = str(path)
|
||||||
|
|
||||||
if not Path(self.ini_path).exists():
|
if not Path(self.ini_path).exists():
|
||||||
raise FileNotFoundError('\nini文件不存在。\n如果是打包使用,请查看打包注意事项\nhttps://g1879.gitee.io/drission'
|
input('\nini文件不存在。\n如果是打包使用,请查看打包注意事项\nhttps://g1879.gitee.io/drission'
|
||||||
'pagedocs/advance/packaging/')
|
'pagedocs/advance/packaging/')
|
||||||
self._conf = RawConfigParser()
|
self._conf = RawConfigParser()
|
||||||
self._conf.read(self.ini_path, encoding='utf-8')
|
self._conf.read(self.ini_path, encoding='utf-8')
|
||||||
|
@ -25,8 +25,7 @@ from .._units.scroller import PageScroller
|
|||||||
from .._units.setter import ChromiumBaseSetter
|
from .._units.setter import ChromiumBaseSetter
|
||||||
from .._units.states import PageStates
|
from .._units.states import PageStates
|
||||||
from .._units.waiter import BaseWaiter
|
from .._units.waiter import BaseWaiter
|
||||||
from ..errors import (ContextLostError, ElementLostError, CDPError, PageClosedError, GetDocumentError,
|
from ..errors import ContextLostError, ElementLostError, CDPError, PageClosedError, ElementNotFoundError
|
||||||
ElementNotFoundError)
|
|
||||||
|
|
||||||
__ERROR__ = 'error'
|
__ERROR__ = 'error'
|
||||||
|
|
||||||
@ -57,6 +56,7 @@ class ChromiumBase(BasePage):
|
|||||||
self._upload_list = None
|
self._upload_list = None
|
||||||
self._doc_got = False # 用于在LoadEventFired和FrameStoppedLoading间标记是否已获取doc
|
self._doc_got = False # 用于在LoadEventFired和FrameStoppedLoading间标记是否已获取doc
|
||||||
self._download_path = None
|
self._download_path = None
|
||||||
|
self._load_end_time = 0
|
||||||
|
|
||||||
if isinstance(address, int) or (isinstance(address, str) and address.isdigit()):
|
if isinstance(address, int) or (isinstance(address, str) and address.isdigit()):
|
||||||
address = f'127.0.0.1:{address}'
|
address = f'127.0.0.1:{address}'
|
||||||
@ -142,31 +142,37 @@ class ChromiumBase(BasePage):
|
|||||||
self._driver.set_callback('Page.frameAttached', self._onFrameAttached)
|
self._driver.set_callback('Page.frameAttached', self._onFrameAttached)
|
||||||
self._driver.set_callback('Page.frameDetached', self._onFrameDetached)
|
self._driver.set_callback('Page.frameDetached', self._onFrameDetached)
|
||||||
|
|
||||||
def _get_document(self):
|
def _get_document(self, timeout=10):
|
||||||
|
"""获取页面文档
|
||||||
|
:param timeout: 超时时间
|
||||||
|
:return: 是否获取成功
|
||||||
|
"""
|
||||||
if self._debug:
|
if self._debug:
|
||||||
print('获取文档开始')
|
print('获取文档开始')
|
||||||
if self._is_reading:
|
if self._is_reading:
|
||||||
return
|
return
|
||||||
|
timeout = timeout if timeout >= .5 else .5
|
||||||
self._is_reading = True
|
self._is_reading = True
|
||||||
end_time = perf_counter() + 10
|
|
||||||
while perf_counter() < end_time:
|
|
||||||
try:
|
try:
|
||||||
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
|
b_id = self.run_cdp('DOM.getDocument', _timeout=timeout)['root']['backendNodeId']
|
||||||
self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id)['object']['objectId']
|
self._root_id = self.run_cdp('DOM.resolveNode', backendNodeId=b_id, _timeout=1)['object']['objectId']
|
||||||
break
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
raise GetDocumentError
|
|
||||||
|
|
||||||
r = self.run_cdp('Page.getFrameTree')
|
r = self.run_cdp('Page.getFrameTree')
|
||||||
for i in findall(r"'id': '(.*?)'", str(r)):
|
for i in findall(r"'id': '(.*?)'", str(r)):
|
||||||
self.browser._frames[i] = self.tab_id
|
self.browser._frames[i] = self.tab_id
|
||||||
|
|
||||||
self._is_loading = False
|
|
||||||
self._is_reading = False
|
|
||||||
if self._debug:
|
if self._debug:
|
||||||
print('获取文档结束')
|
print('获取文档结束')
|
||||||
|
return True
|
||||||
|
|
||||||
|
except:
|
||||||
|
print('获取文档失败')
|
||||||
|
if self._debug:
|
||||||
|
print('获取文档失败')
|
||||||
|
return False
|
||||||
|
|
||||||
|
finally:
|
||||||
|
self._is_loading = False
|
||||||
|
self._is_reading = False
|
||||||
|
|
||||||
def _onFrameDetached(self, **kwargs):
|
def _onFrameDetached(self, **kwargs):
|
||||||
self.browser._frames.pop(kwargs['frameId'], None)
|
self.browser._frames.pop(kwargs['frameId'], None)
|
||||||
@ -185,6 +191,7 @@ class ChromiumBase(BasePage):
|
|||||||
self._doc_got = False
|
self._doc_got = False
|
||||||
self._ready_state = 'loading'
|
self._ready_state = 'loading'
|
||||||
self._is_loading = True
|
self._is_loading = True
|
||||||
|
self._load_end_time = perf_counter() + self.timeouts.page_load
|
||||||
if self._load_mode == 'eager':
|
if self._load_mode == 'eager':
|
||||||
t = Thread(target=self._wait_to_stop)
|
t = Thread(target=self._wait_to_stop)
|
||||||
t.daemon = True
|
t.daemon = True
|
||||||
@ -215,7 +222,7 @@ class ChromiumBase(BasePage):
|
|||||||
|
|
||||||
if self._load_mode == 'eager':
|
if self._load_mode == 'eager':
|
||||||
self.run_cdp('Page.stopLoading')
|
self.run_cdp('Page.stopLoading')
|
||||||
self._get_document()
|
self._get_document(self._load_end_time - perf_counter() - .1)
|
||||||
self._doc_got = True
|
self._doc_got = True
|
||||||
self._ready_state = 'interactive'
|
self._ready_state = 'interactive'
|
||||||
|
|
||||||
@ -229,7 +236,7 @@ class ChromiumBase(BasePage):
|
|||||||
print('在LoadEventFired变成complete')
|
print('在LoadEventFired变成complete')
|
||||||
|
|
||||||
if self._doc_got is False:
|
if self._doc_got is False:
|
||||||
self._get_document()
|
self._get_document(self._load_end_time - perf_counter() - .1)
|
||||||
self._doc_got = True
|
self._doc_got = True
|
||||||
self._ready_state = 'complete'
|
self._ready_state = 'complete'
|
||||||
|
|
||||||
@ -245,7 +252,7 @@ class ChromiumBase(BasePage):
|
|||||||
print('在FrameStoppedLoading变成complete')
|
print('在FrameStoppedLoading变成complete')
|
||||||
|
|
||||||
if self._doc_got is False:
|
if self._doc_got is False:
|
||||||
self._get_document()
|
self._get_document(self._load_end_time - perf_counter() - .1)
|
||||||
self._ready_state = 'complete'
|
self._ready_state = 'complete'
|
||||||
|
|
||||||
if self._debug:
|
if self._debug:
|
||||||
@ -680,7 +687,7 @@ class ChromiumBase(BasePage):
|
|||||||
print('停止页面加载')
|
print('停止页面加载')
|
||||||
try:
|
try:
|
||||||
self.run_cdp('Page.stopLoading')
|
self.run_cdp('Page.stopLoading')
|
||||||
except PageClosedError:
|
except (PageClosedError, CDPError):
|
||||||
pass
|
pass
|
||||||
end_time = perf_counter() + self.timeouts.page_load
|
end_time = perf_counter() + self.timeouts.page_load
|
||||||
while self._ready_state != 'complete' and perf_counter() < end_time:
|
while self._ready_state != 'complete' and perf_counter() < end_time:
|
||||||
@ -910,6 +917,7 @@ class ChromiumBase(BasePage):
|
|||||||
err = TimeoutError('页面连接超时。')
|
err = TimeoutError('页面连接超时。')
|
||||||
|
|
||||||
if err:
|
if err:
|
||||||
|
if t < times:
|
||||||
sleep(interval)
|
sleep(interval)
|
||||||
if self._debug or show_errmsg:
|
if self._debug or show_errmsg:
|
||||||
print(f'重试{t + 1} {to_url}')
|
print(f'重试{t + 1} {to_url}')
|
||||||
@ -923,6 +931,7 @@ class ChromiumBase(BasePage):
|
|||||||
ok = self._wait_loaded(1 if yu <= 0 else yu)
|
ok = self._wait_loaded(1 if yu <= 0 else yu)
|
||||||
if not ok:
|
if not ok:
|
||||||
err = TimeoutError('页面连接超时。')
|
err = TimeoutError('页面连接超时。')
|
||||||
|
if t < times:
|
||||||
sleep(interval)
|
sleep(interval)
|
||||||
if self._debug or show_errmsg:
|
if self._debug or show_errmsg:
|
||||||
print(f'重试{t + 1} {to_url}')
|
print(f'重试{t + 1} {to_url}')
|
||||||
|
@ -54,6 +54,7 @@ class ChromiumBase(BasePage):
|
|||||||
self._alert: Alert = ...
|
self._alert: Alert = ...
|
||||||
self._has_alert: bool = ...
|
self._has_alert: bool = ...
|
||||||
self._doc_got: bool = ...
|
self._doc_got: bool = ...
|
||||||
|
self._load_end_time: float = ...
|
||||||
self._ready_state: Optional[str] = ...
|
self._ready_state: Optional[str] = ...
|
||||||
self._rect: TabRect = ...
|
self._rect: TabRect = ...
|
||||||
|
|
||||||
@ -61,7 +62,7 @@ class ChromiumBase(BasePage):
|
|||||||
|
|
||||||
def _driver_init(self, tab_id: str) -> None: ...
|
def _driver_init(self, tab_id: str) -> None: ...
|
||||||
|
|
||||||
def _get_document(self) -> None: ...
|
def _get_document(self, timeout: float = 10) -> bool: ...
|
||||||
|
|
||||||
def _wait_loaded(self, timeout: float = None) -> bool: ...
|
def _wait_loaded(self, timeout: float = None) -> bool: ...
|
||||||
|
|
||||||
|
@ -108,8 +108,7 @@ class ChromiumFrame(ChromiumBase):
|
|||||||
self._frame_ele = ChromiumElement(self._target_page, backend_id=self._backend_id)
|
self._frame_ele = ChromiumElement(self._target_page, backend_id=self._backend_id)
|
||||||
end_time = perf_counter() + 2
|
end_time = perf_counter() + 2
|
||||||
while perf_counter() < end_time:
|
while perf_counter() < end_time:
|
||||||
node = self._target_page.run_cdp('DOM.describeNode',
|
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._frame_ele._backend_id)['node']
|
||||||
backendNodeId=self._frame_ele._backend_id)['node']
|
|
||||||
if 'frameId' in node:
|
if 'frameId' in node:
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -121,8 +120,7 @@ class ChromiumFrame(ChromiumBase):
|
|||||||
|
|
||||||
if self._is_inner_frame():
|
if self._is_inner_frame():
|
||||||
self._is_diff_domain = False
|
self._is_diff_domain = False
|
||||||
self.doc_ele = ChromiumElement(self._target_page,
|
self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId'])
|
||||||
backend_id=node['contentDocument']['backendNodeId'])
|
|
||||||
self._frame_id = node['frameId']
|
self._frame_id = node['frameId']
|
||||||
super().__init__(self.address, self._target_page.tab_id, self._target_page.timeout)
|
super().__init__(self.address, self._target_page.tab_id, self._target_page.timeout)
|
||||||
self._debug = debug
|
self._debug = debug
|
||||||
@ -152,48 +150,47 @@ class ChromiumFrame(ChromiumBase):
|
|||||||
if self._debug:
|
if self._debug:
|
||||||
print(f'{self._frame_id} reload 完毕')
|
print(f'{self._frame_id} reload 完毕')
|
||||||
|
|
||||||
def _get_document(self):
|
def _get_document(self, timeout=10):
|
||||||
"""刷新cdp使用的document数据"""
|
"""刷新cdp使用的document数据
|
||||||
|
:param timeout: 超时时间
|
||||||
|
:return: 是否获取成功
|
||||||
|
"""
|
||||||
if self._is_reading:
|
if self._is_reading:
|
||||||
return
|
return
|
||||||
|
|
||||||
if self._debug:
|
if self._debug:
|
||||||
print('>>> get new doc')
|
print('获取文档开始')
|
||||||
|
|
||||||
self._is_reading = True
|
self._is_reading = True
|
||||||
end_time = perf_counter() + 10
|
|
||||||
while perf_counter() < end_time:
|
|
||||||
try:
|
try:
|
||||||
if self._is_diff_domain is False:
|
if self._is_diff_domain is False:
|
||||||
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node']
|
node = self._target_page.run_cdp('DOM.describeNode', backendNodeId=self._backend_id)['node']
|
||||||
self.doc_ele = ChromiumElement(self._target_page,
|
self.doc_ele = ChromiumElement(self._target_page, backend_id=node['contentDocument']['backendNodeId'])
|
||||||
backend_id=node['contentDocument']['backendNodeId'])
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
b_id = self.run_cdp('DOM.getDocument')['root']['backendNodeId']
|
timeout = timeout if timeout >= .5 else .5
|
||||||
|
b_id = self.run_cdp('DOM.getDocument', _timeout=timeout)['root']['backendNodeId']
|
||||||
self.doc_ele = ChromiumElement(self, backend_id=b_id)
|
self.doc_ele = ChromiumElement(self, backend_id=b_id)
|
||||||
|
|
||||||
self._root_id = self.doc_ele._obj_id
|
self._root_id = self.doc_ele._obj_id
|
||||||
|
|
||||||
break
|
|
||||||
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise GetDocumentError
|
|
||||||
|
|
||||||
r = self.run_cdp('Page.getFrameTree')
|
r = self.run_cdp('Page.getFrameTree')
|
||||||
for i in findall(r"'id': '(.*?)'", str(r)):
|
for i in findall(r"'id': '(.*?)'", str(r)):
|
||||||
self.browser._frames[i] = self.tab_id
|
self.browser._frames[i] = self.tab_id
|
||||||
|
if self._debug:
|
||||||
|
print('获取文档结束')
|
||||||
|
return True
|
||||||
|
|
||||||
|
except:
|
||||||
|
if self._debug:
|
||||||
|
print('获取文档失败')
|
||||||
|
return False
|
||||||
|
|
||||||
|
finally:
|
||||||
if not self._reloading: # 阻止reload时标识
|
if not self._reloading: # 阻止reload时标识
|
||||||
self._is_loading = False
|
self._is_loading = False
|
||||||
self._is_reading = False
|
self._is_reading = False
|
||||||
|
|
||||||
if self._debug:
|
|
||||||
print('>>> new doc got')
|
|
||||||
|
|
||||||
def _onInspectorDetached(self, **kwargs):
|
def _onInspectorDetached(self, **kwargs):
|
||||||
"""异域转同域或退出"""
|
"""异域转同域或退出"""
|
||||||
if self._debug:
|
if self._debug:
|
||||||
|
@ -47,7 +47,7 @@ class ChromiumFrame(ChromiumBase):
|
|||||||
|
|
||||||
def _reload(self) -> None: ...
|
def _reload(self) -> None: ...
|
||||||
|
|
||||||
def _get_document(self) -> None: ...
|
def _get_document(self, timeout: float = 10) -> bool: ...
|
||||||
|
|
||||||
def _onFrameStoppedLoading(self, **kwargs): ...
|
def _onFrameStoppedLoading(self, **kwargs): ...
|
||||||
|
|
||||||
|
@ -199,7 +199,8 @@ class NetworkListener(object):
|
|||||||
p = self._request_ids.setdefault(rid, DataPacket(self._page.tab_id, None))
|
p = self._request_ids.setdefault(rid, DataPacket(self._page.tab_id, None))
|
||||||
p._raw_request = kwargs
|
p._raw_request = kwargs
|
||||||
if kwargs['request'].get('hasPostData', None) and not kwargs['request'].get('postData', None):
|
if kwargs['request'].get('hasPostData', None) and not kwargs['request'].get('postData', None):
|
||||||
p._raw_post_data = self._driver.run('Network.getRequestPostData', requestId=rid)['postData']
|
p._raw_post_data = self._driver.run('Network.getRequestPostData',
|
||||||
|
requestId=rid).get('postData', None)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
rid = kwargs['requestId']
|
rid = kwargs['requestId']
|
||||||
|
Loading…
x
Reference in New Issue
Block a user