diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index 6b584e0..f90be65 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -174,7 +174,10 @@ class ChromiumBase(BasePage): self._debug_recorder.add_data((perf_counter(), '加载流程', 'navigated')) def _set_options(self): - pass + self.set_timeouts(page_load=10, + script=10, + implicit=10) + self._page_load_strategy = 'normal' def __call__(self, loc_or_str, timeout=None): """在内部查找元素 \n @@ -583,7 +586,7 @@ class ChromiumBase(BasePage): err = None timeout = timeout if timeout is not None else self.timeouts.page_load - for _ in range(times + 1): + for t in range(times + 1): err = None result = self._driver.Page.navigate(url=to_url) @@ -599,7 +602,7 @@ class ChromiumBase(BasePage): if not err: break - if _ < times: + if t < times: sleep(interval) while self.ready_state != 'complete': sleep(.1) diff --git a/DrissionPage/chromium_base.pyi b/DrissionPage/chromium_base.pyi index 95b690b..6079d05 100644 --- a/DrissionPage/chromium_base.pyi +++ b/DrissionPage/chromium_base.pyi @@ -121,14 +121,6 @@ class ChromiumBase(BasePage): interval: float = ..., timeout: float = ...) -> Union[None, bool]: ... - def _get(self, - url: str, - show_errmsg: bool = ..., - retry: int = ..., - interval: float = ..., - timeout: float = ..., - frame_id: str = ...) -> Union[None, bool]: ... - def get_cookies(self, as_dict: bool = ...) -> Union[list, dict]: ... def set_cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: ... @@ -190,8 +182,7 @@ class ChromiumBase(BasePage): times: int = ..., interval: float = ..., show_errmsg: bool = ..., - timeout: float = ..., - frame_id: str = ...) -> Union[bool, None]: ... + timeout: float = ...) -> Union[bool, None]: ... class Timeout(object): diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index a444189..f9dd5af 100644 --- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -20,25 +20,32 @@ __FRAME_ELEMENT__ = ('iframe', 'frame') class ChromiumElement(DrissionElement): """ChromePage页面对象中的元素对象""" - def __init__(self, page, node_id=None, obj_id=None): + def __init__(self, page, node_id=None, obj_id=None, backend_id=None): """初始化,node_id和obj_id必须至少传入一个 \n :param page: 元素所在ChromePage页面对象 :param node_id: cdp中的node id :param obj_id: js中的object id + :param backend_id: backend id """ super().__init__(page) self._select = None self._scroll = None self._tag = None - if not node_id and not obj_id: - raise RuntimeError('元素可能已失效。') if node_id: self._node_id = node_id self._obj_id = self._get_obj_id(node_id) - else: + self._backend_id = self._get_backend_id(self._node_id) + elif obj_id: self._node_id = self._get_node_id(obj_id) self._obj_id = obj_id + self._backend_id = self._get_backend_id(self._node_id) + elif backend_id: + self._obj_id = self._get_obj_id(backend_id=backend_id) + self._node_id = self._get_node_id(obj_id=self._obj_id) + self._backend_id = backend_id + else: + raise RuntimeError('元素可能已失效。') doc = self.run_script('return this.ownerDocument;') self._doc_id = doc['objectId'] if doc else None @@ -112,6 +119,11 @@ class ChromiumElement(DrissionElement): """返回cdp中的node id""" return self._node_id + @property + def backend_id(self): + """返回backend id""" + return self._backend_id + @property def doc_id(self): """返回document的object id""" @@ -723,19 +735,34 @@ class ChromiumElement(DrissionElement): current_x, current_y = x, y actions.release() - def _get_obj_id(self, node_id): + def _get_obj_id(self, node_id=None, backend_id=None): """根据传入node id获取js中的object id \n :param node_id: cdp中的node id + :param backend_id: backend id :return: js中的object id """ - return self.page.run_cdp('DOM.resolveNode', nodeId=node_id, not_change=True)['object']['objectId'] + if node_id: + return self.page.run_cdp('DOM.resolveNode', nodeId=node_id, not_change=True)['object']['objectId'] + else: + return self.page.run_cdp('DOM.resolveNode', backendNodeId=backend_id, not_change=True)['object']['objectId'] - def _get_node_id(self, obj_id): + def _get_node_id(self, obj_id=None, backend_id=None): """根据传入object id获取cdp中的node id \n :param obj_id: js中的object id + :param backend_id: backend id :return: cdp中的node id """ - return self.page.run_cdp('DOM.requestNode', objectId=obj_id, not_change=True)['nodeId'] + if obj_id: + return self.page.run_cdp('DOM.requestNode', objectId=obj_id, not_change=True)['nodeId'] + else: + return self.page.run_cdp('DOM.describeNode', backendNodeId=backend_id, not_change=True)['node']['nodeId'] + + def _get_backend_id(self, node_id): + """根据传入node id获取backend id + :param node_id: + :return: backend id + """ + return self.page.run_cdp('DOM.describeNode', nodeId=node_id, not_change=True)['node']['backendNodeId'] def _get_ele_path(self, mode): """返获取css路径或xpath路径""" @@ -848,9 +875,14 @@ class ChromiumShadowRootElement(BaseElement): @property def obj_id(self): - """返回元素js中的obect id""" + """返回元素js中的object id""" return self._obj_id + @property + def backend_id(self): + """返回backend id""" + return self._backend_id + @property def tag(self): """返回元素标签名""" diff --git a/DrissionPage/chromium_element.pyi b/DrissionPage/chromium_element.pyi index f070f5e..a0905b5 100644 --- a/DrissionPage/chromium_element.pyi +++ b/DrissionPage/chromium_element.pyi @@ -19,12 +19,12 @@ class ChromiumElement(DrissionElement): def __init__(self, page: ChromiumBase, - node_id: str = ..., - obj_id: str = ...): + node_id: str = ..., obj_id: str = ..., backend_id: str = ...): self._tag: str = ... self.page: Union[ChromiumPage, WebPage] = ... self._node_id: str = ... self._obj_id: str = ... + self._backend_id: str = ... self._doc_id: str = ... self._scroll: ChromeScroll = ... self._select: ChromeSelect = ... @@ -60,6 +60,9 @@ class ChromiumElement(DrissionElement): @property def node_id(self) -> str: ... + @property + def backend_id(self) -> str: ... + @property def doc_id(self) -> str: ... @@ -221,15 +224,17 @@ class ChromiumElement(DrissionElement): speed: int = ..., shake: bool = ...) -> None: ... - def _get_obj_id(self, node_id) -> str: ... + def _get_obj_id(self, node_id: str = ..., backend_id: str = ...) -> str: ... - def _get_node_id(self, obj_id) -> str: ... + def _get_node_id(self, obj_id: str = ..., backend_id: str = ...) -> str: ... - def _get_ele_path(self, mode) -> str: ... + def _get_backend_id(self, node_id: str) -> str: ... + + def _get_ele_path(self, mode: str) -> str: ... def _get_client_rect(self, quad: str) -> Union[dict, None]: ... - def _get_absolute_rect(self, x, y) -> dict: ... + def _get_absolute_rect(self, x: int, y: int) -> dict: ... class ChromiumShadowRootElement(BaseElement): @@ -241,6 +246,7 @@ class ChromiumShadowRootElement(BaseElement): backend_id: str = ...): self._obj_id: str = ... self._node_id: str = ... + self._backend_id: str = ... self.page: ChromiumPage = ... self.parent_ele: ChromiumElement = ... @@ -262,6 +268,9 @@ class ChromiumShadowRootElement(BaseElement): @property def obj_id(self) -> str: ... + @property + def backend_id(self) -> str: ... + @property def tag(self) -> str: ... @@ -321,10 +330,11 @@ class ChromiumShadowRootElement(BaseElement): def find_in_chromium_ele(ele: ChromiumElement, - loc: Union[str, Tuple[str, str]], - single: bool = ..., - timeout: float = ..., - relative: bool = ...) -> Union[ChromiumElement, str, None, List[Union[ChromiumElement, str]]]: ... + loc: Union[str, Tuple[str, str]], + single: bool = ..., + timeout: float = ..., + relative: bool = ...) -> Union[ + ChromiumElement, str, None, List[Union[ChromiumElement, str]]]: ... def _find_by_xpath(ele: ChromiumElement, @@ -340,14 +350,15 @@ def _find_by_css(ele: ChromiumElement, timeout: float) -> Union[ChromiumElement, List[ChromiumElement], None]: ... -def make_chromium_ele(page: ChromiumBase, node_id: str = ..., obj_id: str = ...) -> Union[ChromiumElement, ChromiumFrame]: ... +def make_chromium_ele(page: ChromiumBase, node_id: str = ..., obj_id: str = ...) -> Union[ + ChromiumElement, ChromiumFrame]: ... def _make_js_for_find_ele_by_xpath(xpath: str, type_txt: str, node_txt: str) -> str: ... def run_script(page_or_ele: Union[ChromiumBase, ChromiumElement, ChromiumShadowRootElement], script: str, - as_expr: bool = ..., timeout: float = ..., args: tuple = ..., not_change:bool=...) -> Any: ... + as_expr: bool = ..., timeout: float = ..., args: tuple = ..., not_change: bool = ...) -> Any: ... def _parse_js_result(page: ChromiumBase, ele: ChromiumElement, result: dict): ... @@ -449,7 +460,7 @@ class ChromiumElementWaiter(object): """等待元素在dom中某种状态,如删除、显示、隐藏""" def __init__(self, - page_or_ele, + page_or_ele: Union[ChromiumBase, ChromiumElement], loc_or_ele: Union[str, tuple, ChromiumElement], timeout: float = ...): self.loc_or_ele: Union[str, tuple, ChromiumElement] = ... diff --git a/DrissionPage/chromium_frame.py b/DrissionPage/chromium_frame.py index 67e2e47..9126b24 100644 --- a/DrissionPage/chromium_frame.py +++ b/DrissionPage/chromium_frame.py @@ -4,12 +4,108 @@ @Contact : g1879@qq.com """ from re import search +from time import sleep from .chromium_base import ChromiumBase from .chromium_element import ChromiumElement from .session_element import make_session_ele +class cc(ChromiumBase): + def __init__(self, address: str, tab_id=None, timeout=None, frame_id=None, page=None, frame_ele=None): + self.frame_id = frame_id + self.frame_ele = frame_ele + self.ppp = page + super().__init__(address, tab_id, timeout) + self.backend_id = frame_ele.backend_id + self.doc_ele = ChromiumElement(self.ppp, backend_id=self.backend_id) + + def _get_new_document(self): + """刷新cdp使用的document数据""" + if not self._is_reading: + self._is_reading = True + + if self._debug: + print('---获取document') + + while True: + try: + self.doc_ele = ChromiumElement(self.ppp, backend_id=self.backend_id) + break + + except Exception: + # raise + pass + + if self._debug: + print('---获取document结束') + + self._is_loading = False + self._is_reading = False + + def _onFrameStartedLoading(self, **kwargs): + """页面开始加载时触发""" + # pass + if kwargs['frameId'] == self.frame_id: + self._is_loading = True + + if self._debug: + print('页面开始加载 FrameStartedLoading') + + def _onFrameStoppedLoading(self, **kwargs): + """页面加载完成后触发""" + # pass + if kwargs['frameId'] == self.frame_id and self._first_run is False and self._is_loading: + if self._debug: + print('页面停止加载 FrameStoppedLoading') + + self._get_new_document() + + def _d_connect(self, to_url, times=0, interval=1, show_errmsg=False, timeout=None): + """尝试连接,重试若干次 \n + :param to_url: 要访问的url + :param times: 重试次数 + :param interval: 重试间隔(秒) + :param show_errmsg: 是否抛出异常 + :param timeout: 连接超时时间 + :return: 是否成功,返回None表示不确定 + """ + err = None + timeout = timeout if timeout is not None else self.timeouts.page_load + + for t in range(times + 1): + err = None + result = self._driver.Page.navigate(url=to_url, frameId=self.frame_id) + + is_timeout = not self._wait_loading(timeout) + while self.is_loading: + sleep(.1) + + if is_timeout: + err = TimeoutError('页面连接超时。') + if 'errorText' in result: + err = ConnectionError(result['errorText']) + + if not err: + break + + if t < times: + sleep(interval) + while self.ready_state != 'complete': + sleep(.1) + if self._debug: + print('重试') + if show_errmsg: + print(f'重试 {to_url}') + + if err: + if show_errmsg: + raise err if err is not None else ConnectionError('连接异常。') + return False + + return True + + class ChromiumFrame(object): """frame元素的类。 frame既是元素,也是页面,可以获取元素属性和定位周边元素,也能跳转到网址。 @@ -28,9 +124,12 @@ class ChromiumFrame(object): if self._is_inner_frame(): self._is_diff_domain = False self.frame_page = None - backend_id = node.get('contentDocument', None).get('backendNodeId', None) - obj_id = self.page.driver.DOM.resolveNode(backendNodeId=backend_id)['object']['objectId'] - self._doc_ele = ChromiumElement(page, obj_id=obj_id) + # self.backend_id = node.get('contentDocument', None).get('backendNodeId', None) + # obj_id = self.page.driver.DOM.resolveNode(backendNodeId=self.backend_id)['object']['objectId'] + self.cc = cc(page.address, page.tab_id, page.timeout, self.frame_id, self.page, self.frame_ele) + self.cc._debug = True + # self._doc_ele = ChromiumElement(page, obj_id=obj_id) + self._doc_ele = self.cc.doc_ele else: # 若frame_id不在frame_tree中,为异域frame self._is_diff_domain = True @@ -132,6 +231,11 @@ class ChromiumFrame(object): """返回cdp中的node id""" return self.frame_ele.node_id + @property + def backend_id(self): + """返回cdp中的node id""" + return self.frame_ele.backend_id + @property def location(self): """返回frame元素左上角的绝对坐标""" @@ -163,9 +267,12 @@ class ChromiumFrame(object): """ # todo: 处理同域名跳转到异域,及同域跳转到异域的情况 if self._is_diff_domain: - return self.frame_page.get(url, show_errmsg, retry, interval, timeout) + r = self.frame_page.get(url, show_errmsg, retry, interval, timeout) else: - self.frame_ele.run_script(f'this.contentWindow.location="{url}";') + r = self.cc.get(url, show_errmsg, retry, interval, timeout) + + # self.frame_ele.run_script(f'this.contentWindow.location="{url}";') + return r def refresh(self): """刷新frame页面""" @@ -209,7 +316,7 @@ class ChromiumFrame(object): :param timeout: 查找超时时间 :return: ChromiumElement对象 """ - d = self.frame_page if self._is_diff_domain else self._doc_ele + d = self.frame_page if self._is_diff_domain else self.cc.doc_ele return d.ele(loc_or_str, timeout) def eles(self, loc_or_str, timeout=None): @@ -218,7 +325,7 @@ class ChromiumFrame(object): :param timeout: 查找超时时间 :return: ChromiumElement对象组成的列表 """ - d = self.frame_page if self._is_diff_domain else self._doc_ele + d = self.frame_page if self._is_diff_domain else self.cc.doc_ele return d.eles(loc_or_str, timeout) def s_ele(self, loc_or_str=None): diff --git a/DrissionPage/chromium_frame.pyi b/DrissionPage/chromium_frame.pyi index cd8e6e1..a6e025b 100644 --- a/DrissionPage/chromium_frame.pyi +++ b/DrissionPage/chromium_frame.pyi @@ -65,6 +65,9 @@ class ChromiumFrame(object): @property def node_id(self) -> str: ... + @property + def backend_id(self) -> str: ... + @property def location(self) -> dict: ...