diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index 23d3dc7..df2b72d 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -116,16 +116,14 @@ class ChromiumBase(BasePage): return False def _onFrameStartedLoading(self, **kwargs): - """页面跳转时触发""" - # print('FrameStartedLoading') + """页面开始加载时触发""" if kwargs['frameId'] == self.tab_id: self._is_loading = True if self._debug: print('FrameStartedLoading') def _onFrameStoppedLoading(self, **kwargs): - """页面跳转时触发""" - # print('FrameStoppedLoading') + """页面加载完成后触发""" if kwargs['frameId'] == self.tab_id and self._first_run is False and self._is_loading: if self._debug: print('FrameStoppedLoading') @@ -216,7 +214,7 @@ class ChromiumBase(BasePage): @property def ready_state(self) -> str: """返回当前页面加载状态,'loading' 'interactive' 'complete'""" - return self._driver.Runtime.evaluate(expression='document.readyState;')['result']['value'] + return self._tab_obj.Runtime.evaluate(expression='document.readyState;')['result']['value'] @property def size(self) -> dict: @@ -300,14 +298,32 @@ class ChromiumBase(BasePage): :param timeout: 连接超时时间 :return: 目标url是否可用,返回None表示不确定 """ - retry, interval = self._before_connect(url, retry, interval) - self._url_available = self._d_connect(self._url, - times=retry, - interval=interval, - show_errmsg=show_errmsg, - timeout=timeout) + self._url_available = self._get(url, show_errmsg, retry, interval, timeout) return self._url_available + def _get(self, + url: str, + show_errmsg: bool = False, + retry: int = None, + interval: float = None, + timeout: float = None, + frame_id: str = None) -> Union[None, bool]: + """访问url \n + :param url: 目标url + :param show_errmsg: 是否显示和抛出异常 + :param retry: 重试次数 + :param interval: 重试间隔(秒) + :param timeout: 连接超时时间 + :return: 目标url是否可用,返回None表示不确定 + """ + retry, interval = self._before_connect(url, retry, interval) + return self._d_connect(self._url, + times=retry, + interval=interval, + show_errmsg=show_errmsg, + timeout=timeout, + frame_id=frame_id) + def get_cookies(self, as_dict: bool = False) -> Union[list, dict]: """获取cookies信息 \n :param as_dict: 为True时返回由{name: value}键值对组成的dict @@ -499,7 +515,7 @@ class ChromiumBase(BasePage): """页面停止加载""" if self._debug: print('stopLoading') - self._driver.Page.stopLoading() + self._tab_obj.Page.stopLoading() self._get_document() def run_cdp(self, cmd: str, **cmd_args) -> dict: @@ -582,7 +598,8 @@ class ChromiumBase(BasePage): times: int = 0, interval: float = 1, show_errmsg: bool = False, - timeout: float = None) -> Union[bool, None]: + timeout: float = None, + frame_id: str = None) -> Union[bool, None]: """尝试连接,重试若干次 \n :param to_url: 要访问的url :param times: 重试次数 @@ -596,7 +613,10 @@ class ChromiumBase(BasePage): for _ in range(times + 1): err = None - result = self._driver.Page.navigate(url=to_url) + if frame_id: + result = self._driver.Page.navigate(url=to_url, frameId=frame_id) + else: + result = self._driver.Page.navigate(url=to_url) is_timeout = not self._wait_loading(timeout) if is_timeout: @@ -638,7 +658,8 @@ class ChromiumFrame(ChromiumBase): super().__init__(page.address, frame_id, page.timeout) def __repr__(self) -> str: - attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs] + attrs = self.attrs + attrs = [f"{attr}='{attrs[attr]}'" for attr in attrs] return f'' @property diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index 703659e..4435996 100644 --- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -32,7 +32,7 @@ class ChromiumElement(DrissionElement): self._scroll = None self._tag = None if not node_id and not obj_id: - raise TypeError('node_id或obj_id必须传入一个。') + raise RuntimeError('元素可能已失效。') if node_id: self._node_id = node_id @@ -42,7 +42,8 @@ class ChromiumElement(DrissionElement): self._obj_id = obj_id def __repr__(self) -> str: - attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs] + attrs = self.attrs + attrs = [f"{attr}='{attrs[attr]}'" for attr in attrs] return f'' def __call__(self, @@ -1101,6 +1102,251 @@ class ChromiumShadowRootElement(BaseElement): return self.page.run_cdp('DOM.describeNode', nodeId=node_id)['node']['backendNodeId'] +# class ChromiumFrame(object): +# def __init__(self, page, ele: ChromiumElement): +# from .chromium_base import ChromiumBase +# self.page: ChromiumBase = page +# self._inner_ele = ele +# self._is_diff_domain = False +# self.frame_id = page.run_cdp('DOM.describeNode', nodeId=ele.node_id)['node'].get('frameId', None) +# +# src = ele.attr('src') +# if src: +# netloc1 = urlparse(src).netloc +# netloc2 = urlparse(page.url).netloc +# if netloc1 != netloc2: +# self._is_diff_domain = True +# from .chromium_base import ChromiumBase +# self.inner_page = ChromiumBase(page.address, self.frame_id, page.timeout) +# self.inner_page.set_page_load_strategy(self.page.page_load_strategy) +# self.inner_page.timeouts = self.page.timeouts +# +# def __repr__(self) -> str: +# attrs = self._inner_ele.attrs +# attrs = [f"{attr}='{attrs[attr]}'" for attr in attrs] +# return f'' +# +# @property +# def tag(self) -> str: +# """返回元素tag""" +# return self._inner_ele.tag +# +# @property +# def url(self) -> str: +# """""" +# if self._is_diff_domain: +# return self.inner_page.url +# else: +# r = self.page.run_cdp('DOM.describeNode', nodeId=self._inner_ele.node_id) +# return r['node']['contentDocument']['documentURL'] +# +# @property +# def html(self) -> str: +# """返回元素outerHTML文本""" +# if self._is_diff_domain: +# tag = self.tag +# out_html = self.page.run_cdp('DOM.getOuterHTML', nodeId=self._inner_ele.node_id)['outerHTML'] +# in_html = self.inner_page.html +# sign = search(rf'<{tag}.*?>', out_html).group(0) +# return f'{sign}{in_html}' +# +# else: +# return self._inner_ele.html +# +# @property +# def title(self) -> str: +# d = self.inner_page if self._is_diff_domain else self._inner_ele +# ele = d.ele('xpath://title') +# return ele.text if ele else None +# +# @property +# def cookies(self): +# return self.inner_page.cookies if self._is_diff_domain else self.page.cookies +# +# @property +# def inner_html(self) -> str: +# """返回元素innerHTML文本""" +# return self.inner_page.html if self._is_diff_domain else self._inner_ele.inner_html +# +# @property +# def attrs(self) -> dict: +# return self._inner_ele.attrs +# +# @property +# def frame_size(self) -> dict: +# if self._is_diff_domain: +# return self.inner_page.size +# else: +# h = self._inner_ele.run_script('return this.contentDocument.body.scrollHeight;') +# w = self._inner_ele.run_script('return this.contentDocument.body.scrollWidth;') +# return {'height': h, 'width': w} +# +# @property +# def size(self) -> dict: +# """返回frame元素大小""" +# return self._inner_ele.size +# +# @property +# def obj_id(self) -> str: +# """返回js中的object id""" +# return self._inner_ele.obj_id +# +# @property +# def node_id(self) -> str: +# """返回cdp中的node id""" +# return self._inner_ele.node_id +# +# @property +# def location(self) -> dict: +# """返回frame元素左上角的绝对坐标""" +# return self._inner_ele.location +# +# @property +# def is_displayed(self) -> bool: +# """返回frame元素是否显示""" +# return self._inner_ele.is_displayed +# +# def get(self, url): +# self.page._get(url, False, None, None, None, self.frame_id) +# +# def ele(self, +# loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, 'ChromiumFrame'], +# timeout: float = None): +# d = self.inner_page if self._is_diff_domain else self._inner_ele +# return d.ele(loc_or_ele, timeout) +# +# def eles(self, +# loc_or_ele: Union[Tuple[str, str], str], +# timeout: float = None): +# d = self.inner_page if self._is_diff_domain else self._inner_ele +# return d.eles(loc_or_ele, timeout) +# +# # def s_ele(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement] = None) \ +# # -> Union[SessionElement, str, None]: +# # """查找第一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 \n +# # :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 +# # :return: SessionElement对象或属性、文本 +# # """ +# # if isinstance(loc_or_ele, ChromiumElement): +# # return make_session_ele(loc_or_ele) +# # else: +# # return make_session_ele(self, loc_or_ele) +# # +# # def s_eles(self, loc_or_str: Union[Tuple[str, str], str] = None) -> List[Union[SessionElement, str]]: +# # """查找所有符合条件的元素以SessionElement列表形式返回 \n +# # :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 +# # :return: SessionElement对象组成的列表 +# # """ +# # return make_session_ele(self, loc_or_str, single=False) +# +# def attr(self, attr: str) -> Union[str, None]: +# """返回frame元素attribute属性值 \n +# :param attr: 属性名 +# :return: 属性值文本,没有该属性返回None +# """ +# return self._inner_ele.attr(attr) +# +# def set_attr(self, attr: str, value: str) -> None: +# """设置frame元素attribute属性 \n +# :param attr: 属性名 +# :param value: 属性值 +# :return: None +# """ +# self._inner_ele.set_attr(attr, value) +# +# def remove_attr(self, attr: str) -> None: +# """删除frame元素attribute属性 \n +# :param attr: 属性名 +# :return: None +# """ +# self._inner_ele.remove_attr(attr) +# +# def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union['ChromiumElement', None]: +# """返回上面某一级父元素,可指定层数或用查询语法定位 \n +# :param level_or_loc: 第几级父元素,或定位符 +# :return: 上级元素对象 +# """ +# return self._inner_ele.parent(level_or_loc) +# +# def prev(self, +# index: int = 1, +# filter_loc: Union[tuple, str] = '', +# timeout: float = 0) -> Union['ChromiumElement', str, None]: +# """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 \n +# :param index: 前面第几个查询结果元素 +# :param filter_loc: 用于筛选元素的查询语法 +# :param timeout: 查找元素的超时时间 +# :return: 兄弟元素 +# """ +# return self._inner_ele.prev(index, filter_loc, timeout) +# +# def next(self, +# index: int = 1, +# filter_loc: Union[tuple, str] = '', +# timeout: float = 0) -> Union['ChromiumElement', str, None]: +# """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 \n +# :param index: 后面第几个查询结果元素 +# :param filter_loc: 用于筛选元素的查询语法 +# :param timeout: 查找元素的超时时间 +# :return: 兄弟元素 +# """ +# return self._inner_ele.next(index, filter_loc, timeout) +# +# def before(self, +# index: int = 1, +# filter_loc: Union[tuple, str] = '', +# timeout: float = None) -> Union['ChromiumElement', str, None]: +# """返回当前元素前面的一个元素,可指定筛选条件和第几个。查找范围不限兄弟元素,而是整个DOM文档 \n +# :param index: 前面第几个查询结果元素 +# :param filter_loc: 用于筛选元素的查询语法 +# :param timeout: 查找元素的超时时间 +# :return: 本元素前面的某个元素或节点 +# """ +# return self._inner_ele.before(index, filter_loc, timeout) +# +# def after(self, +# index: int = 1, +# filter_loc: Union[tuple, str] = '', +# timeout: float = None) -> Union['ChromiumElement', str, None]: +# """返回当前元素后面的一个元素,可指定筛选条件和第几个。查找范围不限兄弟元素,而是整个DOM文档 \n +# :param index: 后面第几个查询结果元素 +# :param filter_loc: 用于筛选元素的查询语法 +# :param timeout: 查找元素的超时时间 +# :return: 本元素后面的某个元素或节点 +# """ +# return self._inner_ele.after(index, filter_loc, timeout) +# +# def prevs(self, +# filter_loc: Union[tuple, str] = '', +# timeout: float = 0) -> List[Union['ChromiumElement', str]]: +# """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 \n +# :param filter_loc: 用于筛选元素的查询语法 +# :param timeout: 查找元素的超时时间 +# :return: 兄弟元素或节点文本组成的列表 +# """ +# return self._inner_ele.prevs(filter_loc, timeout) +# +# def nexts(self, +# filter_loc: Union[tuple, str] = '', +# timeout: float = 0) -> List[Union['ChromiumElement', str]]: +# """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 \n +# :param filter_loc: 用于筛选元素的查询语法 +# :param timeout: 查找元素的超时时间 +# :return: 兄弟元素或节点文本组成的列表 +# """ +# return self._inner_ele.nexts(filter_loc, timeout) +# +# def befores(self, +# filter_loc: Union[tuple, str] = '', +# timeout: float = None) -> List[Union['ChromiumElement', str]]: +# """返回当前元素后面符合条件的全部兄弟元素或节点组成的列表,可用查询语法筛选。查找范围不限兄弟元素,而是整个DOM文档 \n +# :param filter_loc: 用于筛选元素的查询语法 +# :param timeout: 查找元素的超时时间 +# :return: 本元素前面的元素或节点组成的列表 +# """ +# return self._inner_ele.befores(filter_loc, timeout) + + def make_chromium_ele(ele: ChromiumElement, loc: Union[str, Tuple[str, str]], single: bool = True, diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index fbde471..fbbfd45 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -350,6 +350,7 @@ class MixPage(SessionPage, DriverPage, BasePage): @property def download(self) -> DownloadKit: + """返回下载器对象""" if self.mode == 'd': self.cookies_to_session() return super().download diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index ce9ac6f..b4ad6f8 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -194,6 +194,7 @@ class SessionPage(BasePage): @property def download(self) -> DownloadKit: + """返回下载器对象""" if not hasattr(self, '_download_kit'): self._download_kit = DownloadKit(session=self) diff --git a/DrissionPage/web_page.py b/DrissionPage/web_page.py index d3b2c3c..0244d0c 100644 --- a/DrissionPage/web_page.py +++ b/DrissionPage/web_page.py @@ -383,6 +383,7 @@ class WebPage(SessionPage, ChromiumPage, BasePage): @property def download(self) -> DownloadKit: + """返回下载器对象""" if self.mode == 'd': self.cookies_to_session() return super().download diff --git a/setup.py b/setup.py index 4670dc1..5dc995b 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="3.0.19", + version="3.0.20", author="g1879", author_email="g1879@qq.com", description="A module that integrates selenium and requests session, encapsulates common page operations.",