diff --git a/DrissionPage/chrome_element.py b/DrissionPage/chrome_element.py index 5dd5879..d909a1f 100644 --- a/DrissionPage/chrome_element.py +++ b/DrissionPage/chrome_element.py @@ -5,10 +5,11 @@ @File : chrome_element.py """ from pathlib import Path +from re import search from typing import Union, Tuple, List, Any from time import perf_counter, sleep -from .session_element import make_session_ele +from .session_element import make_session_ele, SessionElement from .base import DrissionElement from .common import make_absolute_link, get_loc, get_ele_txt, format_html, is_js_func @@ -32,17 +33,26 @@ class ChromeElement(DrissionElement): attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs] return f'' - @property - def obj_id(self) -> str: - return self._obj_id - - @property - def node_id(self) -> str: - return self._node_id + def __call__(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> Union['ChromeElement', str, None]: + """在内部查找元素 \n + 例:ele2 = ele1('@id=ele_id') \n + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 超时时间 + :return: ChromeElement对象或属性、文本 + """ + return self.ele(loc_or_str, timeout) @property def html(self) -> str: """返回元素outerHTML文本""" + tag = self.tag + if tag in ('iframe', 'frame'): + out_html = self.page.driver.DOM.getOuterHTML(nodeId=self._node_id)['outerHTML'] + in_html = self.inner_html + sign = search(rf'<{tag}.*?>', out_html).group(0) + return f'{sign}{in_html}' return self.page.driver.DOM.getOuterHTML(nodeId=self._node_id)['outerHTML'] @property @@ -52,7 +62,10 @@ class ChromeElement(DrissionElement): @property def inner_html(self) -> str: """返回元素innerHTML文本""" - return self.run_script('this.innerHTML;')['result']['value'] + if self.tag in ('iframe', 'frame'): + return _run_script(self, 'this.contentDocument.documentElement;').html + # return _run_script(self, 'this.contentDocument.body;').html + return self.run_script('this.innerHTML;') @property def attrs(self) -> dict: @@ -60,6 +73,25 @@ class ChromeElement(DrissionElement): attrs_len = len(attrs) return {attrs[i]: attrs[i + 1] for i in range(0, attrs_len, 2)} + @property + def text(self) -> str: + """返回元素内所有文本""" + return get_ele_txt(make_session_ele(self.html)) + + @property + def raw_text(self): + """返回未格式化处理的元素内文本""" + return self.prop('innerText') + + # -----------------driver独有属性------------------- + @property + def obj_id(self) -> str: + return self._obj_id + + @property + def node_id(self) -> str: + return self._node_id + @property def size(self) -> dict: """返回元素宽和高""" @@ -70,7 +102,7 @@ class ChromeElement(DrissionElement): def client_location(self) -> dict: """返回元素左上角坐标""" js = 'this.getBoundingClientRect().left.toString()+" "+this.getBoundingClientRect().top.toString();' - xy = self.run_script(js)['result']['value'] + xy = self.run_script(js) x, y = xy.split(' ') return {'x': int(x.split('.')[0]), 'y': int(y.split('.')[0])} @@ -94,10 +126,33 @@ function getElementPagePosition(element){ return actualLeft.toString() +' '+actualTop.toString(); } return getElementPagePosition(this);}''' - xy = self.run_script(js)['result']['value'] + xy = self.run_script(js) x, y = xy.split(' ') return {'x': int(x.split('.')[0]), 'y': int(y.split('.')[0])} + # @property + # def shadow_root(self): + # """返回当前元素的shadow_root元素对象""" + # shadow = self.run_script('return arguments[0].shadowRoot') + # if shadow: + # from .shadow_root_element import ShadowRootElement + # return ShadowRootElement(shadow, self) + # + # @property + # def sr(self): + # """返回当前元素的shadow_root元素对象""" + # return self.shadow_root + + @property + def pseudo_before(self) -> str: + """返回当前元素的::before伪元素内容""" + return self.style('content', 'before') + + @property + def pseudo_after(self) -> str: + """返回当前元素的::after伪元素内容""" + return self.style('content', 'after') + @property def scroll(self) -> 'ChromeScroll': """用于滚动滚动条的对象""" @@ -105,6 +160,116 @@ function getElementPagePosition(element){ self._scroll = ChromeScroll(self) return self._scroll + def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> Union['ChromeElement', None]: + """返回上面某一级父元素,可指定层数或用查询语法定位 \n + :param level_or_loc: 第几级父元素,或定位符 + :return: 上级元素对象 + """ + return super().parent(level_or_loc) + + def prev(self, + index: int = 1, + filter_loc: Union[tuple, str] = '', + timeout: float = 0) -> Union['ChromeElement', str, None]: + """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 \n + :param index: 前面第几个查询结果元素 + :param filter_loc: 用于筛选元素的查询语法 + :param timeout: 查找元素的超时时间 + :return: 兄弟元素 + """ + return super().prev(index, filter_loc, timeout) + + def next(self, + index: int = 1, + filter_loc: Union[tuple, str] = '', + timeout: float = 0) -> Union['ChromeElement', str, None]: + """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 \n + :param index: 后面第几个查询结果元素 + :param filter_loc: 用于筛选元素的查询语法 + :param timeout: 查找元素的超时时间 + :return: 兄弟元素 + """ + return super().next(index, filter_loc, timeout) + + def before(self, + index: int = 1, + filter_loc: Union[tuple, str] = '', + timeout: float = None) -> Union['ChromeElement', str, None]: + """返回当前元素前面的一个元素,可指定筛选条件和第几个。查找范围不限兄弟元,而是整个DOM文档 \n + :param index: 前面第几个查询结果元素 + :param filter_loc: 用于筛选元素的查询语法 + :param timeout: 查找元素的超时时间 + :return: 本元素前面的某个元素或节点 + """ + return super().before(index, filter_loc, timeout) + + def after(self, + index: int = 1, + filter_loc: Union[tuple, str] = '', + timeout: float = None) -> Union['ChromeElement', str, None]: + """返回当前元素后面的一个元素,可指定筛选条件和第几个。查找范围不限兄弟元,而是整个DOM文档 \n + :param index: 后面第几个查询结果元素 + :param filter_loc: 用于筛选元素的查询语法 + :param timeout: 查找元素的超时时间 + :return: 本元素后面的某个元素或节点 + """ + return super().after(index, filter_loc, timeout) + + def prevs(self, + filter_loc: Union[tuple, str] = '', + timeout: float = 0) -> List[Union['ChromeElement', str]]: + """返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 \n + :param filter_loc: 用于筛选元素的查询语法 + :param timeout: 查找元素的超时时间 + :return: 兄弟元素或节点文本组成的列表 + """ + return super().prevs(filter_loc, timeout) + + def nexts(self, + filter_loc: Union[tuple, str] = '', + timeout: float = 0) -> List[Union['ChromeElement', str]]: + """返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 \n + :param filter_loc: 用于筛选元素的查询语法 + :param timeout: 查找元素的超时时间 + :return: 兄弟元素或节点文本组成的列表 + """ + return super().nexts(filter_loc, timeout) + + def befores(self, + filter_loc: Union[tuple, str] = '', + timeout: float = None) -> List[Union['ChromeElement', str]]: + """返回当前元素后面符合条件的全部兄弟元素或节点组成的列表,可用查询语法筛选。查找范围不限兄弟元,而是整个DOM文档 \n + :param filter_loc: 用于筛选元素的查询语法 + :param timeout: 查找元素的超时时间 + :return: 本元素前面的元素或节点组成的列表 + """ + return super().befores(filter_loc, timeout) + + # def wait_ele(self, + # loc_or_ele: Union[str, tuple, 'ChromeElement'], + # timeout: float = None) -> 'ElementWaiter': + # """等待子元素从dom删除、显示、隐藏 \n + # :param loc_or_ele: 可以是元素、查询字符串、loc元组 + # :param timeout: 等待超时时间 + # :return: 等待是否成功 + # """ + # return ElementWaiter(self, loc_or_ele, timeout) + + @property + def select(self): + """返回专门处理下拉列表的Select类,非下拉列表元素返回False""" + if self._select is None: + if self.tag != 'select': + self._select = False + else: + self._select = ChromeSelect(self) + + return self._select + + @property + def is_selected(self): + return self.run_script('this.selected;') + @property def is_in_view(self) -> bool: """返回元素是否出现在视口中,已元素中点为判断""" @@ -116,48 +281,7 @@ const vWidth = window.innerWidth || document.documentElement.clientWidth; const vHeight = window.innerHeight || document.documentElement.clientHeight; if (x< 0 || y < 0 || x > vWidth || y > vHeight){return false;} return true;}""" - return self.run_script(js)['result']['value'] - - def run_script(self, script: str, as_expr: bool = False, *args: Any) -> Any: - """运行javascript代码 \n - :param script: js文本 - :param as_expr: 是否作为表达式运行,为True时args无效 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[2]... - :return: 运行的结果 - """ - return run_script(self, script, as_expr, *args) - - def ele(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union['ChromeElement', str, None]: - """返回当前元素下级符合条件的第一个元素、属性或节点文本 \n - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与元素所在页面等待时间一致 - :return: DriverElement对象或属性、文本 - """ - return self._ele(loc_or_str, timeout) - - def eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[Union['ChromeElement', str]]: - """返回当前元素下级所有符合条件的子元素、属性或节点文本 \n - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间,默认与元素所在页面等待时间一致 - :return: DriverElement对象或属性、文本组成的列表 - """ - return self._ele(loc_or_str, timeout=timeout, single=False) - - def _ele(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None, - single: bool = True) -> Union['ChromeElement', str, None, List[Union['ChromeElement', str]]]: - """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 \n - :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 - :param timeout: 查找元素超时时间 - :param single: True则返回第一个,False则返回全部 - :return: DriverElement对象 - """ - return make_chrome_ele(self, loc_or_str, single, timeout) + return self.run_script(js) def attr(self, attr: str) -> Union[str, None]: """返回attribute属性值 \n @@ -168,11 +292,9 @@ return true;}""" attrs = self.attrs if attr == 'href': link = attrs['href'] - # 若为链接为None、js或邮件,直接返回 if not link or link.lower().startswith(('javascript:', 'mailto:')): return link - - else: # 其它情况直接返回绝对url + else: return make_absolute_link(link, self.page) elif attr == 'src': @@ -191,7 +313,66 @@ return true;}""" return self.inner_html else: - return attrs[attr] + return attrs.get(attr, None) + + def run_script(self, script: str, as_expr: bool = False, *args: Any) -> Any: + """运行javascript代码 \n + :param script: js文本 + :param as_expr: 是否作为表达式运行,为True时args无效 + :param args: 参数,按顺序在js文本中对应argument[0]、argument[2]... + :return: 运行的结果 + """ + return _run_script(self, script, as_expr, *args) + + def ele(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> Union['ChromeElement', str, None]: + """返回当前元素下级符合条件的第一个元素、属性或节点文本 \n + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 查找元素超时时间,默认与元素所在页面等待时间一致 + :return: ChromeElement对象或属性、文本 + """ + return self._ele(loc_or_str, timeout) + + def eles(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None) -> List[Union['ChromeElement', str]]: + """返回当前元素下级所有符合条件的子元素、属性或节点文本 \n + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 查找元素超时时间,默认与元素所在页面等待时间一致 + :return: ChromeElement对象或属性、文本组成的列表 + """ + return self._ele(loc_or_str, timeout=timeout, single=False) + + def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None) -> Union[SessionElement, str, None]: + """查找第一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 \n + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :return: SessionElement对象或属性、文本 + """ + if self.tag in ('iframe', 'frame'): + return make_session_ele(self.inner_html, loc_or_str) + return make_session_ele(self, loc_or_str) + + def s_eles(self, loc_or_str: Union[Tuple[str, str], str] = None) -> List[Union[SessionElement, str]]: + """查找所有符合条件的元素以SessionElement列表形式返回 \n + :param loc_or_str: 定位符 + :return: SessionElement或属性、文本组成的列表 + """ + if self.tag in ('iframe', 'frame'): + return make_session_ele(self.inner_html, loc_or_str, single=False) + return make_session_ele(self, loc_or_str, single=False) + + def _ele(self, + loc_or_str: Union[Tuple[str, str], str], + timeout: float = None, + single: bool = True) -> Union['ChromeElement', str, None, List[Union['ChromeElement', str]]]: + """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 \n + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param timeout: 查找元素超时时间 + :param single: True则返回第一个,False则返回全部 + :return: ChromeElement对象 + """ + return make_chrome_ele(self, loc_or_str, single, timeout) def prop(self, prop: str) -> Union[str, int, None]: """获取property属性值 \n @@ -213,9 +394,22 @@ return true;}""" :return: 是否设置成功 """ value = value.replace("'", "\\'") - r = self.run_script(f'this.{prop}="{value}";') - if 'exceptionDetails' in r: - raise SyntaxError(r['result']['description']) + self.run_script(f'this.{prop}="{value}";') + + def set_attr(self, attr: str, value: str) -> None: + """设置元素attribute属性 \n + :param attr: 属性名 + :param value: 属性值 + :return: 是否设置成功 + """ + self.run_script(f'this.setAttribute(arguments[0], arguments[1]);', False, attr, str(value)) + + def remove_attr(self, attr: str) -> None: + """删除元素attribute属性 \n + :param attr: 属性名 + :return: 是否删除成功 + """ + self.run_script(f'this.removeAttribute("{attr}");') def style(self, style: str, pseudo_ele: str = '') -> str: """返回元素样式属性值,可获取伪元素属性值 \n @@ -367,15 +561,6 @@ return true;}""" def is_valid(self): return True - @property - def text(self) -> str: - """返回元素内所有文本""" - return get_ele_txt(make_session_ele(self.html)) - - @property - def raw_text(self): - return - def _get_ele_path(self, mode) -> str: """返获取css路径或xpath路径""" if mode == 'xpath': @@ -415,7 +600,7 @@ return true;}""" } return e(this);} ''' - t = self.run_script(js)['result']['value'] + t = self.run_script(js) return f':root{t}' if mode == 'css' else t @@ -428,7 +613,7 @@ def make_chrome_ele(ele: ChromeElement, :param loc: 元素定位元组 :param single: True则返回第一个,False则返回全部 :param timeout: 查找元素超时时间 - :return: 返回DriverElement元素或它们组成的列表 + :return: 返回ChromeElement元素或它们组成的列表 """ # ---------------处理定位符--------------- if isinstance(loc, (str, tuple)): @@ -457,14 +642,18 @@ def _find_by_xpath(ele: ChromeElement, xpath: str, single: bool, timeout: float) type_txt = '9' if single else '7' node_txt = 'this.contentDocument' if ele.tag in ('iframe', 'frame') else 'this' js = _make_js(xpath, type_txt, node_txt) - r = ele.run_script(js) + r = ele.page.run_cdp('Runtime.callFunctionOn', + functionDeclaration=js, objectId=ele.obj_id, returnByValue=False, awaitPromise=True, + userGesture=True) if r['result']['type'] == 'string': return r['result']['value'] if 'exceptionDetails' in r: if 'The result is not a node set' in r['result']['description']: js = _make_js(xpath, '1', node_txt) - r = ele.run_script(js) + r = ele.page.run_cdp('Runtime.callFunctionOn', + functionDeclaration=js, objectId=ele.obj_id, returnByValue=False, awaitPromise=True, + userGesture=True) return r['result']['value'] else: raise SyntaxError(f'查询语句错误:\n{r}') @@ -472,7 +661,9 @@ def _find_by_xpath(ele: ChromeElement, xpath: str, single: bool, timeout: float) t1 = perf_counter() while (r['result']['subtype'] == 'null' or r['result']['description'] == 'NodeList(0)') and perf_counter() - t1 < timeout: - r = ele.run_script(js) + r = ele.page.run_cdp('Runtime.callFunctionOn', + functionDeclaration=js, objectId=ele.obj_id, returnByValue=False, awaitPromise=True, + userGesture=True) if single: if r['result']['subtype'] == 'null': @@ -494,14 +685,18 @@ def _find_by_css(ele: ChromeElement, selector: str, single: bool, timeout: float selector = selector.replace('"', r'\"') find_all = '' if single else 'All' js = f'this.querySelector{find_all}("{selector}");' - r = ele.run_script(js) + r = ele.page.run_cdp('Runtime.callFunctionOn', + functionDeclaration=js, objectId=ele.obj_id, returnByValue=False, awaitPromise=True, + userGesture=True) if 'exceptionDetails' in r: raise SyntaxError(f'查询语句错误:\n{r}') t1 = perf_counter() while (r['result']['subtype'] == 'null' or r['result']['description'] == 'NodeList(0)') and perf_counter() - t1 < timeout: - r = ele.run_script(js) + r = ele.page.run_cdp('Runtime.callFunctionOn', + functionDeclaration=js, objectId=ele.obj_id, returnByValue=False, awaitPromise=True, + userGesture=True) if single: if r['result']['subtype'] == 'null': @@ -552,7 +747,7 @@ else{a.push(e.snapshotItem(i));}}""" return js -def run_script(page_or_ele, script: str, as_expr: bool = False, *args: Any) -> Any: +def _run_script(page_or_ele, script: str, as_expr: bool = False, *args: Any) -> Any: """运行javascript代码 \n :param page_or_ele: 页面对象或元素对象 :param script: js文本 @@ -570,7 +765,6 @@ def run_script(page_or_ele, script: str, as_expr: bool = False, *args: Any) -> A if as_expr: res = page.run_cdp('Runtime.evaluate', expression=script, - # contextId=self._contextId, returnByValue=False, awaitPromise=True, userGesture=True) @@ -581,7 +775,6 @@ def run_script(page_or_ele, script: str, as_expr: bool = False, *args: Any) -> A res = page.run_cdp('Runtime.callFunctionOn', functionDeclaration=script, objectId=obj_id, - # 'executionContextId': self._contextId, arguments=[_convert_argument(arg) for arg in args], returnByValue=False, awaitPromise=True, @@ -734,3 +927,274 @@ class ChromeScroll(object): :return: None """ self._run_script(f'{{}}.scrollBy({pixel},0);') + + +class ChromeSelect(object): + """ChromeSelect 类专门用于处理 d 模式下 select 标签""" + + def __init__(self, ele: ChromeElement): + """初始化 \n + :param ele: select 元素对象 + """ + if ele.tag != 'select': + raise TypeError(f"select方法只能在