From 74b7185563e19fc9bacf6aa5a77eee922c123290 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sun, 28 Nov 2021 00:20:13 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=9F=A5=E6=89=BE=E5=85=83?= =?UTF-8?q?=E7=B4=A0=E6=97=B6=E7=9A=84=E4=BB=A3=E7=A0=81=E7=BB=93=E6=9E=84?= =?UTF-8?q?=EF=BC=9Bs=5Fele()=E5=85=81=E8=AE=B8=E5=8F=82=E6=95=B0=E4=B8=BA?= =?UTF-8?q?None=EF=BC=8C=E4=BB=A5=E7=94=9F=E6=88=90=E5=85=83=E7=B4=A0?= =?UTF-8?q?=E8=87=AA=E5=B7=B1=E7=9A=84SessionElement=E7=89=88=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/base.py | 2 +- DrissionPage/driver_element.py | 56 +++++++++-------- DrissionPage/session_element.py | 93 ++++++++++++++++++----------- DrissionPage/shadow_root_element.py | 2 +- 4 files changed, 90 insertions(+), 63 deletions(-) diff --git a/DrissionPage/base.py b/DrissionPage/base.py index ad6d894..b2085b7 100644 --- a/DrissionPage/base.py +++ b/DrissionPage/base.py @@ -144,7 +144,7 @@ class DrissionElement(BaseElement): return self._get_brother(num, mode, 'prev') def _get_brother(self, num: int = 1, mode: str = 'ele', direction: str = 'next'): - """返回前面第num个兄弟节点或元素 \n + """返回前面第num个兄弟节点或元素 \n :param num: 前面第几个兄弟节点或元素 :param mode: 'ele', 'node' 或 'text',匹配元素、节点、或文本节点 :param direction: 'next' 或 'prev',查找的方向 diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 514df11..d4df95a 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -93,6 +93,11 @@ class DriverElement(DrissionElement): """返回未格式化处理的元素内文本""" return self.inner_ele.get_attribute('innerText') + @property + def parent(self): + """返回父级元素""" + return self.parents() + def parents(self, num: int = 1): """返回上面第num级父元素 \n :param num: 第几级父元素 @@ -132,7 +137,7 @@ class DriverElement(DrissionElement): """ return self._ele(loc_or_str, timeout=timeout, single=False) - def s_ele(self, loc_or_ele): + def s_ele(self, loc_or_ele=None): """查找第一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 \n :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 :return: SessionElement对象或属性、文本 @@ -156,16 +161,6 @@ class DriverElement(DrissionElement): :param single: True则返回第一个,False则返回全部 :return: DriverElement对象 """ - loc_or_str = str_to_loc(loc_or_str) if isinstance(loc_or_str, str) else translate_loc(loc_or_str) - loc_str = loc_or_str[1] - - if loc_or_str[0] == 'xpath' and loc_or_str[1].lstrip().startswith('/'): - loc_str = f'.{loc_str}' - - if loc_or_str[0] == 'css selector' and loc_or_str[1].lstrip().startswith('>'): - loc_str = f'{self.css_path}{loc_or_str[1]}' - - loc_or_str = loc_or_str[0], loc_str return make_driver_ele(self, loc_or_str, single, timeout) def _get_ele_path(self, mode) -> str: @@ -292,7 +287,7 @@ class DriverElement(DrissionElement): while perf_counter() - t1 <= timeout: try: self.inner_ele.click() - break + return True except: pass @@ -556,20 +551,6 @@ def make_driver_ele(page_or_ele, :param timeout: 查找元素超时时间 :return: 返回DriverElement元素或它们组成的列表 """ - if isinstance(page_or_ele, BaseElement): - page = page_or_ele.page - driver = page_or_ele.inner_ele - else: # 传入的是DriverPage对象 - page = page_or_ele - driver = page_or_ele.driver - - # 设置等待对象 - if timeout is not None and timeout != page.timeout: - wait = WebDriverWait(driver, timeout=timeout) - else: - page.wait_object._driver = driver - wait = page.wait_object - # ---------------处理定位符--------------- if isinstance(loc, str): loc = str_to_loc(loc) @@ -578,6 +559,29 @@ def make_driver_ele(page_or_ele, else: raise ValueError("定位符必须为str或长度为2的tuple。") + # ---------------设置 page 和 driver--------------- + if isinstance(page_or_ele, BaseElement): # 传入DriverElement 或 ShadowRootElement + loc_str = loc[1] + if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): + loc_str = f'.{loc_str}' + elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>') and isinstance(page_or_ele, DriverElement): + loc_str = f'{page_or_ele.css_path}{loc[1]}' + loc = loc[0], loc_str + + page = page_or_ele.page + driver = page_or_ele.inner_ele + + else: # 传入的是DriverPage对象 + page = page_or_ele + driver = page_or_ele.driver + + # -----------------设置等待对象----------------- + if timeout is not None and timeout != page.timeout: + wait = WebDriverWait(driver, timeout=timeout) + else: + page.wait_object._driver = driver + wait = page.wait_object + # ---------------执行查找----------------- try: # 使用xpath查找 diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index ba9bf15..d2fb4a1 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -102,6 +102,11 @@ class SessionElement(DrissionElement): """返回未格式化处理的元素内文本""" return str(self._inner_ele.text_content()) + @property + def parent(self): + """返回父级元素""" + return self.parents() + def parents(self, num: int = 1): """返回上面第num级父元素 \n :param num: 第几级父元素 @@ -155,7 +160,7 @@ class SessionElement(DrissionElement): """ return self._ele(loc_or_str, single=False) - def s_ele(self, loc_or_str: Union[Tuple[str, str], str]): + def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None): """返回当前元素下级符合条件的第一个元素、属性或节点文本 \n :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :return: SessionElement对象或属性、文本 @@ -176,20 +181,7 @@ class SessionElement(DrissionElement): :param single: True则返回第一个,False则返回全部 :return: SessionElement对象 """ - loc_or_str = str_to_loc(loc_or_str) if isinstance(loc_or_str, str) else translate_loc(loc_or_str) - element = self - loc_str = loc_or_str[1] - - if loc_or_str[0] == 'xpath' and loc_or_str[1].lstrip().startswith('/'): - loc_str = f'.{loc_str}' - - # 若css以>开头,表示找元素的直接子元素,要用page以绝对路径才能找到 - if loc_or_str[0] == 'css selector' and loc_or_str[1].lstrip().startswith('>'): - loc_str = f'{self.css_path}{loc_or_str[1]}' - element = self.page - - loc_or_str = loc_or_str[0], loc_str - return make_session_ele(element, loc_or_str, single) + return make_session_ele(self, loc_or_str, single) def _get_ele_path(self, mode) -> str: """获取css路径或xpath路径 @@ -246,26 +238,6 @@ def make_session_ele(html_or_ele: Union[str, BaseElement, BasePage], :param single: True则返回第一个,False则返回全部 :return: 返回SessionElement元素或列表,或属性文本 """ - # 根据传入对象类型获取页面对象和lxml元素对象 - if isinstance(html_or_ele, SessionElement): # SessionElement - page = html_or_ele.page - html_or_ele = html_or_ele.inner_ele - - elif isinstance(html_or_ele, BasePage): # MixPage, DriverPage 或 SessionPage - page = html_or_ele - html_or_ele = fromstring(html_or_ele.html) - - elif isinstance(html_or_ele, str): # 直接传入html文本 - page = None - html_or_ele = fromstring(html_or_ele) - - elif isinstance(html_or_ele, BaseElement): # DrissionElement 或 ShadowRootElement - page = html_or_ele.page - html_or_ele = fromstring(html_or_ele.html) - - else: - raise TypeError('html_or_ele参数只能是元素、页面对象或html文本。') - # ---------------处理定位符--------------- if not loc: loc = ('xpath', '.') @@ -277,6 +249,57 @@ def make_session_ele(html_or_ele: Union[str, BaseElement, BasePage], else: raise ValueError("定位符必须为str或长度为2的tuple。") + # ---------------根据传入对象类型获取页面对象和lxml元素对象--------------- + if isinstance(html_or_ele, SessionElement): # SessionElement + page = html_or_ele.page + + loc_str = loc[1] + if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): + loc_str = f'.{loc[1]}' + html_or_ele = html_or_ele.inner_ele + + # 若css以>开头,表示找元素的直接子元素,要用page以绝对路径才能找到 + elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>'): + loc_str = f'{html_or_ele.css_path}{loc[1]}' + if html_or_ele.page: + html_or_ele = fromstring(html_or_ele.page.html) + else: # 接收html文本,无page的情况 + html_or_ele = fromstring(html_or_ele('xpath:/ancestor::*').html) + + else: + html_or_ele = html_or_ele.inner_ele + + loc = loc[0], loc_str + + elif isinstance(html_or_ele, DrissionElement): # DriverElement + loc_str = loc[1] + if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'): + loc_str = f'.{loc[1]}' + elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>'): + loc_str = f'{html_or_ele.css_path}{loc[1]}' + loc = loc[0], loc_str + + # 获取整个页面html再定位到当前元素,以实现查找上级元素 + page = html_or_ele.page + xpath = html_or_ele.xpath + html_or_ele = fromstring(html_or_ele.page.html) + html_or_ele = html_or_ele.xpath(xpath)[0] + + elif isinstance(html_or_ele, BasePage): # MixPage, DriverPage 或 SessionPage + page = html_or_ele + html_or_ele = fromstring(html_or_ele.html) + + elif isinstance(html_or_ele, str): # 直接传入html文本 + page = None + html_or_ele = fromstring(html_or_ele) + + elif isinstance(html_or_ele, BaseElement): # ShadowRootElement + page = html_or_ele.page + html_or_ele = fromstring(html_or_ele.html) + + else: + raise TypeError('html_or_ele参数只能是元素、页面对象或html文本。') + # ---------------执行查找----------------- try: if loc[0] == 'xpath': # 用lxml内置方法获取lxml的元素对象列表 diff --git a/DrissionPage/shadow_root_element.py b/DrissionPage/shadow_root_element.py index a507810..17b649f 100644 --- a/DrissionPage/shadow_root_element.py +++ b/DrissionPage/shadow_root_element.py @@ -85,7 +85,7 @@ class ShadowRootElement(BaseElement): """ return self._ele(loc_or_str, timeout=timeout, single=False) - def s_ele(self, loc_or_ele): + def s_ele(self, loc_or_ele=None): """查找第一个符合条件的元素以SessionElement形式返回,处理复杂页面时效率很高 \n :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 :return: SessionElement对象或属性、文本