优化查找元素时的代码结构；s_ele() 允许参数为 None，以生成元素自己的 SessionElement 版本

This commit is contained in:
g1879 2021-11-28 00:20:13 +08:00
parent 12c03e24c8
commit 74b7185563
4 changed files with 90 additions and 63 deletions

View File

@ -144,7 +144,7 @@ class DrissionElement(BaseElement):
return self._get_brother(num, mode, 'prev')
def _get_brother(self, num: int = 1, mode: str = 'ele', direction: str = 'next'):
"""返回前面第num个兄弟节点或元素 \n
"""返回前面第num个兄弟节点或元素 \n
:param num: 前面第几个兄弟节点或元素
:param mode: 'ele', 'node' 'text'匹配元素节点或文本节点
:param direction: 'next' 'prev'查找的方向

View File

@ -93,6 +93,11 @@ class DriverElement(DrissionElement):
"""返回未格式化处理的元素内文本"""
return self.inner_ele.get_attribute('innerText')
@property
def parent(self):
"""Return the parent element, i.e. the result of ``parents()`` called with its default ``num`` of 1."""
return self.parents()
def parents(self, num: int = 1):
"""返回上面第num级父元素 \n
:param num: 第几级父元素
@ -132,7 +137,7 @@ class DriverElement(DrissionElement):
"""
return self._ele(loc_or_str, timeout=timeout, single=False)
def s_ele(self, loc_or_ele):
def s_ele(self, loc_or_ele=None):
"""查找第一个符合条件的元素以SessionElement形式返回处理复杂页面时效率很高 \n
:param loc_or_ele: 元素的定位信息可以是loc元组或查询字符串
:return: SessionElement对象或属性文本
@ -156,16 +161,6 @@ class DriverElement(DrissionElement):
:param single: True则返回第一个False则返回全部
:return: DriverElement对象
"""
loc_or_str = str_to_loc(loc_or_str) if isinstance(loc_or_str, str) else translate_loc(loc_or_str)
loc_str = loc_or_str[1]
if loc_or_str[0] == 'xpath' and loc_or_str[1].lstrip().startswith('/'):
loc_str = f'.{loc_str}'
if loc_or_str[0] == 'css selector' and loc_or_str[1].lstrip().startswith('>'):
loc_str = f'{self.css_path}{loc_or_str[1]}'
loc_or_str = loc_or_str[0], loc_str
return make_driver_ele(self, loc_or_str, single, timeout)
def _get_ele_path(self, mode) -> str:
@ -292,7 +287,7 @@ class DriverElement(DrissionElement):
while perf_counter() - t1 <= timeout:
try:
self.inner_ele.click()
break
return True
except:
pass
@ -556,20 +551,6 @@ def make_driver_ele(page_or_ele,
:param timeout: 查找元素超时时间
:return: 返回DriverElement元素或它们组成的列表
"""
if isinstance(page_or_ele, BaseElement):
page = page_or_ele.page
driver = page_or_ele.inner_ele
else: # 传入的是DriverPage对象
page = page_or_ele
driver = page_or_ele.driver
# 设置等待对象
if timeout is not None and timeout != page.timeout:
wait = WebDriverWait(driver, timeout=timeout)
else:
page.wait_object._driver = driver
wait = page.wait_object
# ---------------处理定位符---------------
if isinstance(loc, str):
loc = str_to_loc(loc)
@ -578,6 +559,29 @@ def make_driver_ele(page_or_ele,
else:
raise ValueError("定位符必须为str或长度为2的tuple。")
# ---------------设置 page 和 driver---------------
if isinstance(page_or_ele, BaseElement): # 传入DriverElement 或 ShadowRootElement
loc_str = loc[1]
if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'):
loc_str = f'.{loc_str}'
elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>') and isinstance(page_or_ele, DriverElement):
loc_str = f'{page_or_ele.css_path}{loc[1]}'
loc = loc[0], loc_str
page = page_or_ele.page
driver = page_or_ele.inner_ele
else: # 传入的是DriverPage对象
page = page_or_ele
driver = page_or_ele.driver
# -----------------设置等待对象-----------------
if timeout is not None and timeout != page.timeout:
wait = WebDriverWait(driver, timeout=timeout)
else:
page.wait_object._driver = driver
wait = page.wait_object
# ---------------执行查找-----------------
try:
# 使用xpath查找

View File

@ -102,6 +102,11 @@ class SessionElement(DrissionElement):
"""返回未格式化处理的元素内文本"""
return str(self._inner_ele.text_content())
@property
def parent(self):
"""Return the parent element, i.e. the result of ``parents()`` called with its default ``num`` of 1."""
return self.parents()
def parents(self, num: int = 1):
"""返回上面第num级父元素 \n
:param num: 第几级父元素
@ -155,7 +160,7 @@ class SessionElement(DrissionElement):
"""
return self._ele(loc_or_str, single=False)
def s_ele(self, loc_or_str: Union[Tuple[str, str], str]):
def s_ele(self, loc_or_str: Union[Tuple[str, str], str] = None):
"""返回当前元素下级符合条件的第一个元素、属性或节点文本 \n
:param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
:return: SessionElement对象或属性文本
@ -176,20 +181,7 @@ class SessionElement(DrissionElement):
:param single: True则返回第一个False则返回全部
:return: SessionElement对象
"""
loc_or_str = str_to_loc(loc_or_str) if isinstance(loc_or_str, str) else translate_loc(loc_or_str)
element = self
loc_str = loc_or_str[1]
if loc_or_str[0] == 'xpath' and loc_or_str[1].lstrip().startswith('/'):
loc_str = f'.{loc_str}'
# 若css以>开头表示找元素的直接子元素要用page以绝对路径才能找到
if loc_or_str[0] == 'css selector' and loc_or_str[1].lstrip().startswith('>'):
loc_str = f'{self.css_path}{loc_or_str[1]}'
element = self.page
loc_or_str = loc_or_str[0], loc_str
return make_session_ele(element, loc_or_str, single)
return make_session_ele(self, loc_or_str, single)
def _get_ele_path(self, mode) -> str:
"""获取css路径或xpath路径
@ -246,26 +238,6 @@ def make_session_ele(html_or_ele: Union[str, BaseElement, BasePage],
:param single: True则返回第一个False则返回全部
:return: 返回SessionElement元素或列表或属性文本
"""
# 根据传入对象类型获取页面对象和lxml元素对象
if isinstance(html_or_ele, SessionElement): # SessionElement
page = html_or_ele.page
html_or_ele = html_or_ele.inner_ele
elif isinstance(html_or_ele, BasePage): # MixPage, DriverPage 或 SessionPage
page = html_or_ele
html_or_ele = fromstring(html_or_ele.html)
elif isinstance(html_or_ele, str): # 直接传入html文本
page = None
html_or_ele = fromstring(html_or_ele)
elif isinstance(html_or_ele, BaseElement): # DrissionElement 或 ShadowRootElement
page = html_or_ele.page
html_or_ele = fromstring(html_or_ele.html)
else:
raise TypeError('html_or_ele参数只能是元素、页面对象或html文本。')
# ---------------处理定位符---------------
if not loc:
loc = ('xpath', '.')
@ -277,6 +249,57 @@ def make_session_ele(html_or_ele: Union[str, BaseElement, BasePage],
else:
raise ValueError("定位符必须为str或长度为2的tuple。")
# ---------------根据传入对象类型获取页面对象和lxml元素对象---------------
if isinstance(html_or_ele, SessionElement): # SessionElement
page = html_or_ele.page
loc_str = loc[1]
if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'):
loc_str = f'.{loc[1]}'
html_or_ele = html_or_ele.inner_ele
# 若css以>开头表示找元素的直接子元素要用page以绝对路径才能找到
elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>'):
loc_str = f'{html_or_ele.css_path}{loc[1]}'
if html_or_ele.page:
html_or_ele = fromstring(html_or_ele.page.html)
else: # 接收html文本无page的情况
html_or_ele = fromstring(html_or_ele('xpath:/ancestor::*').html)
else:
html_or_ele = html_or_ele.inner_ele
loc = loc[0], loc_str
elif isinstance(html_or_ele, DrissionElement): # DriverElement
loc_str = loc[1]
if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'):
loc_str = f'.{loc[1]}'
elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>'):
loc_str = f'{html_or_ele.css_path}{loc[1]}'
loc = loc[0], loc_str
# 获取整个页面html再定位到当前元素以实现查找上级元素
page = html_or_ele.page
xpath = html_or_ele.xpath
html_or_ele = fromstring(html_or_ele.page.html)
html_or_ele = html_or_ele.xpath(xpath)[0]
elif isinstance(html_or_ele, BasePage): # MixPage, DriverPage 或 SessionPage
page = html_or_ele
html_or_ele = fromstring(html_or_ele.html)
elif isinstance(html_or_ele, str): # 直接传入html文本
page = None
html_or_ele = fromstring(html_or_ele)
elif isinstance(html_or_ele, BaseElement): # ShadowRootElement
page = html_or_ele.page
html_or_ele = fromstring(html_or_ele.html)
else:
raise TypeError('html_or_ele参数只能是元素、页面对象或html文本。')
# ---------------执行查找-----------------
try:
if loc[0] == 'xpath': # 用lxml内置方法获取lxml的元素对象列表

View File

@ -85,7 +85,7 @@ class ShadowRootElement(BaseElement):
"""
return self._ele(loc_or_str, timeout=timeout, single=False)
def s_ele(self, loc_or_ele):
def s_ele(self, loc_or_ele=None):
"""查找第一个符合条件的元素以SessionElement形式返回处理复杂页面时效率很高 \n
:param loc_or_ele: 元素的定位信息可以是loc元组或查询字符串
:return: SessionElement对象或属性文本