修改next, prev, parent等逻辑,未完成

This commit is contained in:
g1879 2021-12-20 00:01:07 +08:00
parent 4938c79814
commit 81f678c6e3
3 changed files with 86 additions and 58 deletions

View File

@ -11,7 +11,7 @@ from typing import Union, Tuple
from lxml.html import HtmlElement from lxml.html import HtmlElement
from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.remote.webelement import WebElement
from .common import format_html from .common import format_html, translate_loc, str_to_loc
class BaseParser(object): class BaseParser(object):
@ -55,12 +55,13 @@ class BaseElement(BaseParser):
def inner_ele(self) -> Union[WebElement, HtmlElement]: def inner_ele(self) -> Union[WebElement, HtmlElement]:
return self._inner_ele return self._inner_ele
def next(self, index: int = 1): def next(self, index: int = 1, filter_loc: Union[tuple, str] = ''):
"""返回后面的一个兄弟元素,可指定第几个 \n """返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 \n
:param index: 后面第几个兄弟元素 :param index: 后面第几个兄弟元素
:param filter_loc: 用于筛选元素的查询语法
:return: 兄弟元素 :return: 兄弟元素
""" """
nexts = self.nexts(total=1, begin=index) nexts = self.nexts(total=1, begin=index, filter_loc=filter_loc)
return nexts[0] if nexts else None return nexts[0] if nexts else None
# ----------------以下属性或方法由后代实现---------------- # ----------------以下属性或方法由后代实现----------------
@ -68,18 +69,21 @@ class BaseElement(BaseParser):
def tag(self): def tag(self):
return return
def parent(self, level: int = 1): def parent(self, level_or_loc: Union[tuple, str, int] = 1):
pass pass
def prev(self, index: int = 1): def prev(self, index: int = 1, filter_loc: Union[tuple, str] = ''):
return return None # ShadowRootElement直接继承
def prevs(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = ''):
return None # ShadowRootElement直接继承
@property @property
def is_valid(self): def is_valid(self):
return True return True
@abstractmethod @abstractmethod
def nexts(self, total: int = None, begin: int = 1): def nexts(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = ''):
pass pass
@ -87,16 +91,17 @@ class DrissionElement(BaseElement):
"""DriverElement 和 SessionElement的基类但不是ShadowRootElement的基类""" """DriverElement 和 SessionElement的基类但不是ShadowRootElement的基类"""
@abstractmethod @abstractmethod
def parent(self, level: int = 1): def parent(self, level_or_loc: Union[tuple, str, int] = 1):
"""返回父级元素""" """返回父级元素"""
pass pass
def prev(self, index: int = 1): def prev(self, index: int = 1, filter_loc: Union[tuple, str] = ''):
"""返回前面的一个兄弟元素,可指定第几个 \n """返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 \n
:param index: 前面第几个 :param index: 前面第几个
:param filter_loc: 用于筛选元素的查询语法
:return: 兄弟元素 :return: 兄弟元素
""" """
prevs = self.prevs(total=1, begin=index) prevs = self.prevs(total=1, begin=index, filter_loc=filter_loc)
return prevs[0] if prevs else None return prevs[0] if prevs else None
@property @property
@ -131,55 +136,72 @@ class DrissionElement(BaseElement):
return [format_html(x.strip(' ').rstrip('\n')) for x in texts if x and sub('[\r\n\t ]', '', x) != ''] return [format_html(x.strip(' ').rstrip('\n')) for x in texts if x and sub('[\r\n\t ]', '', x) != '']
def nexts(self, total: int = None, begin: int = 1, mode: str = 'ele'): def nexts(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = ''):
"""返回后面若干个兄弟元素或节点组成的列表total为None返回所有 \n """返回后面若干个兄弟元素或节点组成的列表 \n
可用查询语法筛选可指定返回筛选结果的哪几个total为None返回所有 \n
:param total: 获取多少个元素或节点 :param total: 获取多少个元素或节点
:param begin: 从第几个开始获取从1起 :param begin: 从第几个开始获取从1起
:param mode: 'ele', 'node' 'text'匹配元素节点或文本节点 :param filter_loc: 用于筛选元素的查询语法
:return: SessionElement对象 :return: SessionElement对象
""" """
return self._get_brothers(begin=begin, total=total, mode=mode, direction='next') return self._get_brothers(begin=begin, total=total, filter_loc=filter_loc, direction='following')
def prevs(self, total: int = None, begin: int = 1, mode: str = 'ele'): def prevs(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = ''):
"""返回前面若干个兄弟元素或节点组成的列表total为None返回所有 \n """返回前面若干个兄弟元素或节点组成的列表 \n
可用查询语法筛选可指定返回筛选结果的哪几个total为None返回所有 \n
:param total: 获取多少个元素或节点 :param total: 获取多少个元素或节点
:param begin: 从第几个开始获取从1起 :param begin: 从第几个开始获取从1起
:param mode: 'ele', 'node' 'text'匹配元素节点或文本节点 :param filter_loc: 用于筛选元素的查询语法
:return: SessionElement对象 :return: SessionElement对象
""" """
return self._get_brothers(begin=begin, total=total, mode=mode, direction='prev') return self._get_brothers(begin=begin, total=total, filter_loc=filter_loc, direction='preceding')
def _get_brothers(self, begin: int = 1, total: int = None, mode: str = 'ele', direction: str = 'next'): def _get_brothers(self,
"""按要求返回兄弟元素或节点组成的列表 \n begin: int = 1,
total: int = None,
filter_loc: Union[tuple, str] = '',
direction: str = 'following'):
"""按要求返回兄弟元素或节点组成的列表 \n
:param begin: 从第几个兄弟节点或元素开始 :param begin: 从第几个兄弟节点或元素开始
:param total: 获取多少个 :param total: 获取多少个
:param mode: 'ele', 'node' 'text'匹配元素节点或文本节点 :param filter_loc: 用于筛选元素的查询语法
:param direction: 'next' 'prev'查找的方向 :param direction: 'following' 'preceding'查找的方向
:return: DriverElement对象或字符串 :return: DriverElement对象或字符串
""" """
# 查找节点的类型 timeout = 0 if direction == 'preceding' else .5
node_txt = {'ele': '*', 'node': 'node()', 'text': 'text()'}.get(mode)
if not node_txt:
raise ValueError(f"mode参数只能是'node''ele''text',现在是:'{mode}'")
# 查找节点的方向 if isinstance(filter_loc, tuple):
direction_txt = {'next': 'following', 'prev': 'preceding'}.get(direction) node_txt = translate_loc(filter_loc)
if not direction_txt:
raise ValueError(f"direction参数只能是'next''prev',现在是:'{direction}'")
timeout = 0 if direction == 'prev' else .5 elif isinstance(filter_loc, str):
node_txt = str_to_loc(filter_loc)
else:
raise TypeError('filter_loc参数只能是tuple或str。')
if node_txt[0] == 'css selector':
raise ValueError('此处暂不支持css selector选择器。')
node_txt = node_txt[1].lstrip('./')
# 获取所有节点 # 获取所有节点
nodes = self._ele(f'xpath:./{direction_txt}-sibling::{node_txt}', timeout=timeout, single=False) t = f'xpath:./{direction}-sibling::{node_txt}'
nodes = self._ele(t, timeout=timeout, single=False)
nodes = [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')]
if direction == 'next': len_nodes = len(nodes)
end = None if not total or total >= len(nodes) else begin + total - 1 if direction == 'following':
end = None if not total else begin - 1 + total
begin -= 1 begin -= 1
else:
begin = None if not total or total >= len(nodes) else begin - total - 1
end = None
return [e for e in nodes[begin:end] if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')] else:
tmp = len_nodes - begin
begin = tmp - total + 1
end = tmp + 1
if begin < 0:
begin = None
return nodes[begin:end]
# ----------------以下属性或方法由后代实现---------------- # ----------------以下属性或方法由后代实现----------------
@property @property

View File

@ -91,13 +91,15 @@ class DriverElement(DrissionElement):
"""返回未格式化处理的元素内文本""" """返回未格式化处理的元素内文本"""
return self.inner_ele.get_attribute('innerText') return self.inner_ele.get_attribute('innerText')
def parent(self, level: int = 1) -> 'DriverElement': def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> 'DriverElement':
"""返回上面第level级父元素 \n """返回上面某一级父元素,可指定层数或用查询语法定位 \n
:param level: 第几级父元素 :param level_or_loc: 第几级父元素或定位符
:return: DriverElement对象 :return: DriverElement对象
""" """
loc = 'xpath', f'.{"/.." * level}' if isinstance(level_or_loc, int):
loc = 'xpath', f'.{"/.." * level_or_loc}'
return self.ele(loc, timeout=0) return self.ele(loc, timeout=0)
# TODO: 完善用定位符时的逻辑
def attr(self, attr: str) -> str: def attr(self, attr: str) -> str:
"""获取attribute属性值 \n """获取attribute属性值 \n
@ -247,7 +249,7 @@ class DriverElement(DrissionElement):
return self._select return self._select
def left(self, filter_loc: Union[tuple, str] = '', index: int = 1) -> 'DriverElement': def left(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
"""获取网页上显示在当前元素左边的某个元素,可设置选取条件 \n """获取网页上显示在当前元素左边的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法 :param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param index: 获取第几个 :param index: 获取第几个
@ -255,7 +257,7 @@ class DriverElement(DrissionElement):
""" """
return self._get_relative_eles('left', filter_loc, 1, index)[0] return self._get_relative_eles('left', filter_loc, 1, index)[0]
def right(self, filter_loc: Union[tuple, str] = '', index: int = 1) -> 'DriverElement': def right(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
"""获取网页上显示在当前元素右边的某个元素,可设置选取条件 \n """获取网页上显示在当前元素右边的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法 :param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param index: 获取第几个 :param index: 获取第几个
@ -263,7 +265,7 @@ class DriverElement(DrissionElement):
""" """
return self._get_relative_eles('right', filter_loc, 1, index)[0] return self._get_relative_eles('right', filter_loc, 1, index)[0]
def above(self, filter_loc: Union[tuple, str] = '', index: int = 1) -> 'DriverElement': def above(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
"""获取网页上显示在当前元素上边的某个元素,可设置选取条件 \n """获取网页上显示在当前元素上边的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法 :param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param index: 获取第几个 :param index: 获取第几个
@ -271,7 +273,7 @@ class DriverElement(DrissionElement):
""" """
return self._get_relative_eles('left', filter_loc, 1, index)[0] return self._get_relative_eles('left', filter_loc, 1, index)[0]
def below(self, filter_loc: Union[tuple, str] = '', index: int = 1) -> 'DriverElement': def below(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
"""获取网页上显示在当前元素下边的某个元素,可设置选取条件 \n """获取网页上显示在当前元素下边的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法 :param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param index: 获取第几个 :param index: 获取第几个
@ -279,7 +281,7 @@ class DriverElement(DrissionElement):
""" """
return self._get_relative_eles('left', filter_loc, 1, index)[0] return self._get_relative_eles('left', filter_loc, 1, index)[0]
def near(self, filter_loc: Union[tuple, str] = '', index: int = 1) -> 'DriverElement': def near(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
"""获取网页上显示在当前元素最近的某个元素,可设置选取条件 \n """获取网页上显示在当前元素最近的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法 :param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param index: 获取第几个 :param index: 获取第几个
@ -287,7 +289,7 @@ class DriverElement(DrissionElement):
""" """
return self._get_relative_eles('near', filter_loc, 1, index)[0] return self._get_relative_eles('near', filter_loc, 1, index)[0]
def lefts(self, filter_loc: Union[tuple, str] = '', total: int = None, begin: int = 1) -> List['DriverElement']: def lefts(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
"""获取网页上显示在当前元素左边的某个元素,可设置选取条件 \n """获取网页上显示在当前元素左边的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法 :param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param total: 获取多少个 :param total: 获取多少个
@ -296,7 +298,7 @@ class DriverElement(DrissionElement):
""" """
return self._get_relative_eles('left', filter_loc, total, begin) return self._get_relative_eles('left', filter_loc, total, begin)
def rights(self, filter_loc: Union[tuple, str] = '', total: int = None, begin: int = 1) -> List['DriverElement']: def rights(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
"""获取网页上显示在当前元素右边的某个元素,可设置选取条件 \n """获取网页上显示在当前元素右边的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法 :param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param total: 获取多少个 :param total: 获取多少个
@ -305,7 +307,7 @@ class DriverElement(DrissionElement):
""" """
return self._get_relative_eles('right', filter_loc, total, begin) return self._get_relative_eles('right', filter_loc, total, begin)
def aboves(self, filter_loc: Union[tuple, str] = '', total: int = None, begin: int = 1) -> List['DriverElement']: def aboves(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
"""获取网页上显示在当前元素上边的某个元素,可设置选取条件 \n """获取网页上显示在当前元素上边的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法 :param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param total: 获取多少个 :param total: 获取多少个
@ -314,7 +316,7 @@ class DriverElement(DrissionElement):
""" """
return self._get_relative_eles('left', filter_loc, total, begin) return self._get_relative_eles('left', filter_loc, total, begin)
def belows(self, filter_loc: Union[tuple, str] = '', total: int = None, begin: int = 1) -> List['DriverElement']: def belows(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
"""获取网页上显示在当前元素下边的某个元素,可设置选取条件 \n """获取网页上显示在当前元素下边的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法 :param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param total: 获取多少个 :param total: 获取多少个
@ -323,7 +325,7 @@ class DriverElement(DrissionElement):
""" """
return self._get_relative_eles('left', filter_loc, total, begin) return self._get_relative_eles('left', filter_loc, total, begin)
def nears(self, filter_loc: Union[tuple, str] = '', total: int = None, begin: int = 1) -> List['DriverElement']: def nears(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
"""获取网页上显示在当前元素最近的某个元素,可设置选取条件 \n """获取网页上显示在当前元素最近的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法 :param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param begin: 从第几个开始返回 :param begin: 从第几个开始返回
@ -675,6 +677,8 @@ class DriverElement(DrissionElement):
if not isinstance(begin, int) or begin < 1 or not isinstance(total, int) or total < 1: if not isinstance(begin, int) or begin < 1 or not isinstance(total, int) or total < 1:
raise ValueError('begin和total参数只能是大于0的整数。') raise ValueError('begin和total参数只能是大于0的整数。')
# TODO: 支持selenium原生相对定位符
try: try:
from selenium.webdriver.support.relative_locator import RelativeBy from selenium.webdriver.support.relative_locator import RelativeBy
except ImportError: except ImportError:

View File

@ -70,12 +70,14 @@ class SessionElement(DrissionElement):
"""返回未格式化处理的元素内文本""" """返回未格式化处理的元素内文本"""
return str(self._inner_ele.text_content()) return str(self._inner_ele.text_content())
def parent(self, level: int = 1) -> 'SessionElement': def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> 'SessionElement':
"""返回上面第level级父元素 \n """返回上面某一级父元素,可指定层数或用查询语法定位 \n
:param level: 第几级父元素 :param level_or_loc: 第几级父元素或定位符
:return: SessionElement对象 :return: SessionElement对象
""" """
return self.ele(f'xpath:..{"/.." * (level - 1)}') if isinstance(level_or_loc, int):
return self.ele(f'xpath:..{"/.." * (level_or_loc - 1)}')
# TODO: 完善用定位符时的逻辑
def attr(self, attr: str) -> Union[str, None]: def attr(self, attr: str) -> Union[str, None]:
"""返回attribute属性值 \n """返回attribute属性值 \n