修改next, prev, parent等逻辑,未完成

This commit is contained in:
g1879 2021-12-20 00:01:07 +08:00
parent 4938c79814
commit 81f678c6e3
3 changed files with 86 additions and 58 deletions

View File

@ -11,7 +11,7 @@ from typing import Union, Tuple
from lxml.html import HtmlElement
from selenium.webdriver.remote.webelement import WebElement
from .common import format_html
from .common import format_html, translate_loc, str_to_loc
class BaseParser(object):
@ -55,12 +55,13 @@ class BaseElement(BaseParser):
def inner_ele(self) -> Union[WebElement, HtmlElement]:
return self._inner_ele
def next(self, index: int = 1, filter_loc: Union[tuple, str] = ''):
    """Return a single following sibling, optionally filtered by a locator.

    Asks ``self.nexts`` for exactly one result starting at position
    ``index`` and unwraps it.

    :param index: which following sibling to return (forwarded to ``nexts`` as ``begin``)
    :param filter_loc: locator used to filter the siblings, forwarded unchanged
    :return: the first item returned by ``nexts``, or None when it returns nothing
    """
    siblings = self.nexts(total=1, begin=index, filter_loc=filter_loc)
    if not siblings:
        return None
    return siblings[0]
# ----------------以下属性或方法由后代实现----------------
@ -68,18 +69,21 @@ class BaseElement(BaseParser):
def tag(self):
return
def parent(self, level_or_loc: Union[tuple, str, int] = 1):
    """Placeholder for the parent lookup; intended to be overridden by descendants.

    :param level_or_loc: ancestor level (int) or a locator (tuple or str); unused here
    :return: None
    """
    pass
def prev(self, index: int = 1, filter_loc: Union[tuple, str] = ''):
    """Default previous-sibling lookup that always yields None.

    :param index: which preceding sibling is wanted; unused here
    :param filter_loc: locator used to filter siblings; unused here
    :return: None
    """
    return None  # ShadowRootElement inherits this implementation directly
def prevs(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = ''):
    """Default previous-siblings lookup that always yields None.

    :param total: how many siblings are wanted; unused here
    :param begin: position to start from; unused here
    :param filter_loc: locator used to filter siblings; unused here
    :return: None
    """
    return None  # ShadowRootElement inherits this implementation directly
@property
def is_valid(self):
    """Report whether the element is usable; this base implementation always says True."""
    return True
@abstractmethod
def nexts(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = ''):
    """Abstract hook: return following siblings; concrete subclasses must implement it.

    :param total: how many siblings to return
    :param begin: position of the first sibling to return
    :param filter_loc: locator used to filter the siblings
    """
    pass
@ -87,16 +91,17 @@ class DrissionElement(BaseElement):
"""DriverElement 和 SessionElement的基类但不是ShadowRootElement的基类"""
@abstractmethod
def parent(self, level_or_loc: Union[tuple, str, int] = 1):
    """Return a parent element; abstract here, concrete subclasses supply the lookup.

    :param level_or_loc: ancestor level (int) or a locator (tuple or str)
    """
    pass
def prev(self, index: int = 1, filter_loc: Union[tuple, str] = ''):
    """Return a single preceding sibling, optionally filtered by a locator.

    Asks ``self.prevs`` for exactly one result starting at position
    ``index`` and unwraps it.

    :param index: which preceding sibling to return (forwarded to ``prevs`` as ``begin``)
    :param filter_loc: locator used to filter the siblings, forwarded unchanged
    :return: the first item returned by ``prevs``, or None when it returns nothing
    """
    siblings = self.prevs(total=1, begin=index, filter_loc=filter_loc)
    if not siblings:
        return None
    return siblings[0]
@property
@ -131,55 +136,72 @@ class DrissionElement(BaseElement):
return [format_html(x.strip(' ').rstrip('\n')) for x in texts if x and sub('[\r\n\t ]', '', x) != '']
def nexts(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = ''):
    """Return following siblings, optionally filtered by a locator.

    Thin wrapper around ``self._get_brothers`` with ``direction='following'``.

    :param total: how many results to return; None is forwarded as-is
    :param begin: position of the first result to return
    :param filter_loc: locator used to filter the siblings
    :return: whatever ``_get_brothers`` returns for the following direction
    """
    return self._get_brothers(
        begin=begin,
        total=total,
        filter_loc=filter_loc,
        direction='following',
    )
def prevs(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = ''):
    """Return preceding siblings, optionally filtered by a locator.

    Thin wrapper around ``self._get_brothers`` with ``direction='preceding'``.

    :param total: how many results to return; None is forwarded as-is
    :param begin: position of the first result to return
    :param filter_loc: locator used to filter the siblings
    :return: whatever ``_get_brothers`` returns for the preceding direction
    """
    return self._get_brothers(
        begin=begin,
        total=total,
        filter_loc=filter_loc,
        direction='preceding',
    )
def _get_brothers(self,
                  begin: int = 1,
                  total: int = None,
                  filter_loc: Union[tuple, str] = '',
                  direction: str = 'following'):
    """Collect sibling nodes in one direction and return the requested slice.

    The locator is converted to an xpath expression, appended to the
    ``following-sibling`` / ``preceding-sibling`` axis and evaluated through
    ``self._ele``. Whitespace-only string results are discarded before slicing.

    :param begin: 1-based position of the first result to return; for the
                  preceding direction it is counted from the end of the result list
    :param total: how many results to return; None (or 0) means all of them
    :param filter_loc: locator (tuple or str) used to filter the siblings
    :param direction: 'following' or 'preceding'
    :return: list with the selected results (empty when nothing matches the range)
    :raises ValueError: on an unknown direction or a css selector locator
    :raises TypeError: when filter_loc is neither tuple nor str
    """
    if direction not in ('following', 'preceding'):
        raise ValueError(f"direction参数只能是'following'或'preceding',现在是:'{direction}'")

    # Preceding siblings are queried with a timeout of 0, following ones with 0.5.
    timeout = 0 if direction == 'preceding' else .5

    if isinstance(filter_loc, tuple):
        loc = translate_loc(filter_loc)
    elif isinstance(filter_loc, str):
        loc = str_to_loc(filter_loc)
    else:
        raise TypeError('filter_loc参数只能是tuple或str。')

    if loc[0] == 'css selector':
        raise ValueError('此处暂不支持css selector选择器。')

    # Strip leading '.' and '/' characters so the expression can follow the sibling axis.
    node_txt = loc[1].lstrip('./')

    nodes = self._ele(f'xpath:./{direction}-sibling::{node_txt}', timeout=timeout, single=False)
    # Drop results that are strings consisting only of whitespace.
    nodes = [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')]

    if direction == 'following':
        start = begin - 1
        stop = start + total if total else None
        return nodes[start:stop]

    # Preceding: position 1 is the LAST list item (presumably because results come
    # back in document order -- confirm against _ele). Clamp both bounds at 0 so a
    # begin/total reaching past the available siblings yields fewer (or no) items
    # instead of a negative index wrapping around to the wrong elements.
    stop = max(len(nodes) - begin + 1, 0)
    start = max(stop - total, 0) if total else 0
    return nodes[start:stop]
# ----------------以下属性或方法由后代实现----------------
@property

View File

@ -91,13 +91,15 @@ class DriverElement(DrissionElement):
"""返回未格式化处理的元素内文本"""
return self.inner_ele.get_attribute('innerText')
def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> 'DriverElement':
    """Return an ancestor element a given number of levels up.

    Only the integer form is implemented: it builds the relative xpath
    ``.`` followed by ``level_or_loc`` repetitions of ``/..`` and looks it up
    through ``self.ele`` with ``timeout=0``.

    :param level_or_loc: number of levels to go up (int), or a locator (tuple or str)
    :return: result of ``self.ele`` for an int argument; None for a locator argument
    """
    if isinstance(level_or_loc, int):
        loc = 'xpath', f'.{"/.." * level_or_loc}'
        return self.ele(loc, timeout=0)

    # TODO: implement lookup by locator (tuple / str); until then nothing is returned.
    return None
def attr(self, attr: str) -> str:
"""获取attribute属性值 \n
@ -247,7 +249,7 @@ class DriverElement(DrissionElement):
return self._select
def left(self, filter_loc: Union[tuple, str] = '', index: int = 1) -> 'DriverElement':
def left(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
"""获取网页上显示在当前元素左边的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param index: 获取第几个
@ -255,7 +257,7 @@ class DriverElement(DrissionElement):
"""
return self._get_relative_eles('left', filter_loc, 1, index)[0]
def right(self, filter_loc: Union[tuple, str] = '', index: int = 1) -> 'DriverElement':
def right(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
"""获取网页上显示在当前元素右边的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param index: 获取第几个
@ -263,7 +265,7 @@ class DriverElement(DrissionElement):
"""
return self._get_relative_eles('right', filter_loc, 1, index)[0]
def above(self, filter_loc: Union[tuple, str] = '', index: int = 1) -> 'DriverElement':
def above(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
"""获取网页上显示在当前元素上边的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param index: 获取第几个
@ -271,7 +273,7 @@ class DriverElement(DrissionElement):
"""
return self._get_relative_eles('left', filter_loc, 1, index)[0]
def below(self, filter_loc: Union[tuple, str] = '', index: int = 1) -> 'DriverElement':
def below(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
"""获取网页上显示在当前元素下边的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param index: 获取第几个
@ -279,7 +281,7 @@ class DriverElement(DrissionElement):
"""
return self._get_relative_eles('left', filter_loc, 1, index)[0]
def near(self, filter_loc: Union[tuple, str] = '', index: int = 1) -> 'DriverElement':
def near(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
"""获取网页上显示在当前元素最近的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param index: 获取第几个
@ -287,7 +289,7 @@ class DriverElement(DrissionElement):
"""
return self._get_relative_eles('near', filter_loc, 1, index)[0]
def lefts(self, filter_loc: Union[tuple, str] = '', total: int = None, begin: int = 1) -> List['DriverElement']:
def lefts(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
"""获取网页上显示在当前元素左边的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param total: 获取多少个
@ -296,7 +298,7 @@ class DriverElement(DrissionElement):
"""
return self._get_relative_eles('left', filter_loc, total, begin)
def rights(self, filter_loc: Union[tuple, str] = '', total: int = None, begin: int = 1) -> List['DriverElement']:
def rights(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
"""获取网页上显示在当前元素右边的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param total: 获取多少个
@ -305,7 +307,7 @@ class DriverElement(DrissionElement):
"""
return self._get_relative_eles('right', filter_loc, total, begin)
def aboves(self, filter_loc: Union[tuple, str] = '', total: int = None, begin: int = 1) -> List['DriverElement']:
def aboves(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
"""获取网页上显示在当前元素上边的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param total: 获取多少个
@ -314,7 +316,7 @@ class DriverElement(DrissionElement):
"""
return self._get_relative_eles('left', filter_loc, total, begin)
def belows(self, filter_loc: Union[tuple, str] = '', total: int = None, begin: int = 1) -> List['DriverElement']:
def belows(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
"""获取网页上显示在当前元素下边的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param total: 获取多少个
@ -323,7 +325,7 @@ class DriverElement(DrissionElement):
"""
return self._get_relative_eles('left', filter_loc, total, begin)
def nears(self, filter_loc: Union[tuple, str] = '', total: int = None, begin: int = 1) -> List['DriverElement']:
def nears(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
"""获取网页上显示在当前元素最近的某个元素,可设置选取条件 \n
:param filter_loc: 筛选条件可用selenium的(By, str)也可用本库定位语法
:param begin: 从第几个开始返回
@ -675,6 +677,8 @@ class DriverElement(DrissionElement):
if not isinstance(begin, int) or begin < 1 or not isinstance(total, int) or total < 1:
raise ValueError('begin和total参数只能是大于0的整数。')
# TODO: 支持selenium原生相对定位符
try:
from selenium.webdriver.support.relative_locator import RelativeBy
except ImportError:

View File

@ -70,12 +70,14 @@ class SessionElement(DrissionElement):
"""返回未格式化处理的元素内文本"""
return str(self._inner_ele.text_content())
def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> 'SessionElement':
    """Return an ancestor element a given number of levels up.

    Only the integer form is implemented: it builds the xpath ``..`` followed
    by ``level_or_loc - 1`` repetitions of ``/..`` and looks it up through
    ``self.ele``.

    :param level_or_loc: number of levels to go up (int), or a locator (tuple or str)
    :return: result of ``self.ele`` for an int argument; None for a locator argument
    """
    if isinstance(level_or_loc, int):
        return self.ele(f'xpath:..{"/.." * (level_or_loc - 1)}')

    # TODO: implement lookup by locator (tuple / str); until then nothing is returned.
    return None
def attr(self, attr: str) -> Union[str, None]:
"""返回attribute属性值 \n