Mirror of https://gitee.com/g1879/DrissionPage.git (synced 2024-12-10 04:00:23 +08:00)
修改next, prev, parent等逻辑,未完成
This commit is contained in:
parent 4938c79814
commit 81f678c6e3
@@ -11,7 +11,7 @@ from typing import Union, Tuple
|
||||
from lxml.html import HtmlElement
|
||||
from selenium.webdriver.remote.webelement import WebElement
|
||||
|
||||
from .common import format_html
|
||||
from .common import format_html, translate_loc, str_to_loc
|
||||
|
||||
|
||||
class BaseParser(object):
|
||||
@@ -55,12 +55,13 @@ class BaseElement(BaseParser):
|
||||
def inner_ele(self) -> Union[WebElement, HtmlElement]:
|
||||
return self._inner_ele
|
||||
|
||||
def next(self, index: int = 1):
|
||||
"""返回后面的一个兄弟元素,可指定第几个 \n
|
||||
def next(self, index: int = 1, filter_loc: Union[tuple, str] = ''):
|
||||
"""返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 \n
|
||||
:param index: 后面第几个兄弟元素
|
||||
:param filter_loc: 用于筛选元素的查询语法
|
||||
:return: 兄弟元素
|
||||
"""
|
||||
nexts = self.nexts(total=1, begin=index)
|
||||
nexts = self.nexts(total=1, begin=index, filter_loc=filter_loc)
|
||||
return nexts[0] if nexts else None
|
||||
|
||||
# ----------------以下属性或方法由后代实现----------------
|
||||
@@ -68,18 +69,21 @@ class BaseElement(BaseParser):
|
||||
def tag(self):
|
||||
return
|
||||
|
||||
def parent(self, level: int = 1):
|
||||
def parent(self, level_or_loc: Union[tuple, str, int] = 1):
|
||||
pass
|
||||
|
||||
def prev(self, index: int = 1):
|
||||
return
|
||||
def prev(self, index: int = 1, filter_loc: Union[tuple, str] = ''):
|
||||
return None # ShadowRootElement直接继承
|
||||
|
||||
def prevs(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = ''):
|
||||
return None # ShadowRootElement直接继承
|
||||
|
||||
@property
|
||||
def is_valid(self):
|
||||
return True
|
||||
|
||||
@abstractmethod
|
||||
def nexts(self, total: int = None, begin: int = 1):
|
||||
def nexts(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = ''):
|
||||
pass
|
||||
|
||||
|
||||
@@ -87,16 +91,17 @@ class DrissionElement(BaseElement):
|
||||
"""DriverElement 和 SessionElement的基类,但不是ShadowRootElement的基类"""
|
||||
|
||||
@abstractmethod
|
||||
def parent(self, level: int = 1):
|
||||
def parent(self, level_or_loc: Union[tuple, str, int] = 1):
|
||||
"""返回父级元素"""
|
||||
pass
|
||||
|
||||
def prev(self, index: int = 1):
|
||||
"""返回前面的一个兄弟元素,可指定第几个 \n
|
||||
def prev(self, index: int = 1, filter_loc: Union[tuple, str] = ''):
|
||||
"""返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 \n
|
||||
:param index: 前面第几个
|
||||
:param filter_loc: 用于筛选元素的查询语法
|
||||
:return: 兄弟元素
|
||||
"""
|
||||
prevs = self.prevs(total=1, begin=index)
|
||||
prevs = self.prevs(total=1, begin=index, filter_loc=filter_loc)
|
||||
return prevs[0] if prevs else None
|
||||
|
||||
@property
|
||||
@@ -131,55 +136,72 @@ class DrissionElement(BaseElement):
|
||||
|
||||
return [format_html(x.strip(' ').rstrip('\n')) for x in texts if x and sub('[\r\n\t ]', '', x) != '']
|
||||
|
||||
def nexts(self, total: int = None, begin: int = 1, mode: str = 'ele'):
|
||||
"""返回后面若干个兄弟元素或节点组成的列表,total为None返回所有 \n
|
||||
def nexts(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = ''):
|
||||
"""返回后面若干个兄弟元素或节点组成的列表 \n
|
||||
可用查询语法筛选,可指定返回筛选结果的哪几个,total为None返回所有 \n
|
||||
:param total: 获取多少个元素或节点
|
||||
:param begin: 从第几个开始获取,从1起
|
||||
:param mode: 'ele', 'node' 或 'text',匹配元素、节点、或文本节点
|
||||
:param filter_loc: 用于筛选元素的查询语法
|
||||
:return: SessionElement对象
|
||||
"""
|
||||
return self._get_brothers(begin=begin, total=total, mode=mode, direction='next')
|
||||
return self._get_brothers(begin=begin, total=total, filter_loc=filter_loc, direction='following')
|
||||
|
||||
def prevs(self, total: int = None, begin: int = 1, mode: str = 'ele'):
|
||||
"""返回前面若干个兄弟元素或节点组成的列表,total为None返回所有 \n
|
||||
def prevs(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = ''):
|
||||
"""返回前面若干个兄弟元素或节点组成的列表 \n
|
||||
可用查询语法筛选,可指定返回筛选结果的哪几个,total为None返回所有 \n
|
||||
:param total: 获取多少个元素或节点
|
||||
:param begin: 从第几个开始获取,从1起
|
||||
:param mode: 'ele', 'node' 或 'text',匹配元素、节点、或文本节点
|
||||
:param filter_loc: 用于筛选元素的查询语法
|
||||
:return: SessionElement对象
|
||||
"""
|
||||
return self._get_brothers(begin=begin, total=total, mode=mode, direction='prev')
|
||||
return self._get_brothers(begin=begin, total=total, filter_loc=filter_loc, direction='preceding')
|
||||
|
||||
def _get_brothers(self, begin: int = 1, total: int = None, mode: str = 'ele', direction: str = 'next'):
|
||||
"""按要求返回兄弟元素或节点组成的列表 \n
|
||||
def _get_brothers(self,
|
||||
begin: int = 1,
|
||||
total: int = None,
|
||||
filter_loc: Union[tuple, str] = '',
|
||||
direction: str = 'following'):
|
||||
"""按要求返回兄弟元素或节点组成的列表 \n
|
||||
:param begin: 从第几个兄弟节点或元素开始
|
||||
:param total: 获取多少个
|
||||
:param mode: 'ele', 'node' 或 'text',匹配元素、节点、或文本节点
|
||||
:param direction: 'next' 或 'prev',查找的方向
|
||||
:param filter_loc: 用于筛选元素的查询语法
|
||||
:param direction: 'following' 或 'preceding',查找的方向
|
||||
:return: DriverElement对象或字符串
|
||||
"""
|
||||
# 查找节点的类型
|
||||
node_txt = {'ele': '*', 'node': 'node()', 'text': 'text()'}.get(mode)
|
||||
if not node_txt:
|
||||
raise ValueError(f"mode参数只能是'node'、'ele'或'text',现在是:'{mode}'。")
|
||||
timeout = 0 if direction == 'preceding' else .5
|
||||
|
||||
# 查找节点的方向
|
||||
direction_txt = {'next': 'following', 'prev': 'preceding'}.get(direction)
|
||||
if not direction_txt:
|
||||
raise ValueError(f"direction参数只能是'next'或'prev',现在是:'{direction}'。")
|
||||
if isinstance(filter_loc, tuple):
|
||||
node_txt = translate_loc(filter_loc)
|
||||
|
||||
timeout = 0 if direction == 'prev' else .5
|
||||
elif isinstance(filter_loc, str):
|
||||
node_txt = str_to_loc(filter_loc)
|
||||
|
||||
else:
|
||||
raise TypeError('filter_loc参数只能是tuple或str。')
|
||||
|
||||
if node_txt[0] == 'css selector':
|
||||
raise ValueError('此处暂不支持css selector选择器。')
|
||||
|
||||
node_txt = node_txt[1].lstrip('./')
|
||||
|
||||
# 获取所有节点
|
||||
nodes = self._ele(f'xpath:./{direction_txt}-sibling::{node_txt}', timeout=timeout, single=False)
|
||||
t = f'xpath:./{direction}-sibling::{node_txt}'
|
||||
nodes = self._ele(t, timeout=timeout, single=False)
|
||||
nodes = [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')]
|
||||
|
||||
if direction == 'next':
|
||||
end = None if not total or total >= len(nodes) else begin + total - 1
|
||||
len_nodes = len(nodes)
|
||||
if direction == 'following':
|
||||
end = None if not total else begin - 1 + total
|
||||
begin -= 1
|
||||
else:
|
||||
begin = None if not total or total >= len(nodes) else begin - total - 1
|
||||
end = None
|
||||
|
||||
return [e for e in nodes[begin:end] if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')]
|
||||
else:
|
||||
tmp = len_nodes - begin
|
||||
begin = tmp - total + 1
|
||||
end = tmp + 1
|
||||
if begin < 0:
|
||||
begin = None
|
||||
|
||||
return nodes[begin:end]
|
||||
|
||||
# ----------------以下属性或方法由后代实现----------------
|
||||
@property
|
||||
|
@@ -91,13 +91,15 @@ class DriverElement(DrissionElement):
|
||||
"""返回未格式化处理的元素内文本"""
|
||||
return self.inner_ele.get_attribute('innerText')
|
||||
|
||||
def parent(self, level: int = 1) -> 'DriverElement':
|
||||
"""返回上面第level级父元素 \n
|
||||
:param level: 第几级父元素
|
||||
def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> 'DriverElement':
|
||||
"""返回上面某一级父元素,可指定层数或用查询语法定位 \n
|
||||
:param level_or_loc: 第几级父元素,或定位符
|
||||
:return: DriverElement对象
|
||||
"""
|
||||
loc = 'xpath', f'.{"/.." * level}'
|
||||
if isinstance(level_or_loc, int):
|
||||
loc = 'xpath', f'.{"/.." * level_or_loc}'
|
||||
return self.ele(loc, timeout=0)
|
||||
# TODO: 完善用定位符时的逻辑
|
||||
|
||||
def attr(self, attr: str) -> str:
|
||||
"""获取attribute属性值 \n
|
||||
@@ -247,7 +249,7 @@ class DriverElement(DrissionElement):
|
||||
|
||||
return self._select
|
||||
|
||||
def left(self, filter_loc: Union[tuple, str] = '', index: int = 1) -> 'DriverElement':
|
||||
def left(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
|
||||
"""获取网页上显示在当前元素左边的某个元素,可设置选取条件 \n
|
||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||
:param index: 获取第几个
|
||||
@@ -255,7 +257,7 @@ class DriverElement(DrissionElement):
|
||||
"""
|
||||
return self._get_relative_eles('left', filter_loc, 1, index)[0]
|
||||
|
||||
def right(self, filter_loc: Union[tuple, str] = '', index: int = 1) -> 'DriverElement':
|
||||
def right(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
|
||||
"""获取网页上显示在当前元素右边的某个元素,可设置选取条件 \n
|
||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||
:param index: 获取第几个
|
||||
@@ -263,7 +265,7 @@ class DriverElement(DrissionElement):
|
||||
"""
|
||||
return self._get_relative_eles('right', filter_loc, 1, index)[0]
|
||||
|
||||
def above(self, filter_loc: Union[tuple, str] = '', index: int = 1) -> 'DriverElement':
|
||||
def above(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
|
||||
"""获取网页上显示在当前元素上边的某个元素,可设置选取条件 \n
|
||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||
:param index: 获取第几个
|
||||
@@ -271,7 +273,7 @@ class DriverElement(DrissionElement):
|
||||
"""
|
||||
return self._get_relative_eles('left', filter_loc, 1, index)[0]
|
||||
|
||||
def below(self, filter_loc: Union[tuple, str] = '', index: int = 1) -> 'DriverElement':
|
||||
def below(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
|
||||
"""获取网页上显示在当前元素下边的某个元素,可设置选取条件 \n
|
||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||
:param index: 获取第几个
|
||||
@@ -279,7 +281,7 @@ class DriverElement(DrissionElement):
|
||||
"""
|
||||
return self._get_relative_eles('left', filter_loc, 1, index)[0]
|
||||
|
||||
def near(self, filter_loc: Union[tuple, str] = '', index: int = 1) -> 'DriverElement':
|
||||
def near(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
|
||||
"""获取网页上显示在当前元素最近的某个元素,可设置选取条件 \n
|
||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||
:param index: 获取第几个
|
||||
@@ -287,7 +289,7 @@ class DriverElement(DrissionElement):
|
||||
"""
|
||||
return self._get_relative_eles('near', filter_loc, 1, index)[0]
|
||||
|
||||
def lefts(self, filter_loc: Union[tuple, str] = '', total: int = None, begin: int = 1) -> List['DriverElement']:
|
||||
def lefts(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
|
||||
"""获取网页上显示在当前元素左边的某个元素,可设置选取条件 \n
|
||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||
:param total: 获取多少个
|
||||
@@ -296,7 +298,7 @@ class DriverElement(DrissionElement):
|
||||
"""
|
||||
return self._get_relative_eles('left', filter_loc, total, begin)
|
||||
|
||||
def rights(self, filter_loc: Union[tuple, str] = '', total: int = None, begin: int = 1) -> List['DriverElement']:
|
||||
def rights(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
|
||||
"""获取网页上显示在当前元素右边的某个元素,可设置选取条件 \n
|
||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||
:param total: 获取多少个
|
||||
@@ -305,7 +307,7 @@ class DriverElement(DrissionElement):
|
||||
"""
|
||||
return self._get_relative_eles('right', filter_loc, total, begin)
|
||||
|
||||
def aboves(self, filter_loc: Union[tuple, str] = '', total: int = None, begin: int = 1) -> List['DriverElement']:
|
||||
def aboves(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
|
||||
"""获取网页上显示在当前元素上边的某个元素,可设置选取条件 \n
|
||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||
:param total: 获取多少个
|
||||
@@ -314,7 +316,7 @@ class DriverElement(DrissionElement):
|
||||
"""
|
||||
return self._get_relative_eles('left', filter_loc, total, begin)
|
||||
|
||||
def belows(self, filter_loc: Union[tuple, str] = '', total: int = None, begin: int = 1) -> List['DriverElement']:
|
||||
def belows(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
|
||||
"""获取网页上显示在当前元素下边的某个元素,可设置选取条件 \n
|
||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||
:param total: 获取多少个
|
||||
@@ -323,7 +325,7 @@ class DriverElement(DrissionElement):
|
||||
"""
|
||||
return self._get_relative_eles('left', filter_loc, total, begin)
|
||||
|
||||
def nears(self, filter_loc: Union[tuple, str] = '', total: int = None, begin: int = 1) -> List['DriverElement']:
|
||||
def nears(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
|
||||
"""获取网页上显示在当前元素最近的某个元素,可设置选取条件 \n
|
||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||
:param begin: 从第几个开始返回
|
||||
@@ -675,6 +677,8 @@ class DriverElement(DrissionElement):
|
||||
if not isinstance(begin, int) or begin < 1 or not isinstance(total, int) or total < 1:
|
||||
raise ValueError('begin和total参数只能是大于0的整数。')
|
||||
|
||||
# TODO: 支持selenium原生相对定位符
|
||||
|
||||
try:
|
||||
from selenium.webdriver.support.relative_locator import RelativeBy
|
||||
except ImportError:
|
||||
|
@@ -70,12 +70,14 @@ class SessionElement(DrissionElement):
|
||||
"""返回未格式化处理的元素内文本"""
|
||||
return str(self._inner_ele.text_content())
|
||||
|
||||
def parent(self, level: int = 1) -> 'SessionElement':
|
||||
"""返回上面第level级父元素 \n
|
||||
:param level: 第几级父元素
|
||||
def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> 'SessionElement':
|
||||
"""返回上面某一级父元素,可指定层数或用查询语法定位 \n
|
||||
:param level_or_loc: 第几级父元素,或定位符
|
||||
:return: SessionElement对象
|
||||
"""
|
||||
return self.ele(f'xpath:..{"/.." * (level - 1)}')
|
||||
if isinstance(level_or_loc, int):
|
||||
return self.ele(f'xpath:..{"/.." * (level_or_loc - 1)}')
|
||||
# TODO: 完善用定位符时的逻辑
|
||||
|
||||
def attr(self, attr: str) -> Union[str, None]:
|
||||
"""返回attribute属性值 \n
|
||||
|
Loading…
x
Reference in New Issue
Block a user