mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
基本完成相对定位语法修改,待测试
This commit is contained in:
parent
81f678c6e3
commit
7ec51417ce
@ -6,12 +6,12 @@
|
|||||||
"""
|
"""
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from re import sub
|
from re import sub
|
||||||
from typing import Union, Tuple
|
from typing import Union, Tuple, List
|
||||||
|
|
||||||
from lxml.html import HtmlElement
|
from lxml.html import HtmlElement
|
||||||
from selenium.webdriver.remote.webelement import WebElement
|
from selenium.webdriver.remote.webelement import WebElement
|
||||||
|
|
||||||
from .common import format_html, translate_loc, str_to_loc
|
from .common import format_html, get_loc
|
||||||
|
|
||||||
|
|
||||||
class BaseParser(object):
|
class BaseParser(object):
|
||||||
@ -31,11 +31,9 @@ class BaseParser(object):
|
|||||||
def html(self) -> str:
|
def html(self) -> str:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def s_ele(self, loc_or_ele):
|
def s_ele(self, loc_or_ele):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def s_eles(self, loc_or_str):
|
def s_eles(self, loc_or_str):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -55,55 +53,38 @@ class BaseElement(BaseParser):
|
|||||||
def inner_ele(self) -> Union[WebElement, HtmlElement]:
|
def inner_ele(self) -> Union[WebElement, HtmlElement]:
|
||||||
return self._inner_ele
|
return self._inner_ele
|
||||||
|
|
||||||
def next(self, index: int = 1, filter_loc: Union[tuple, str] = ''):
|
|
||||||
"""返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 \n
|
|
||||||
:param index: 后面第几个兄弟元素
|
|
||||||
:param filter_loc: 用于筛选元素的查询语法
|
|
||||||
:return: 兄弟元素
|
|
||||||
"""
|
|
||||||
nexts = self.nexts(total=1, begin=index, filter_loc=filter_loc)
|
|
||||||
return nexts[0] if nexts else None
|
|
||||||
|
|
||||||
# ----------------以下属性或方法由后代实现----------------
|
# ----------------以下属性或方法由后代实现----------------
|
||||||
@property
|
@property
|
||||||
def tag(self):
|
def tag(self):
|
||||||
return
|
return
|
||||||
|
|
||||||
def parent(self, level_or_loc: Union[tuple, str, int] = 1):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def prev(self, index: int = 1, filter_loc: Union[tuple, str] = ''):
|
|
||||||
return None # ShadowRootElement直接继承
|
|
||||||
|
|
||||||
def prevs(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = ''):
|
|
||||||
return None # ShadowRootElement直接继承
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_valid(self):
|
def is_valid(self):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def nexts(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = ''):
|
def _ele(self, loc_or_ele, timeout=None, single=True):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def parent(self, level_or_loc: Union[tuple, str, int] = 1):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def prev(self, index: int = 1):
|
||||||
|
return None # ShadowRootElement直接继承
|
||||||
|
|
||||||
|
def prevs(self):
|
||||||
|
return None # ShadowRootElement直接继承
|
||||||
|
|
||||||
|
def next(self, index: int = 1):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def nexts(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class DrissionElement(BaseElement):
|
class DrissionElement(BaseElement):
|
||||||
"""DriverElement 和 SessionElement的基类,但不是ShadowRootElement的基类"""
|
"""DriverElement 和 SessionElement的基类,但不是ShadowRootElement的基类"""
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def parent(self, level_or_loc: Union[tuple, str, int] = 1):
|
|
||||||
"""返回父级元素"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
def prev(self, index: int = 1, filter_loc: Union[tuple, str] = ''):
|
|
||||||
"""返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 \n
|
|
||||||
:param index: 前面第几个
|
|
||||||
:param filter_loc: 用于筛选元素的查询语法
|
|
||||||
:return: 兄弟元素
|
|
||||||
"""
|
|
||||||
prevs = self.prevs(total=1, begin=index, filter_loc=filter_loc)
|
|
||||||
return prevs[0] if prevs else None
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def link(self) -> str:
|
def link(self) -> str:
|
||||||
"""返回href或src绝对url"""
|
"""返回href或src绝对url"""
|
||||||
@ -136,72 +117,92 @@ class DrissionElement(BaseElement):
|
|||||||
|
|
||||||
return [format_html(x.strip(' ').rstrip('\n')) for x in texts if x and sub('[\r\n\t ]', '', x) != '']
|
return [format_html(x.strip(' ').rstrip('\n')) for x in texts if x and sub('[\r\n\t ]', '', x) != '']
|
||||||
|
|
||||||
def nexts(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = ''):
|
def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> 'DrissionElement':
|
||||||
"""返回后面若干个兄弟元素或节点组成的列表 \n
|
"""返回上面某一级父元素,可指定层数或用查询语法定位 \n
|
||||||
可用查询语法筛选,可指定返回筛选结果的哪几个,total为None返回所有 \n
|
:param level_or_loc: 第几级父元素,或定位符
|
||||||
:param total: 获取多少个元素或节点
|
:return: DriverElement对象
|
||||||
:param begin: 从第几个开始获取,从1起
|
|
||||||
:param filter_loc: 用于筛选元素的查询语法
|
|
||||||
:return: SessionElement对象
|
|
||||||
"""
|
"""
|
||||||
return self._get_brothers(begin=begin, total=total, filter_loc=filter_loc, direction='following')
|
if isinstance(level_or_loc, int):
|
||||||
|
loc = f'xpath:./ancestor::*[{level_or_loc}]'
|
||||||
|
|
||||||
def prevs(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = ''):
|
elif isinstance(level_or_loc, (tuple, str)):
|
||||||
"""返回前面若干个兄弟元素或节点组成的列表 \n
|
loc = get_loc(level_or_loc, True)
|
||||||
可用查询语法筛选,可指定返回筛选结果的哪几个,total为None返回所有 \n
|
|
||||||
:param total: 获取多少个元素或节点
|
if loc[0] == 'css selector':
|
||||||
:param begin: 从第几个开始获取,从1起
|
raise ValueError('此css selector语法不受支持,请换成xpath。')
|
||||||
|
|
||||||
|
loc = f'xpath:./ancestor::{loc[1].lstrip(". / ")}'
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise TypeError('level_or_loc参数只能是tuple、int或str。')
|
||||||
|
|
||||||
|
return self.ele(loc, timeout=0)
|
||||||
|
|
||||||
|
def prev(self, index: int = 1, filter_loc: Union[tuple, str] = ''):
|
||||||
|
"""返回前面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 \n
|
||||||
|
:param index: 前面第几个查询结果元素
|
||||||
|
:param filter_loc: 用于筛选元素的查询语法
|
||||||
|
:return: 兄弟元素
|
||||||
|
"""
|
||||||
|
nodes = self._get_brothers(index=index, filter_loc=filter_loc, direction='preceding')
|
||||||
|
return nodes[-1] if nodes else None
|
||||||
|
|
||||||
|
def next(self, index: int = 1, filter_loc: Union[tuple, str] = ''):
|
||||||
|
"""返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 \n
|
||||||
|
:param index: 后面第几个查询结果元素
|
||||||
|
:param filter_loc: 用于筛选元素的查询语法
|
||||||
|
:return: 兄弟元素
|
||||||
|
"""
|
||||||
|
nodes = self._get_brothers(index=index, filter_loc=filter_loc, direction='following')
|
||||||
|
return nodes[0] if nodes else None
|
||||||
|
|
||||||
|
def nexts(self, filter_loc: Union[tuple, str] = ''):
|
||||||
|
"""返回后面全部兄弟元素或节点组成的列表,可用查询语法筛选 \n
|
||||||
:param filter_loc: 用于筛选元素的查询语法
|
:param filter_loc: 用于筛选元素的查询语法
|
||||||
:return: SessionElement对象
|
:return: SessionElement对象
|
||||||
"""
|
"""
|
||||||
return self._get_brothers(begin=begin, total=total, filter_loc=filter_loc, direction='preceding')
|
return self._get_brothers(filter_loc=filter_loc, direction='following')
|
||||||
|
|
||||||
|
def prevs(self, filter_loc: Union[tuple, str] = ''):
|
||||||
|
"""返回前面全部兄弟元素或节点组成的列表,可用查询语法筛选 \n
|
||||||
|
:param filter_loc: 用于筛选元素的查询语法
|
||||||
|
:return: SessionElement对象
|
||||||
|
"""
|
||||||
|
return self._get_brothers(filter_loc=filter_loc, direction='preceding')
|
||||||
|
|
||||||
def _get_brothers(self,
|
def _get_brothers(self,
|
||||||
begin: int = 1,
|
index: int = None,
|
||||||
total: int = None,
|
|
||||||
filter_loc: Union[tuple, str] = '',
|
filter_loc: Union[tuple, str] = '',
|
||||||
direction: str = 'following'):
|
direction: str = 'following') -> List['DrissionElement']:
|
||||||
"""按要求返回兄弟元素或节点组成的列表 \n
|
"""按要求返回兄弟元素或节点组成的列表 \n
|
||||||
:param begin: 从第几个兄弟节点或元素开始
|
:param index: 获取第几个
|
||||||
:param total: 获取多少个
|
|
||||||
:param filter_loc: 用于筛选元素的查询语法
|
:param filter_loc: 用于筛选元素的查询语法
|
||||||
:param direction: 'following' 或 'preceding',查找的方向
|
:param direction: 'following' 或 'preceding',查找的方向
|
||||||
:return: DriverElement对象或字符串
|
:return: DriverElement对象或字符串
|
||||||
"""
|
"""
|
||||||
|
if index is not None and index < 1:
|
||||||
|
raise ValueError('index必须大于等于1。')
|
||||||
|
|
||||||
timeout = 0 if direction == 'preceding' else .5
|
timeout = 0 if direction == 'preceding' else .5
|
||||||
|
|
||||||
if isinstance(filter_loc, tuple):
|
# 仅根据位置取一个
|
||||||
node_txt = translate_loc(filter_loc)
|
if index and not filter_loc:
|
||||||
|
xpath = f'xpath:./{direction}-sibling::*[{index}]'
|
||||||
elif isinstance(filter_loc, str):
|
|
||||||
node_txt = str_to_loc(filter_loc)
|
|
||||||
|
|
||||||
|
# 根据筛选获取所有
|
||||||
else:
|
else:
|
||||||
raise TypeError('filter_loc参数只能是tuple或str。')
|
loc = get_loc(filter_loc, True)
|
||||||
|
if loc[0] == 'css selector':
|
||||||
|
raise ValueError('此css selector语法不受支持,请换成xpath。')
|
||||||
|
|
||||||
if node_txt[0] == 'css selector':
|
loc = loc[1].lstrip('./')
|
||||||
raise ValueError('此处暂不支持css selector选择器。')
|
xpath = f'xpath:./{direction}-sibling::{loc}'
|
||||||
|
|
||||||
node_txt = node_txt[1].lstrip('./')
|
print(xpath)
|
||||||
|
nodes = self._ele(xpath, timeout=timeout, single=False)
|
||||||
# 获取所有节点
|
|
||||||
t = f'xpath:./{direction}-sibling::{node_txt}'
|
|
||||||
nodes = self._ele(t, timeout=timeout, single=False)
|
|
||||||
nodes = [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')]
|
nodes = [e for e in nodes if not (isinstance(e, str) and sub('[ \n\t\r]', '', e) == '')]
|
||||||
|
|
||||||
len_nodes = len(nodes)
|
return nodes
|
||||||
if direction == 'following':
|
|
||||||
end = None if not total else begin - 1 + total
|
|
||||||
begin -= 1
|
|
||||||
|
|
||||||
else:
|
|
||||||
tmp = len_nodes - begin
|
|
||||||
begin = tmp - total + 1
|
|
||||||
end = tmp + 1
|
|
||||||
if begin < 0:
|
|
||||||
begin = None
|
|
||||||
|
|
||||||
return nodes[begin:end]
|
|
||||||
|
|
||||||
# ----------------以下属性或方法由后代实现----------------
|
# ----------------以下属性或方法由后代实现----------------
|
||||||
@property
|
@property
|
||||||
@ -216,10 +217,6 @@ class DrissionElement(BaseElement):
|
|||||||
def raw_text(self):
|
def raw_text(self):
|
||||||
return
|
return
|
||||||
|
|
||||||
# @abstractmethod
|
|
||||||
# def parents(self, num: int = 1):
|
|
||||||
# pass
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def attr(self, attr: str):
|
def attr(self, attr: str):
|
||||||
return ''
|
return ''
|
||||||
|
@ -79,6 +79,33 @@ def get_ele_txt(e) -> str:
|
|||||||
return format_html(re_str)
|
return format_html(re_str)
|
||||||
|
|
||||||
|
|
||||||
|
def get_loc(loc: Union[tuple, str], translate_css: bool = False) -> tuple:
|
||||||
|
"""接收selenium定位元组或本库定位语法,转换为标准定位元组,可翻译css selector为xpath \n
|
||||||
|
:param loc: selenium定位元组或本库定位语法
|
||||||
|
:param translate_css: 是否翻译css selector为xpath
|
||||||
|
:return: DrissionPage定位元组
|
||||||
|
"""
|
||||||
|
if isinstance(loc, tuple):
|
||||||
|
loc = translate_loc(loc)
|
||||||
|
|
||||||
|
elif isinstance(loc, str):
|
||||||
|
loc = str_to_loc(loc)
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise TypeError('loc参数只能是tuple或str。')
|
||||||
|
|
||||||
|
if loc[0] == 'css selector' and translate_css:
|
||||||
|
from lxml.cssselect import CSSSelector, ExpressionError
|
||||||
|
try:
|
||||||
|
path = str(CSSSelector(loc[1], translator='html').path)
|
||||||
|
path = path[20:] if path.startswith('descendant-or-self::') else path
|
||||||
|
loc = 'xpath', path
|
||||||
|
except ExpressionError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return loc
|
||||||
|
|
||||||
|
|
||||||
def str_to_loc(loc: str) -> tuple:
|
def str_to_loc(loc: str) -> tuple:
|
||||||
"""处理元素查找语句 \n
|
"""处理元素查找语句 \n
|
||||||
查找方式:属性、tag name及属性、文本、xpath、css selector、id、class \n
|
查找方式:属性、tag name及属性、文本、xpath、css selector、id、class \n
|
||||||
|
@ -16,7 +16,7 @@ from selenium.webdriver.support import expected_conditions as ec
|
|||||||
from selenium.webdriver.support.wait import WebDriverWait
|
from selenium.webdriver.support.wait import WebDriverWait
|
||||||
|
|
||||||
from .base import DrissionElement, BaseElement
|
from .base import DrissionElement, BaseElement
|
||||||
from .common import str_to_loc, get_usable_path, translate_loc, format_html, get_ele_txt
|
from .common import str_to_loc, get_usable_path, format_html, get_ele_txt, get_loc
|
||||||
from .session_element import make_session_ele
|
from .session_element import make_session_ele
|
||||||
|
|
||||||
|
|
||||||
@ -91,16 +91,6 @@ class DriverElement(DrissionElement):
|
|||||||
"""返回未格式化处理的元素内文本"""
|
"""返回未格式化处理的元素内文本"""
|
||||||
return self.inner_ele.get_attribute('innerText')
|
return self.inner_ele.get_attribute('innerText')
|
||||||
|
|
||||||
def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> 'DriverElement':
|
|
||||||
"""返回上面某一级父元素,可指定层数或用查询语法定位 \n
|
|
||||||
:param level_or_loc: 第几级父元素,或定位符
|
|
||||||
:return: DriverElement对象
|
|
||||||
"""
|
|
||||||
if isinstance(level_or_loc, int):
|
|
||||||
loc = 'xpath', f'.{"/.." * level_or_loc}'
|
|
||||||
return self.ele(loc, timeout=0)
|
|
||||||
# TODO: 完善用定位符时的逻辑
|
|
||||||
|
|
||||||
def attr(self, attr: str) -> str:
|
def attr(self, attr: str) -> str:
|
||||||
"""获取attribute属性值 \n
|
"""获取attribute属性值 \n
|
||||||
:param attr: 属性名
|
:param attr: 属性名
|
||||||
@ -250,89 +240,84 @@ class DriverElement(DrissionElement):
|
|||||||
return self._select
|
return self._select
|
||||||
|
|
||||||
def left(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
|
def left(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
|
||||||
"""获取网页上显示在当前元素左边的某个元素,可设置选取条件 \n
|
"""获取网页上显示在当前元素左边的某个元素,可设置选取条件,可指定结果中第几个 \n
|
||||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
|
||||||
:param index: 获取第几个
|
:param index: 获取第几个
|
||||||
|
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||||
:return: DriverElement对象
|
:return: DriverElement对象
|
||||||
"""
|
"""
|
||||||
return self._get_relative_eles('left', filter_loc, 1, index)[0]
|
eles = self._get_relative_eles('left', filter_loc)
|
||||||
|
return eles[index - 1] if index <= len(eles) else None
|
||||||
|
|
||||||
def right(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
|
def right(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
|
||||||
"""获取网页上显示在当前元素右边的某个元素,可设置选取条件 \n
|
"""获取网页上显示在当前元素右边的某个元素,可设置选取条件,可指定结果中第几个 \n
|
||||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
|
||||||
:param index: 获取第几个
|
:param index: 获取第几个
|
||||||
|
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||||
:return: DriverElement对象
|
:return: DriverElement对象
|
||||||
"""
|
"""
|
||||||
return self._get_relative_eles('right', filter_loc, 1, index)[0]
|
eles = self._get_relative_eles('right', filter_loc)
|
||||||
|
return eles[index - 1] if index <= len(eles) else None
|
||||||
|
|
||||||
def above(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
|
def above(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
|
||||||
"""获取网页上显示在当前元素上边的某个元素,可设置选取条件 \n
|
"""获取网页上显示在当前元素上边的某个元素,可设置选取条件,可指定结果中第几个 \n
|
||||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
|
||||||
:param index: 获取第几个
|
:param index: 获取第几个
|
||||||
|
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||||
:return: DriverElement对象
|
:return: DriverElement对象
|
||||||
"""
|
"""
|
||||||
return self._get_relative_eles('left', filter_loc, 1, index)[0]
|
eles = self._get_relative_eles('left', filter_loc)
|
||||||
|
return eles[index - 1] if index <= len(eles) else None
|
||||||
|
|
||||||
def below(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
|
def below(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
|
||||||
"""获取网页上显示在当前元素下边的某个元素,可设置选取条件 \n
|
"""获取网页上显示在当前元素下边的某个元素,可设置选取条件,可指定结果中第几个 \n
|
||||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
|
||||||
:param index: 获取第几个
|
:param index: 获取第几个
|
||||||
|
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||||
:return: DriverElement对象
|
:return: DriverElement对象
|
||||||
"""
|
"""
|
||||||
return self._get_relative_eles('left', filter_loc, 1, index)[0]
|
eles = self._get_relative_eles('left', filter_loc)
|
||||||
|
return eles[index - 1] if index <= len(eles) else None
|
||||||
|
|
||||||
def near(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
|
def near(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> 'DriverElement':
|
||||||
"""获取网页上显示在当前元素最近的某个元素,可设置选取条件 \n
|
"""获取网页上显示在当前元素最近的某个元素,可设置选取条件,可指定结果中第几个 \n
|
||||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
|
||||||
:param index: 获取第几个
|
:param index: 获取第几个
|
||||||
|
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||||
:return: DriverElement对象
|
:return: DriverElement对象
|
||||||
"""
|
"""
|
||||||
return self._get_relative_eles('near', filter_loc, 1, index)[0]
|
eles = self._get_relative_eles('near', filter_loc)
|
||||||
|
return eles[index - 1] if index <= len(eles) else None
|
||||||
|
|
||||||
def lefts(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
|
def lefts(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
|
||||||
"""获取网页上显示在当前元素左边的某个元素,可设置选取条件 \n
|
"""获取网页上显示在当前元素左边的所有元素,可设置选取条件,从近到远排列 \n
|
||||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||||
:param total: 获取多少个
|
|
||||||
:param begin: 从第几个开始返回
|
|
||||||
:return: DriverElement对象组成的列表
|
:return: DriverElement对象组成的列表
|
||||||
"""
|
"""
|
||||||
return self._get_relative_eles('left', filter_loc, total, begin)
|
return self._get_relative_eles('left', filter_loc)
|
||||||
|
|
||||||
def rights(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
|
def rights(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
|
||||||
"""获取网页上显示在当前元素右边的某个元素,可设置选取条件 \n
|
"""获取网页上显示在当前元素右边的所有元素,可设置选取条件,从近到远排列 \n
|
||||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||||
:param total: 获取多少个
|
|
||||||
:param begin: 从第几个开始返回
|
|
||||||
:return: DriverElement对象组成的列表
|
:return: DriverElement对象组成的列表
|
||||||
"""
|
"""
|
||||||
return self._get_relative_eles('right', filter_loc, total, begin)
|
return self._get_relative_eles('right', filter_loc)
|
||||||
|
|
||||||
def aboves(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
|
def aboves(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
|
||||||
"""获取网页上显示在当前元素上边的某个元素,可设置选取条件 \n
|
"""获取网页上显示在当前元素上边的所有元素,可设置选取条件,从近到远排列 \n
|
||||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||||
:param total: 获取多少个
|
|
||||||
:param begin: 从第几个开始返回
|
|
||||||
:return: DriverElement对象组成的列表
|
:return: DriverElement对象组成的列表
|
||||||
"""
|
"""
|
||||||
return self._get_relative_eles('left', filter_loc, total, begin)
|
return self._get_relative_eles('left', filter_loc)
|
||||||
|
|
||||||
def belows(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
|
def belows(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
|
||||||
"""获取网页上显示在当前元素下边的某个元素,可设置选取条件 \n
|
"""获取网页上显示在当前元素下边的所有元素,可设置选取条件,从近到远排列 \n
|
||||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||||
:param total: 获取多少个
|
|
||||||
:param begin: 从第几个开始返回
|
|
||||||
:return: DriverElement对象组成的列表
|
:return: DriverElement对象组成的列表
|
||||||
"""
|
"""
|
||||||
return self._get_relative_eles('left', filter_loc, total, begin)
|
return self._get_relative_eles('left', filter_loc)
|
||||||
|
|
||||||
def nears(self, total: int = None, begin: int = 1, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
|
def nears(self, filter_loc: Union[tuple, str] = '') -> List['DriverElement']:
|
||||||
"""获取网页上显示在当前元素最近的某个元素,可设置选取条件 \n
|
"""获取网页上显示在当前元素附近元素,可设置选取条件,从近到远排列 \n
|
||||||
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
:param filter_loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||||
:param begin: 从第几个开始返回
|
|
||||||
:param total: 获取多少个
|
|
||||||
:return: DriverElement对象组成的列表
|
:return: DriverElement对象组成的列表
|
||||||
"""
|
"""
|
||||||
return self._get_relative_eles('near', filter_loc, total, begin)
|
return self._get_relative_eles('near', filter_loc)
|
||||||
|
|
||||||
def wait_ele(self,
|
def wait_ele(self,
|
||||||
loc_or_ele: Union[str, tuple, DrissionElement, WebElement],
|
loc_or_ele: Union[str, tuple, DrissionElement, WebElement],
|
||||||
@ -664,21 +649,12 @@ class DriverElement(DrissionElement):
|
|||||||
|
|
||||||
def _get_relative_eles(self,
|
def _get_relative_eles(self,
|
||||||
mode: str,
|
mode: str,
|
||||||
loc: Union[tuple, str] = '',
|
loc: Union[tuple, str] = '') -> Union[List['DriverElement'], 'DriverElement']:
|
||||||
total: int = 1,
|
|
||||||
begin: int = 1) -> Union[List['DriverElement'], 'DriverElement']:
|
|
||||||
"""获取网页上相对于当前元素周围的某个元素,可设置选取条件 \n
|
"""获取网页上相对于当前元素周围的某个元素,可设置选取条件 \n
|
||||||
:param mode: 可选:'left', 'right', 'above', 'below', 'near'
|
:param mode: 可选:'left', 'right', 'above', 'below', 'near'
|
||||||
:param loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
:param loc: 筛选条件,可用selenium的(By, str),也可用本库定位语法
|
||||||
:param total: 一共获取几个
|
|
||||||
:param begin: 从第几个开始获取
|
|
||||||
:return: DriverElement对象
|
:return: DriverElement对象
|
||||||
"""
|
"""
|
||||||
if not isinstance(begin, int) or begin < 1 or not isinstance(total, int) or total < 1:
|
|
||||||
raise ValueError('begin和total参数只能是大于0的整数。')
|
|
||||||
|
|
||||||
# TODO: 支持selenium原生相对定位符
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from selenium.webdriver.support.relative_locator import RelativeBy
|
from selenium.webdriver.support.relative_locator import RelativeBy
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@ -699,8 +675,7 @@ class DriverElement(DrissionElement):
|
|||||||
else: # 'near'
|
else: # 'near'
|
||||||
eles = self.page.driver.find_elements(RelativeBy({loc[0]: loc[1]}).near(self.inner_ele))
|
eles = self.page.driver.find_elements(RelativeBy({loc[0]: loc[1]}).near(self.inner_ele))
|
||||||
|
|
||||||
end = None if not total or total >= len(eles) else begin + total - 1
|
return [self.page.ele(e) for e in eles]
|
||||||
return [self.page.ele(e) for e in eles[begin - 1:end]]
|
|
||||||
|
|
||||||
except IndexError:
|
except IndexError:
|
||||||
raise ValueError('未找到元素,请检查浏览器版本,低版本的浏览器无法使用此方法。')
|
raise ValueError('未找到元素,请检查浏览器版本,低版本的浏览器无法使用此方法。')
|
||||||
@ -719,12 +694,17 @@ def make_driver_ele(page_or_ele,
|
|||||||
:return: 返回DriverElement元素或它们组成的列表
|
:return: 返回DriverElement元素或它们组成的列表
|
||||||
"""
|
"""
|
||||||
# ---------------处理定位符---------------
|
# ---------------处理定位符---------------
|
||||||
if isinstance(loc, str):
|
if isinstance(loc, (str, tuple)):
|
||||||
loc = str_to_loc(loc)
|
loc = get_loc(loc)
|
||||||
elif isinstance(loc, tuple):
|
|
||||||
loc = translate_loc(loc)
|
elif str(type(loc)).endswith('RelativeBy'):
|
||||||
|
page = page_or_ele.page if isinstance(page_or_ele, BaseElement) else page_or_ele
|
||||||
|
driver = page.driver
|
||||||
|
eles = driver.find_elements(loc)
|
||||||
|
return DriverElement(eles[0], page) if single else [DriverElement(ele, page) for ele in eles]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise ValueError("定位符必须为str或长度为2的tuple。")
|
raise ValueError("定位符必须为str、长度为2的tuple、或RelativeBy对象。")
|
||||||
|
|
||||||
# ---------------设置 page 和 driver---------------
|
# ---------------设置 page 和 driver---------------
|
||||||
if isinstance(page_or_ele, BaseElement): # 传入DriverElement 或 ShadowRootElement
|
if isinstance(page_or_ele, BaseElement): # 传入DriverElement 或 ShadowRootElement
|
||||||
|
@ -12,7 +12,7 @@ from lxml.etree import tostring
|
|||||||
from lxml.html import HtmlElement, fromstring
|
from lxml.html import HtmlElement, fromstring
|
||||||
|
|
||||||
from .base import DrissionElement, BasePage, BaseElement
|
from .base import DrissionElement, BasePage, BaseElement
|
||||||
from .common import str_to_loc, translate_loc, get_ele_txt
|
from .common import get_ele_txt, get_loc
|
||||||
|
|
||||||
|
|
||||||
class SessionElement(DrissionElement):
|
class SessionElement(DrissionElement):
|
||||||
@ -70,15 +70,6 @@ class SessionElement(DrissionElement):
|
|||||||
"""返回未格式化处理的元素内文本"""
|
"""返回未格式化处理的元素内文本"""
|
||||||
return str(self._inner_ele.text_content())
|
return str(self._inner_ele.text_content())
|
||||||
|
|
||||||
def parent(self, level_or_loc: Union[tuple, str, int] = 1) -> 'SessionElement':
|
|
||||||
"""返回上面某一级父元素,可指定层数或用查询语法定位 \n
|
|
||||||
:param level_or_loc: 第几级父元素,或定位符
|
|
||||||
:return: SessionElement对象
|
|
||||||
"""
|
|
||||||
if isinstance(level_or_loc, int):
|
|
||||||
return self.ele(f'xpath:..{"/.." * (level_or_loc - 1)}')
|
|
||||||
# TODO: 完善用定位符时的逻辑
|
|
||||||
|
|
||||||
def attr(self, attr: str) -> Union[str, None]:
|
def attr(self, attr: str) -> Union[str, None]:
|
||||||
"""返回attribute属性值 \n
|
"""返回attribute属性值 \n
|
||||||
:param attr: 属性名
|
:param attr: 属性名
|
||||||
@ -212,10 +203,10 @@ def make_session_ele(html_or_ele: Union[str, BaseElement, BasePage],
|
|||||||
return html_or_ele
|
return html_or_ele
|
||||||
loc = ('xpath', '.')
|
loc = ('xpath', '.')
|
||||||
single = True
|
single = True
|
||||||
elif isinstance(loc, str):
|
|
||||||
loc = str_to_loc(loc)
|
elif isinstance(loc, (str, tuple)):
|
||||||
elif isinstance(loc, tuple):
|
loc = get_loc(loc)
|
||||||
loc = translate_loc(loc)
|
|
||||||
else:
|
else:
|
||||||
raise ValueError("定位符必须为str或长度为2的tuple。")
|
raise ValueError("定位符必须为str或长度为2的tuple。")
|
||||||
|
|
||||||
|
@ -10,6 +10,7 @@ from typing import Union, Any, Tuple, List
|
|||||||
from selenium.webdriver.remote.webelement import WebElement
|
from selenium.webdriver.remote.webelement import WebElement
|
||||||
|
|
||||||
from .base import BaseElement
|
from .base import BaseElement
|
||||||
|
from .common import get_loc
|
||||||
from .driver_element import make_driver_ele, DriverElement
|
from .driver_element import make_driver_ele, DriverElement
|
||||||
from .session_element import make_session_ele
|
from .session_element import make_session_ele
|
||||||
|
|
||||||
@ -45,28 +46,48 @@ class ShadowRootElement(BaseElement):
|
|||||||
"""返回内部的html文本"""
|
"""返回内部的html文本"""
|
||||||
return self.inner_ele.get_attribute('innerHTML')
|
return self.inner_ele.get_attribute('innerHTML')
|
||||||
|
|
||||||
def parent(self, level: int = 1) -> DriverElement:
|
def parent(self, level_or_loc: Union[str, int] = 1) -> DriverElement:
|
||||||
"""返回上面第level级父元素 \n
|
"""返回上面某一级父元素,可指定层数或用查询语法定位 \n
|
||||||
:param level: 第几级父元素
|
:param level_or_loc: 第几级父元素,或定位符
|
||||||
:return: DriverElement对象
|
:return: DriverElement对象
|
||||||
"""
|
"""
|
||||||
if level == 1:
|
if isinstance(level_or_loc, int):
|
||||||
return self.parent_ele
|
loc = f'xpath:./ancestor-or-self::*[{level_or_loc}]'
|
||||||
|
|
||||||
|
elif isinstance(level_or_loc, (tuple, str)):
|
||||||
|
loc = get_loc(level_or_loc, True)
|
||||||
|
|
||||||
|
if loc[0] == 'css selector':
|
||||||
|
raise ValueError('此css selector语法不受支持,请换成xpath。')
|
||||||
|
|
||||||
|
loc = f'xpath:./ancestor-or-self::{loc[1].lstrip(". / ")}'
|
||||||
|
|
||||||
else:
|
else:
|
||||||
loc = 'xpath', f'.{"/.." * (level - 1)}'
|
raise TypeError('level_or_loc参数只能是tuple、int或str。')
|
||||||
return self.parent_ele.ele(loc, timeout=0.1)
|
|
||||||
|
|
||||||
def nexts(self, total: int = None, begin: int = 1) -> DriverElement:
|
return self.parent_ele.ele(loc, timeout=0)
|
||||||
"""返回后面若干个兄弟元素或节点组成的列表,total为None返回所有 \n
|
|
||||||
:param total: 获取多少个元素或节点
|
def next(self, index: int = 1, filter_loc: Union[tuple, str] = '') -> DriverElement:
|
||||||
:param begin: 从第几个开始获取,从1起
|
"""返回后面的一个兄弟元素,可用查询语法筛选,可指定返回筛选结果的第几个 \n
|
||||||
|
:param index: 第几个查询结果元素
|
||||||
|
:param filter_loc: 用于筛选元素的查询语法
|
||||||
:return: DriverElement对象
|
:return: DriverElement对象
|
||||||
"""
|
"""
|
||||||
loc = 'css selector', f':nth-child(n)'
|
nodes = self.nexts(filter_loc=filter_loc)
|
||||||
eles = self.parent_ele.eles(loc, timeout=0.1)
|
return nodes[index - 1] if nodes else None
|
||||||
end = None if not total or total >= len(eles) else begin + total - 1
|
|
||||||
|
|
||||||
return eles[begin - 1:end]
|
def nexts(self, filter_loc: Union[tuple, str] = '') -> List[DriverElement]:
|
||||||
|
"""返回后面所有兄弟元素或节点组成的列表 \n
|
||||||
|
:param filter_loc: 用于筛选元素的查询语法
|
||||||
|
:return: DriverElement对象组成的列表
|
||||||
|
"""
|
||||||
|
loc = get_loc(filter_loc, True)
|
||||||
|
if loc[0] == 'css selector':
|
||||||
|
raise ValueError('此css selector语法不受支持,请换成xpath。')
|
||||||
|
|
||||||
|
loc = loc[1].lstrip('./')
|
||||||
|
xpath = f'xpath:./{loc}'
|
||||||
|
return self.parent_ele.eles(xpath, timeout=0.1)
|
||||||
|
|
||||||
def ele(self,
|
def ele(self,
|
||||||
loc_or_str: Union[Tuple[str, str], str],
|
loc_or_str: Union[Tuple[str, str], str],
|
||||||
@ -114,14 +135,17 @@ class ShadowRootElement(BaseElement):
|
|||||||
"""
|
"""
|
||||||
if isinstance(loc_or_str, str):
|
if isinstance(loc_or_str, str):
|
||||||
loc_or_str = str_to_css_loc(loc_or_str)
|
loc_or_str = str_to_css_loc(loc_or_str)
|
||||||
|
|
||||||
elif isinstance(loc_or_str, tuple) and len(loc_or_str) == 2:
|
elif isinstance(loc_or_str, tuple) and len(loc_or_str) == 2:
|
||||||
if loc_or_str[0] == 'xpath':
|
if loc_or_str[0] == 'xpath':
|
||||||
raise ValueError('不支持xpath。')
|
raise ValueError('不支持xpath。')
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise ValueError('loc_or_str参数只能是tuple或str类型。')
|
raise ValueError('loc_or_str参数只能是tuple或str类型。')
|
||||||
|
|
||||||
if loc_or_str[0] == 'css selector':
|
if loc_or_str[0] == 'css selector':
|
||||||
return make_driver_ele(self, loc_or_str, single, timeout)
|
return make_driver_ele(self, loc_or_str, single, timeout)
|
||||||
|
|
||||||
elif loc_or_str[0] == 'text':
|
elif loc_or_str[0] == 'text':
|
||||||
return self._find_eles_by_text(loc_or_str[1], loc_or_str[2], loc_or_str[3], single)
|
return self._find_eles_by_text(loc_or_str[1], loc_or_str[2], loc_or_str[3], single)
|
||||||
|
|
||||||
@ -193,7 +217,7 @@ class ShadowRootElement(BaseElement):
|
|||||||
|
|
||||||
|
|
||||||
def str_to_css_loc(loc: str) -> tuple:
|
def str_to_css_loc(loc: str) -> tuple:
|
||||||
"""处理元素查找语句 \n
|
"""处理元素查找语句 \n
|
||||||
查找方式:属性、tag name及属性、文本、css selector \n
|
查找方式:属性、tag name及属性、文本、css selector \n
|
||||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||||
"""
|
"""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user