mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
ele()和eles()删除show_errmsg参数;
增加__call__()方法,ele('xxx')等效于ele.ele('xxx')
This commit is contained in:
parent
1e8e606f93
commit
97c3197df4
@ -9,22 +9,25 @@ from html import unescape
|
|||||||
from typing import Union, List, Tuple
|
from typing import Union, List, Tuple
|
||||||
from urllib.parse import urlparse, urljoin, urlunparse
|
from urllib.parse import urlparse, urljoin, urlunparse
|
||||||
|
|
||||||
from lxml.etree import tostring, HTML
|
from cssselect import SelectorSyntaxError
|
||||||
from lxml.html import HtmlElement
|
from lxml.etree import tostring, HTML, _Element, XPathEvalError
|
||||||
|
|
||||||
from .common import DrissionElement, get_loc_from_str, translate_loc_to_xpath
|
from .common import DrissionElement, get_loc_from_str, translate_loc_to_xpath
|
||||||
|
|
||||||
|
|
||||||
class SessionElement(DrissionElement):
|
class SessionElement(DrissionElement):
|
||||||
"""session模式的元素对象,包装了一个Element对象,并封装了常用功能"""
|
"""session模式的元素对象,包装了一个lxml的Element对象,并封装了常用功能"""
|
||||||
|
|
||||||
def __init__(self, ele: HtmlElement, page=None):
|
def __init__(self, ele: _Element, page=None):
|
||||||
super().__init__(ele, page)
|
super().__init__(ele, page)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs]
|
attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs]
|
||||||
return f'<SessionElement {self.tag} {" ".join(attrs)}>'
|
return f'<SessionElement {self.tag} {" ".join(attrs)}>'
|
||||||
|
|
||||||
|
def __call__(self, loc_or_str: Union[Tuple[str, str], str], mode: str = 'single'):
|
||||||
|
return self.ele(loc_or_str, mode)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def attrs(self) -> dict:
|
def attrs(self) -> dict:
|
||||||
"""返回元素所有属性及值"""
|
"""返回元素所有属性及值"""
|
||||||
@ -116,10 +119,10 @@ class SessionElement(DrissionElement):
|
|||||||
else:
|
else:
|
||||||
raise ValueError("Argument mode can only be 'node' or 'ele'.")
|
raise ValueError("Argument mode can only be 'node' or 'ele'.")
|
||||||
|
|
||||||
e = self.ele(f'xpath:./following-sibling::{node_txt}[{num}]', show_errmsg=False)
|
e = self.ele(f'xpath:./following-sibling::{node_txt}[{num}]')
|
||||||
while e == '\n':
|
while e == '\n':
|
||||||
num += 1
|
num += 1
|
||||||
e = self.ele(f'xpath:./following-sibling::{node_txt}[{num}]', show_errmsg=False)
|
e = self.ele(f'xpath:./following-sibling::{node_txt}[{num}]')
|
||||||
|
|
||||||
return e
|
return e
|
||||||
|
|
||||||
@ -136,14 +139,14 @@ class SessionElement(DrissionElement):
|
|||||||
else:
|
else:
|
||||||
raise ValueError("Argument mode can only be 'node' or 'ele'.")
|
raise ValueError("Argument mode can only be 'node' or 'ele'.")
|
||||||
|
|
||||||
e = self.ele(f'xpath:./preceding-sibling::{node_txt}[{num}]', show_errmsg=False)
|
e = self.ele(f'xpath:./preceding-sibling::{node_txt}[{num}]')
|
||||||
while e == '\n':
|
while e == '\n':
|
||||||
num += 1
|
num += 1
|
||||||
e = self.ele(f'xpath:./preceding-sibling::{node_txt}[{num}]', show_errmsg=False)
|
e = self.ele(f'xpath:./preceding-sibling::{node_txt}[{num}]')
|
||||||
|
|
||||||
return e
|
return e
|
||||||
|
|
||||||
def ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None, show_errmsg: bool = False):
|
def ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None):
|
||||||
"""返回当前元素下级符合条件的子元素,默认返回第一个 \n
|
"""返回当前元素下级符合条件的子元素,默认返回第一个 \n
|
||||||
示例: \n
|
示例: \n
|
||||||
- 用loc元组查找: \n
|
- 用loc元组查找: \n
|
||||||
@ -166,7 +169,6 @@ class SessionElement(DrissionElement):
|
|||||||
ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n
|
ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n
|
||||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||||
:param show_errmsg: 出现异常时是否打印信息
|
|
||||||
:return: SessionElement对象
|
:return: SessionElement对象
|
||||||
"""
|
"""
|
||||||
if isinstance(loc_or_str, (str, tuple)):
|
if isinstance(loc_or_str, (str, tuple)):
|
||||||
@ -186,6 +188,7 @@ class SessionElement(DrissionElement):
|
|||||||
loc_str = loc_str if loc_str.startswith(('.', '/')) else f'.//{loc_str}'
|
loc_str = loc_str if loc_str.startswith(('.', '/')) else f'.//{loc_str}'
|
||||||
loc_str = loc_str if loc_str.startswith('.') else f'.{loc_str}'
|
loc_str = loc_str if loc_str.startswith('.') else f'.{loc_str}'
|
||||||
loc_str = f'{bracket}{loc_str}'
|
loc_str = f'{bracket}{loc_str}'
|
||||||
|
|
||||||
else: # css selector
|
else: # css selector
|
||||||
if loc_or_str[1][0].startswith('>'):
|
if loc_or_str[1][0].startswith('>'):
|
||||||
loc_str = f'{self.css_path}{loc_or_str[1]}'
|
loc_str = f'{self.css_path}{loc_or_str[1]}'
|
||||||
@ -194,9 +197,9 @@ class SessionElement(DrissionElement):
|
|||||||
loc_str = loc_or_str[1]
|
loc_str = loc_or_str[1]
|
||||||
|
|
||||||
loc_or_str = loc_or_str[0], loc_str
|
loc_or_str = loc_or_str[0], loc_str
|
||||||
return execute_session_find(element, loc_or_str, mode, show_errmsg)
|
return execute_session_find(element, loc_or_str, mode)
|
||||||
|
|
||||||
def eles(self, loc_or_str: Union[Tuple[str, str], str], show_errmsg: bool = False):
|
def eles(self, loc_or_str: Union[Tuple[str, str], str]):
|
||||||
"""返回当前元素下级所有符合条件的子元素 \n
|
"""返回当前元素下级所有符合条件的子元素 \n
|
||||||
示例: \n
|
示例: \n
|
||||||
- 用loc元组查找: \n
|
- 用loc元组查找: \n
|
||||||
@ -218,10 +221,9 @@ class SessionElement(DrissionElement):
|
|||||||
ele.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的子元素 \n
|
ele.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的子元素 \n
|
||||||
ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n
|
ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n
|
||||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||||
:param show_errmsg: 出现异常时是否打印信息
|
|
||||||
:return: SessionElement对象组成的列表
|
:return: SessionElement对象组成的列表
|
||||||
"""
|
"""
|
||||||
return self.ele(loc_or_str, mode='all', show_errmsg=show_errmsg)
|
return self.ele(loc_or_str, mode='all')
|
||||||
|
|
||||||
def attr(self, attr: str) -> Union[str, None]:
|
def attr(self, attr: str) -> Union[str, None]:
|
||||||
"""返回属性值 \n
|
"""返回属性值 \n
|
||||||
@ -279,19 +281,17 @@ class SessionElement(DrissionElement):
|
|||||||
|
|
||||||
def execute_session_find(page_or_ele,
|
def execute_session_find(page_or_ele,
|
||||||
loc: Tuple[str, str],
|
loc: Tuple[str, str],
|
||||||
mode: str = 'single',
|
mode: str = 'single', ) -> Union[SessionElement, List[SessionElement or str], None]:
|
||||||
show_errmsg: bool = False) -> Union[SessionElement, List[SessionElement or str]]:
|
|
||||||
"""执行session模式元素的查找 \n
|
"""执行session模式元素的查找 \n
|
||||||
页面查找元素及元素查找下级元素皆使用此方法 \n
|
页面查找元素及元素查找下级元素皆使用此方法 \n
|
||||||
:param page_or_ele: SessionPage对象或SessionElement对象
|
:param page_or_ele: SessionPage对象或SessionElement对象
|
||||||
:param loc: 元素定位元组
|
:param loc: 元素定位元组
|
||||||
:param mode: 'single' 或 'all',对应获取第一个或全部
|
:param mode: 'single' 或 'all',对应获取第一个或全部
|
||||||
:param show_errmsg: 出现异常时是否显示错误信息
|
|
||||||
:return: 返回SessionElement元素或列表
|
:return: 返回SessionElement元素或列表
|
||||||
"""
|
"""
|
||||||
mode = mode or 'single'
|
mode = mode or 'single'
|
||||||
if mode not in ['single', 'all']:
|
if mode not in ['single', 'all']:
|
||||||
raise ValueError("Argument mode can only be 'single' or 'all'.")
|
raise ValueError(f"Argument mode can only be 'single' or 'all', not '{mode}'.")
|
||||||
|
|
||||||
if isinstance(page_or_ele, SessionElement):
|
if isinstance(page_or_ele, SessionElement):
|
||||||
page = page_or_ele.page
|
page = page_or_ele.page
|
||||||
@ -301,25 +301,35 @@ def execute_session_find(page_or_ele,
|
|||||||
page_or_ele = get_HtmlElement(page_or_ele.response.text)
|
page_or_ele = get_HtmlElement(page_or_ele.response.text)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# 用lxml内置方法获取lxml的元素对象列表
|
||||||
if loc[0] == 'xpath':
|
if loc[0] == 'xpath':
|
||||||
ele = page_or_ele.xpath(loc[1])
|
ele = page_or_ele.xpath(loc[1])
|
||||||
else: # 用css selector获取
|
else: # 用css selector获取
|
||||||
ele = page_or_ele.cssselect(loc[1])
|
ele = page_or_ele.cssselect(loc[1])
|
||||||
|
|
||||||
|
# 把lxml元素对象包装成SessionElement对象并按需要返回第一个或全部
|
||||||
if mode == 'single':
|
if mode == 'single':
|
||||||
ele = ele[0] if ele else None
|
ele = ele[0] if ele else None
|
||||||
return SessionElement(ele, page) if isinstance(ele, HtmlElement) else unescape(ele).replace('\xa0', ' ')
|
if isinstance(ele, _Element):
|
||||||
|
return SessionElement(ele, page)
|
||||||
|
elif isinstance(ele, str):
|
||||||
|
return unescape(ele).replace('\xa0', ' ')
|
||||||
|
else:
|
||||||
|
return None
|
||||||
elif mode == 'all':
|
elif mode == 'all':
|
||||||
ele = filter(lambda x: x != '\n', ele) # 去除元素间换行符
|
# 去除元素间换行符
|
||||||
ele = map(lambda x: unescape(x).replace('\xa0', ' ') if isinstance(x, str) else x, ele) # 替换空格
|
ele = filter(lambda x: x != '\n', ele)
|
||||||
return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in ele]
|
# 处理空格
|
||||||
except:
|
ele = map(lambda x: unescape(x).replace('\xa0', ' ') if isinstance(x, str) else x, ele)
|
||||||
if show_errmsg:
|
return [SessionElement(e, page) if isinstance(e, _Element) else e for e in ele]
|
||||||
print('Element(s) not found.', loc)
|
|
||||||
raise
|
except XPathEvalError:
|
||||||
return [] if mode == 'all' else None
|
raise SyntaxError('Invalid xpath syntax.', loc)
|
||||||
|
|
||||||
|
except SelectorSyntaxError:
|
||||||
|
raise SyntaxError('Invalid css selector syntax.', loc)
|
||||||
|
|
||||||
|
|
||||||
def get_HtmlElement(html: str) -> HtmlElement:
|
def get_HtmlElement(html: str) -> _Element:
|
||||||
"""从html文本生成元素对象"""
|
"""从html文本生成lxml的元素对象"""
|
||||||
return HTML(html)
|
return HTML(html)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user