mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
完善xpath处理,待测试
This commit is contained in:
parent
04112d95e0
commit
7cca3f8d06
@@ -37,8 +37,11 @@ class SessionElement(DrissionElement):
|
|||||||
return unescape(self._inner_ele.text).replace('\xa0', ' ')
|
return unescape(self._inner_ele.text).replace('\xa0', ' ')
|
||||||
|
|
||||||
def texts(self, text_node_only: bool = False) -> List[str]:
|
def texts(self, text_node_only: bool = False) -> List[str]:
|
||||||
# TODO: 待补充
|
nodes = self.eles('xpath:./*/node()')
|
||||||
return []
|
if text_node_only:
|
||||||
|
return [x for x in nodes if isinstance(x, str)]
|
||||||
|
else:
|
||||||
|
return [x if isinstance(x, str) else x.text for x in nodes]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def html(self) -> str:
|
def html(self) -> str:
|
||||||
@@ -246,13 +249,17 @@ def execute_session_find(page_or_ele: BaseParser,
|
|||||||
try:
|
try:
|
||||||
ele = None
|
ele = None
|
||||||
if loc_by == 'xpath':
|
if loc_by == 'xpath':
|
||||||
if 'PyQuery' in str(type(page_or_ele.element)) or '()' in loc_str.split('[')[0]:
|
if 'PyQuery' in str(type(page_or_ele.element)):
|
||||||
# 从页面查找。后面的条件是处理./node()、./text()等xpath语句时用的
|
# or '()' in loc_str.split('[')[0]\
|
||||||
|
# or loc_str.split('/')[-1].startswith('@'):
|
||||||
|
# 从页面查找。第二个条件处理./node()、./text()等xpath语句,第三个条件处理获取属性的语句
|
||||||
ele = page_or_ele.xpath(loc_str)
|
ele = page_or_ele.xpath(loc_str)
|
||||||
elif 'HtmlElement' in str(type(page_or_ele.element)):
|
elif 'HtmlElement' in str(type(page_or_ele.element)):
|
||||||
# 从元素查找。Q_Q忘记了为什么要这样区分
|
# 从元素查找。Q_Q忘记了为什么要这样区分
|
||||||
elements = page_or_ele.element.xpath(loc_str)
|
elements = page_or_ele.element.xpath(loc_str)
|
||||||
ele = [Element(element=e, url=page_or_ele.url) for e in elements]
|
ele = [Element(element=e, url=page_or_ele.url) for e in elements]
|
||||||
|
if not ele:
|
||||||
|
ele = page_or_ele.xpath(loc_str)
|
||||||
else: # 用css selector获取
|
else: # 用css selector获取
|
||||||
ele = page_or_ele.find(loc_str)
|
ele = page_or_ele.find(loc_str)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user