d 模式改为使用 JS 通过 document.evaluate 方法处理 XPath,放弃使用 selenium 原生的查找方法,以支持用 XPath 直接获取文本节点和元素属性。

This commit is contained in:
g1879 2020-11-03 11:43:51 +08:00
parent 00c9acc173
commit ba8d771545

View File

@ -8,14 +8,15 @@ import re
from html import unescape from html import unescape
from pathlib import Path from pathlib import Path
from time import sleep from time import sleep
from typing import Union, List, Any from typing import Union, List, Any, Tuple
from selenium.common.exceptions import InvalidSelectorException, TimeoutException from selenium.common.exceptions import TimeoutException, JavascriptException, InvalidElementStateException
from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support import expected_conditions as ec from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.wait import WebDriverWait
from .common import DrissionElement, get_loc_from_str, get_available_file_name, WebDriverWaitPlus from .common import DrissionElement, get_loc_from_str, get_available_file_name, translate_loc_to_xpath
class DriverElement(DrissionElement): class DriverElement(DrissionElement):
@ -67,8 +68,7 @@ class DriverElement(DrissionElement):
if text_node_only: if text_node_only:
return self.eles('xpath:./text()') return self.eles('xpath:./text()')
else: else:
nodes = self.eles('xpath:./node()') return list(map(lambda x: x if isinstance(x, str) else x.text, self.eles('xpath:./node()')))
return list(map(lambda x: x if isinstance(x, str) else x.text, nodes))
@property @property
def html(self) -> str: def html(self) -> str:
@ -205,7 +205,7 @@ class DriverElement(DrissionElement):
return self.inner_ele.get_attribute(attr) return self.inner_ele.get_attribute(attr)
def ele(self, def ele(self,
loc_or_str: Union[tuple, str], loc_or_str: Union[Tuple[str, str], str],
mode: str = None, mode: str = None,
timeout: float = None, timeout: float = None,
show_errmsg: bool = False): show_errmsg: bool = False):
@ -235,10 +235,13 @@ class DriverElement(DrissionElement):
:param show_errmsg: 出现异常时是否打印信息 :param show_errmsg: 出现异常时是否打印信息
:return: DriverElement对象 :return: DriverElement对象
""" """
if isinstance(loc_or_str, (str, tuple)):
if isinstance(loc_or_str, str): if isinstance(loc_or_str, str):
loc_or_str = get_loc_from_str(loc_or_str) loc_or_str = get_loc_from_str(loc_or_str)
elif isinstance(loc_or_str, tuple) and len(loc_or_str) == 2: else:
pass if len(loc_or_str) != 2:
raise ValueError("Len of loc_or_str must be 2 when it's a tuple.")
loc_or_str = translate_loc_to_xpath(loc_or_str)
else: else:
raise ValueError('Argument loc_or_str can only be tuple or str.') raise ValueError('Argument loc_or_str can only be tuple or str.')
@ -258,7 +261,7 @@ class DriverElement(DrissionElement):
return execute_driver_find(self.inner_ele, loc_or_str, mode, show_errmsg, timeout) return execute_driver_find(self.inner_ele, loc_or_str, mode, show_errmsg, timeout)
def eles(self, def eles(self,
loc_or_str: Union[tuple, str], loc_or_str: Union[Tuple[str, str], str],
timeout: float = None, timeout: float = None,
show_errmsg: bool = False): show_errmsg: bool = False):
"""返回当前元素下级所有符合条件的子元素 \n """返回当前元素下级所有符合条件的子元素 \n
@ -286,8 +289,6 @@ class DriverElement(DrissionElement):
:param show_errmsg: 出现异常时是否打印信息 :param show_errmsg: 出现异常时是否打印信息
:return: DriverElement对象组成的列表 :return: DriverElement对象组成的列表
""" """
if not isinstance(loc_or_str, (tuple, str)):
raise TypeError('Type of loc_or_str can only be tuple or str.')
return self.ele(loc_or_str, mode='all', show_errmsg=show_errmsg, timeout=timeout) return self.ele(loc_or_str, mode='all', show_errmsg=show_errmsg, timeout=timeout)
# -----------------以下为driver独占------------------- # -----------------以下为driver独占-------------------
@ -480,7 +481,7 @@ class DriverElement(DrissionElement):
def execute_driver_find(ele_or_driver: Union[WebElement, WebDriver], def execute_driver_find(ele_or_driver: Union[WebElement, WebDriver],
loc: tuple, loc: Tuple[str, str],
mode: str = 'single', mode: str = 'single',
show_errmsg: bool = False, show_errmsg: bool = False,
timeout: float = 10) -> Union[DriverElement, List[DriverElement or str]]: timeout: float = 10) -> Union[DriverElement, List[DriverElement or str]]:
@ -498,21 +499,19 @@ def execute_driver_find(ele_or_driver: Union[WebElement, WebDriver],
raise ValueError("Argument mode can only be 'single' or 'all'.") raise ValueError("Argument mode can only be 'single' or 'all'.")
try: try:
wait = WebDriverWaitPlus(ele_or_driver, timeout=timeout) wait = WebDriverWait(ele_or_driver, timeout=timeout)
if loc[0] == 'xpath':
return wait.until(ElementsByXpath(ele_or_driver, loc[1], mode, timeout))
else:
if mode == 'single': if mode == 'single':
return DriverElement(wait.until(ec.presence_of_element_located(loc))) return DriverElement(wait.until(ec.presence_of_element_located(loc)), timeout)
elif mode == 'all': elif mode == 'all':
eles = wait.until(ec.presence_of_all_elements_located(loc)) eles = wait.until(ec.presence_of_all_elements_located(loc))
return [DriverElement(ele) for ele in eles] return [DriverElement(ele, timeout) for ele in eles]
except InvalidSelectorException as e:
if loc[0] == 'xpath' and 'It should be an element.' in str(e): except InvalidElementStateException:
return _get_elements(ele_or_driver, loc[1], mode) raise ValueError('Query statement error.', loc)
else:
if show_errmsg:
print('Query statement error.', loc)
raise e
else:
return [] if mode == 'all' else None
except TimeoutException: except TimeoutException:
if show_errmsg: if show_errmsg:
print('Element(s) not found.', loc) print('Element(s) not found.', loc)
@ -520,24 +519,35 @@ def execute_driver_find(ele_or_driver: Union[WebElement, WebDriver],
return [] if mode == 'all' else None return [] if mode == 'all' else None
def _get_elements( class ElementsByXpath(object):
"""用js通过xpath获取元素、节点或属性与WebDriverWait配合使用"""
def __init__(self,
ele_or_driver: Union[WebDriver, WebElement], ele_or_driver: Union[WebDriver, WebElement],
xpath: str = None, xpath: str = None,
mode='all') -> Union[str, DriverElement, None, List[str or DriverElement]]: mode: str = 'all',
"""使用js通过xpath获取元素或文本、属性值。 \n timeout: float = 10):
因selenium不支持获取属性或文本的xpath语句故使用此方法 \n self.ele_or_driver = ele_or_driver
:param ele_or_driver: selenium的driver或元素对象 self.xpath = xpath
:param xpath: xpath语句 self.mode = mode
:param mode: 'all' 'single self.timeout = timeout
:return: single模式返回元素或字符串all模式返回它们组成的列表
""" def __call__(self,
ele_or_driver: Union[WebDriver, WebElement],
) -> Union[str, DriverElement, None, List[str or DriverElement]]:
driver, the_node = (ele_or_driver, 'document') if isinstance(ele_or_driver, WebDriver) \ driver, the_node = (ele_or_driver, 'document') if isinstance(ele_or_driver, WebDriver) \
else (ele_or_driver.parent, ele_or_driver) else (ele_or_driver.parent, ele_or_driver)
def get_nodes(node=None, xpath_txt=None, type_txt='7'): def get_nodes(node=None, xpath_txt=None, type_txt='7'):
"""用js通过xpath获取元素、节点或属性
:param node: 'document' 元素对象
:param xpath_txt: xpath语句
:param type_txt: resultType,参考https://developer.mozilla.org/zh-CN/docs/Web/API/Document/evaluate
:return:
"""
node_txt = 'document' if not node or node == 'document' else 'arguments[0]' node_txt = 'document' if not node or node == 'document' else 'arguments[0]'
for_txt = '' for_txt = ''
if type_txt == '9': if type_txt == '9': # 获取第一个元素、节点或属性
return_txt = ''' return_txt = '''
if(e.singleNodeValue.constructor.name=="Text"){return e.singleNodeValue.data;} if(e.singleNodeValue.constructor.name=="Text"){return e.singleNodeValue.data;}
else if(e.singleNodeValue.constructor.name=="Attr"){return e.singleNodeValue.nodeValue;} else if(e.singleNodeValue.constructor.name=="Attr"){return e.singleNodeValue.nodeValue;}
@ -547,7 +557,7 @@ def _get_elements(
return_txt = 'return e.stringValue;' return_txt = 'return e.stringValue;'
elif type_txt == '1': elif type_txt == '1':
return_txt = 'return e.numberValue;' return_txt = 'return e.numberValue;'
elif type_txt == '7': elif type_txt == '7': # 按顺序获取所有元素、节点或属性
for_txt = """ for_txt = """
var a=new Array(); var a=new Array();
for(var i = 0; i <e.snapshotLength ; i++){ for(var i = 0; i <e.snapshotLength ; i++){
@ -566,12 +576,13 @@ def _get_elements(
""" """
return driver.execute_script(js, node) return driver.execute_script(js, node)
if mode == 'single': if self.mode == 'single':
try: try:
e = get_nodes(the_node, xpath_txt=xpath, type_txt='9') e = get_nodes(the_node, xpath_txt=self.xpath, type_txt='9')
return DriverElement(e) if isinstance(e, WebElement) else e return DriverElement(e, self.timeout) if isinstance(e, WebElement) else e
except: except JavascriptException: # 找不到目标时
return None return None
elif mode == 'all':
e = get_nodes(the_node, xpath_txt=xpath) elif self.mode == 'all':
return list(map(lambda x: DriverElement(x) if isinstance(x, WebElement) else x, e)) e = get_nodes(the_node, xpath_txt=self.xpath)
return list(map(lambda x: DriverElement(x, self.timeout) if isinstance(x, WebElement) else x, e))