mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
!7 v1.7.1
- d模式如指定了调试端口,可自动启动浏览器进程并接入 - 去除对cssselect库依赖 - 提高查找元素效率 - 调整获取元素xpath和css_path逻辑
This commit is contained in:
commit
4d6ca7f699
@ -20,9 +20,8 @@ from .common import DrissionElement, str_to_loc, get_available_file_name, transl
|
||||
class DriverElement(DrissionElement):
|
||||
"""driver模式的元素对象,包装了一个WebElement对象,并封装了常用功能"""
|
||||
|
||||
def __init__(self, ele: WebElement, page=None, timeout: float = 10):
|
||||
def __init__(self, ele: WebElement, page=None):
|
||||
super().__init__(ele, page)
|
||||
self.timeout = timeout
|
||||
|
||||
def __repr__(self):
|
||||
attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs]
|
||||
@ -39,7 +38,7 @@ class DriverElement(DrissionElement):
|
||||
:param timeout: 超时时间
|
||||
:return: DriverElement对象
|
||||
"""
|
||||
return self.ele(loc_or_str, mode, timeout or self.timeout)
|
||||
return self.ele(loc_or_str, mode, timeout)
|
||||
|
||||
# -----------------共有属性-------------------
|
||||
@property
|
||||
@ -235,7 +234,6 @@ class DriverElement(DrissionElement):
|
||||
if loc_or_str[0] == 'css selector' and loc_or_str[1].lstrip().startswith('>'):
|
||||
loc_str = f'{self.css_path}{loc_or_str[1]}'
|
||||
|
||||
timeout = timeout or self.timeout
|
||||
loc_or_str = loc_or_str[0], loc_str
|
||||
|
||||
return execute_driver_find(self, loc_or_str, mode, timeout)
|
||||
@ -489,7 +487,7 @@ class DriverElement(DrissionElement):
|
||||
"""返获取css路径或xpath路径"""
|
||||
if mode == 'xpath':
|
||||
txt1 = 'var tag = el.nodeName.toLowerCase();'
|
||||
txt2 = '''return '//' + tag + '[@id="' + el.id + '"]' + path;'''
|
||||
# txt2 = '''return '//' + tag + '[@id="' + el.id + '"]' + path;'''
|
||||
txt3 = ''' && sib.nodeName.toLowerCase()==tag'''
|
||||
txt4 = '''
|
||||
if(nth>1){path = '/' + tag + '[' + nth + ']' + path;}
|
||||
@ -497,7 +495,7 @@ class DriverElement(DrissionElement):
|
||||
txt5 = '''return path;'''
|
||||
elif mode == 'css':
|
||||
txt1 = ''
|
||||
txt2 = '''return '#' + el.id + path;'''
|
||||
# txt2 = '''return '#' + el.id + path;'''
|
||||
txt3 = ''
|
||||
txt4 = '''path = '>' + ":nth-child(" + nth + ")" + path;'''
|
||||
txt5 = '''return path.substr(1);'''
|
||||
@ -510,16 +508,13 @@ class DriverElement(DrissionElement):
|
||||
var path = '';
|
||||
while (el.nodeType === Node.ELEMENT_NODE) {
|
||||
''' + txt1 + '''
|
||||
if (el.id) {
|
||||
''' + txt2 + '''
|
||||
} else {
|
||||
var sib = el, nth = 0;
|
||||
while (sib) {
|
||||
if(sib.nodeType === Node.ELEMENT_NODE''' + txt3 + '''){nth += 1;}
|
||||
sib = sib.previousSibling;
|
||||
}
|
||||
''' + txt4 + '''
|
||||
}
|
||||
|
||||
el = el.parentNode;
|
||||
}
|
||||
''' + txt5 + '''
|
||||
@ -567,7 +562,7 @@ class DriverElement(DrissionElement):
|
||||
def execute_driver_find(page_or_ele,
|
||||
loc: Tuple[str, str],
|
||||
mode: str = 'single',
|
||||
timeout: float = 10) -> Union[DriverElement, List[DriverElement], str, None]:
|
||||
timeout: float = None) -> Union[DriverElement, List[DriverElement], str, None]:
|
||||
"""执行driver模式元素的查找 \n
|
||||
页面查找元素及元素查找下级元素皆使用此方法 \n
|
||||
:param page_or_ele: DriverPage对象或DriverElement对象
|
||||
@ -588,15 +583,19 @@ def execute_driver_find(page_or_ele,
|
||||
driver = page_or_ele.driver
|
||||
|
||||
try:
|
||||
wait = WebDriverWait(driver, timeout=timeout)
|
||||
if timeout and timeout != page.timeout:
|
||||
wait = WebDriverWait(driver, timeout=timeout)
|
||||
else:
|
||||
wait = page.wait
|
||||
|
||||
if loc[0] == 'xpath':
|
||||
return wait.until(ElementsByXpath(page, loc[1], mode, timeout))
|
||||
else:
|
||||
if mode == 'single':
|
||||
return DriverElement(wait.until(ec.presence_of_element_located(loc)), page, timeout)
|
||||
return DriverElement(wait.until(ec.presence_of_element_located(loc)), page)
|
||||
elif mode == 'all':
|
||||
eles = wait.until(ec.presence_of_all_elements_located(loc))
|
||||
return [DriverElement(ele, page, timeout) for ele in eles]
|
||||
return [DriverElement(ele, page) for ele in eles]
|
||||
|
||||
except TimeoutException:
|
||||
return [] if mode == 'all' else None
|
||||
@ -622,8 +621,6 @@ class ElementsByXpath(object):
|
||||
|
||||
def __call__(self, ele_or_driver: Union[WebDriver, WebElement]) \
|
||||
-> Union[str, DriverElement, None, List[str or DriverElement]]:
|
||||
driver, the_node = ((ele_or_driver, 'document') if isinstance(ele_or_driver, WebDriver)
|
||||
else (ele_or_driver.parent, ele_or_driver))
|
||||
|
||||
def get_nodes(node=None, xpath_txt=None, type_txt='7'):
|
||||
"""用js通过xpath获取元素、节点或属性
|
||||
@ -669,12 +666,18 @@ class ElementsByXpath(object):
|
||||
"""
|
||||
return driver.execute_script(js, node)
|
||||
|
||||
# 把lxml元素对象包装成DriverElement对象并按需要返回第一个或全部
|
||||
if isinstance(ele_or_driver, WebDriver):
|
||||
driver, the_node = ele_or_driver, 'document'
|
||||
else:
|
||||
driver, the_node = ele_or_driver.parent, ele_or_driver
|
||||
|
||||
# 把lxml元素对象包装成DriverElement对象并按需要返回第一个或全部
|
||||
if self.mode == 'single':
|
||||
try:
|
||||
e = get_nodes(the_node, xpath_txt=self.xpath, type_txt='9')
|
||||
|
||||
if isinstance(e, WebElement):
|
||||
return DriverElement(e, self.page, self.timeout)
|
||||
return DriverElement(e, self.page)
|
||||
elif isinstance(e, str):
|
||||
return format_html(e)
|
||||
else:
|
||||
@ -685,7 +688,7 @@ class ElementsByXpath(object):
|
||||
return None
|
||||
|
||||
elif self.mode == 'all':
|
||||
return ([DriverElement(x, self.page, self.timeout) if isinstance(x, WebElement)
|
||||
return ([DriverElement(x, self.page) if isinstance(x, WebElement)
|
||||
else format_html(x)
|
||||
for x in get_nodes(the_node, xpath_txt=self.xpath)
|
||||
if x != '\n'])
|
||||
|
@ -13,6 +13,7 @@ from urllib.parse import quote
|
||||
from selenium.common.exceptions import NoAlertPresentException
|
||||
from selenium.webdriver.chrome.webdriver import WebDriver
|
||||
from selenium.webdriver.remote.webelement import WebElement
|
||||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
|
||||
from .common import str_to_loc, get_available_file_name, translate_loc, format_html
|
||||
from .driver_element import DriverElement, execute_driver_find
|
||||
@ -24,9 +25,10 @@ class DriverPage(object):
|
||||
def __init__(self, driver: WebDriver, timeout: float = 10):
|
||||
"""初始化函数,接收一个WebDriver对象,用来操作网页"""
|
||||
self._driver = driver
|
||||
self.timeout = timeout
|
||||
self._timeout = timeout
|
||||
self._url = None
|
||||
self._url_available = None
|
||||
self._wait = None
|
||||
|
||||
@property
|
||||
def driver(self) -> WebDriver:
|
||||
@ -60,6 +62,22 @@ class DriverPage(object):
|
||||
"""返回网页title"""
|
||||
return self.driver.title
|
||||
|
||||
@property
|
||||
def timeout(self) -> float:
|
||||
return self._timeout
|
||||
|
||||
@timeout.setter
|
||||
def timeout(self, second: float) -> None:
|
||||
self._timeout = second
|
||||
self._wait = None
|
||||
|
||||
@property
|
||||
def wait(self) -> WebDriverWait:
|
||||
if self._wait is None:
|
||||
self._wait = WebDriverWait(self.driver, timeout=self.timeout)
|
||||
|
||||
return self._wait
|
||||
|
||||
def get_cookies(self, as_dict: bool = False) -> Union[list, dict]:
|
||||
"""返回当前网站cookies"""
|
||||
if as_dict:
|
||||
@ -165,7 +183,7 @@ class DriverPage(object):
|
||||
|
||||
# 接收到WebElement对象打包成DriverElement对象返回
|
||||
elif isinstance(loc_or_ele, WebElement):
|
||||
return DriverElement(loc_or_ele, self, self.timeout)
|
||||
return DriverElement(loc_or_ele, self)
|
||||
|
||||
# 接收到的类型不正确,抛出异常
|
||||
else:
|
||||
|
@ -336,8 +336,8 @@ class MixPage(Null, SessionPage, DriverPage):
|
||||
def ele(self,
|
||||
loc_or_ele: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement],
|
||||
mode: str = None,
|
||||
timeout: float = None) -> Union[
|
||||
DriverElement, SessionElement, str, List[SessionElement], List[DriverElement]]:
|
||||
timeout: float = None) \
|
||||
-> Union[DriverElement, SessionElement, str, List[SessionElement], List[DriverElement]]:
|
||||
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n
|
||||
示例: \n
|
||||
- 接收到元素对象时: \n
|
||||
|
@ -8,8 +8,7 @@ import re
|
||||
from typing import Union, List, Tuple
|
||||
from urllib.parse import urlparse, urljoin, urlunparse
|
||||
|
||||
from cssselect import SelectorSyntaxError
|
||||
from lxml.etree import tostring, XPathEvalError
|
||||
from lxml.etree import tostring
|
||||
from lxml.html import HtmlElement, fromstring
|
||||
|
||||
from .common import DrissionElement, str_to_loc, translate_loc, format_html
|
||||
@ -285,20 +284,20 @@ class SessionElement(DrissionElement):
|
||||
ele = self
|
||||
|
||||
while ele:
|
||||
ele_id = ele.attr('id')
|
||||
# ele_id = ele.attr('id')
|
||||
|
||||
if ele_id:
|
||||
return f'#{ele_id}{path_str}' if mode == 'css' else f'//{ele.tag}[@id="{ele_id}"]{path_str}'
|
||||
# if ele_id:
|
||||
# return f'#{ele_id}{path_str}' if mode == 'css' else f'//{ele.tag}[@id="{ele_id}"]{path_str}'
|
||||
# else:
|
||||
|
||||
if mode == 'css':
|
||||
brothers = len(ele.eles(f'xpath:./preceding-sibling::*'))
|
||||
path_str = f'>:nth-child({brothers + 1}){path_str}'
|
||||
else:
|
||||
brothers = len(ele.eles(f'xpath:./preceding-sibling::{ele.tag}'))
|
||||
path_str = f'/{ele.tag}[{brothers + 1}]{path_str}' if brothers > 0 else f'/{ele.tag}{path_str}'
|
||||
|
||||
if mode == 'css':
|
||||
brothers = len(ele.eles(f'xpath:./preceding-sibling::*'))
|
||||
path_str = f'>:nth-child({brothers + 1}){path_str}'
|
||||
else:
|
||||
brothers = len(ele.eles(f'xpath:./preceding-sibling::{ele.tag}'))
|
||||
path_str = f'/{ele.tag}[{brothers + 1}]{path_str}' if brothers > 0 else f'/{ele.tag}{path_str}'
|
||||
|
||||
ele = ele.parent
|
||||
ele = ele.parent
|
||||
|
||||
return path_str[1:] if mode == 'css' else path_str
|
||||
|
||||
@ -383,8 +382,11 @@ def execute_session_find(page_or_ele,
|
||||
elif mode == 'all':
|
||||
return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in ele if e != '\n']
|
||||
|
||||
except XPathEvalError:
|
||||
raise SyntaxError(f'Invalid xpath syntax. {loc}')
|
||||
except Exception as e:
|
||||
|
||||
except SelectorSyntaxError:
|
||||
raise SyntaxError(f'Invalid css selector syntax. {loc}')
|
||||
if 'Invalid expression' in str(e):
|
||||
raise SyntaxError(f'Invalid xpath syntax. {loc}')
|
||||
elif 'Expected selector' in str(e):
|
||||
raise SyntaxError(f'Invalid css selector syntax. {loc}')
|
||||
|
||||
raise e
|
||||
|
@ -10,10 +10,9 @@ from .driver_element import execute_driver_find, DriverElement
|
||||
|
||||
|
||||
class ShadowRootElement(DrissionElement):
|
||||
def __init__(self, inner_ele: WebElement, parent_ele: DriverElement, timeout: float = 10):
|
||||
def __init__(self, inner_ele: WebElement, parent_ele: DriverElement):
|
||||
super().__init__(inner_ele, parent_ele.page)
|
||||
self.parent_ele = parent_ele
|
||||
self.timeout = timeout
|
||||
|
||||
def __repr__(self):
|
||||
return f'<ShadowRootElement in {self.parent_ele} >'
|
||||
@ -29,7 +28,7 @@ class ShadowRootElement(DrissionElement):
|
||||
:param timeout: 超时时间
|
||||
:return: DriverElement对象
|
||||
"""
|
||||
return self.ele(loc_or_str, mode, timeout or self.timeout)
|
||||
return self.ele(loc_or_str, mode, timeout)
|
||||
|
||||
@property
|
||||
def tag(self):
|
||||
@ -107,8 +106,6 @@ class ShadowRootElement(DrissionElement):
|
||||
else:
|
||||
raise ValueError('Argument loc_or_str can only be tuple or str.')
|
||||
|
||||
timeout = timeout or self.timeout
|
||||
|
||||
if loc_or_str[0] == 'css selector':
|
||||
return execute_driver_find(self, loc_or_str, mode, timeout)
|
||||
elif loc_or_str[0] == 'text':
|
||||
@ -192,18 +189,18 @@ class ShadowRootElement(DrissionElement):
|
||||
if text == txt:
|
||||
|
||||
if mode == 'single':
|
||||
return DriverElement(ele, self.page, self.timeout)
|
||||
return DriverElement(ele, self.page)
|
||||
elif mode == 'all':
|
||||
results.append(DriverElement(ele, self.page, self.timeout))
|
||||
results.append(DriverElement(ele, self.page))
|
||||
|
||||
# 模糊匹配
|
||||
elif match == 'fuzzy':
|
||||
if text in txt:
|
||||
|
||||
if mode == 'single':
|
||||
return DriverElement(ele, self.page, self.timeout)
|
||||
return DriverElement(ele, self.page)
|
||||
elif mode == 'all':
|
||||
results.append(DriverElement(ele, self.page, self.timeout))
|
||||
results.append(DriverElement(ele, self.page))
|
||||
|
||||
return None if mode == 'single' else results
|
||||
|
||||
|
@ -1846,7 +1846,6 @@ Parameter Description:
|
||||
|
||||
- ele: WebElement - the WebElement object to wrap
|
||||
- page: DriverPage - the page object where the element is located
|
||||
- timeout: float - Find the timeout of the element (it can be set separately each time the element is searched)
|
||||
|
||||
|
||||
|
||||
@ -2555,7 +2554,6 @@ Parameter Description:
|
||||
|
||||
- parent_ele: DriverElement - the element to which the shadow root is attached
|
||||
|
||||
- timeout: float-timeout
|
||||
|
||||
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
selenium
|
||||
requests
|
||||
tldextract
|
||||
lxml
|
||||
cssselect
|
||||
lxml
|
Loading…
x
Reference in New Issue
Block a user