diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 4ad440a..68c48b6 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -20,9 +20,8 @@ from .common import DrissionElement, str_to_loc, get_available_file_name, transl class DriverElement(DrissionElement): """driver模式的元素对象,包装了一个WebElement对象,并封装了常用功能""" - def __init__(self, ele: WebElement, page=None, timeout: float = 10): + def __init__(self, ele: WebElement, page=None): super().__init__(ele, page) - self.timeout = timeout def __repr__(self): attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs] @@ -39,7 +38,7 @@ class DriverElement(DrissionElement): :param timeout: 超时时间 :return: DriverElement对象 """ - return self.ele(loc_or_str, mode, timeout or self.timeout) + return self.ele(loc_or_str, mode, timeout) # -----------------共有属性------------------- @property @@ -235,7 +234,6 @@ class DriverElement(DrissionElement): if loc_or_str[0] == 'css selector' and loc_or_str[1].lstrip().startswith('>'): loc_str = f'{self.css_path}{loc_or_str[1]}' - timeout = timeout or self.timeout loc_or_str = loc_or_str[0], loc_str return execute_driver_find(self, loc_or_str, mode, timeout) @@ -489,7 +487,7 @@ class DriverElement(DrissionElement): """返获取css路径或xpath路径""" if mode == 'xpath': txt1 = 'var tag = el.nodeName.toLowerCase();' - txt2 = '''return '//' + tag + '[@id="' + el.id + '"]' + path;''' + # txt2 = '''return '//' + tag + '[@id="' + el.id + '"]' + path;''' txt3 = ''' && sib.nodeName.toLowerCase()==tag''' txt4 = ''' if(nth>1){path = '/' + tag + '[' + nth + ']' + path;} @@ -497,7 +495,7 @@ class DriverElement(DrissionElement): txt5 = '''return path;''' elif mode == 'css': txt1 = '' - txt2 = '''return '#' + el.id + path;''' + # txt2 = '''return '#' + el.id + path;''' txt3 = '' txt4 = '''path = '>' + ":nth-child(" + nth + ")" + path;''' txt5 = '''return path.substr(1);''' @@ -510,16 +508,13 @@ class DriverElement(DrissionElement): var path = ''; while (el.nodeType === Node.ELEMENT_NODE) { ''' + txt1 + ''' - if (el.id) { - ''' + txt2 + ''' - } else { var sib = el, nth = 0; while (sib) { if(sib.nodeType === Node.ELEMENT_NODE''' + txt3 + '''){nth += 1;} sib = sib.previousSibling; } ''' + txt4 + ''' - } + el = el.parentNode; } ''' + txt5 + ''' @@ -567,7 +562,7 @@ class DriverElement(DrissionElement): def execute_driver_find(page_or_ele, loc: Tuple[str, str], mode: str = 'single', - timeout: float = 10) -> Union[DriverElement, List[DriverElement], str, None]: + timeout: float = None) -> Union[DriverElement, List[DriverElement], str, None]: """执行driver模式元素的查找 \n 页面查找元素及元素查找下级元素皆使用此方法 \n :param page_or_ele: DriverPage对象或DriverElement对象 @@ -588,15 +583,19 @@ def execute_driver_find(page_or_ele, driver = page_or_ele.driver try: - wait = WebDriverWait(driver, timeout=timeout) + if timeout and timeout != page.timeout: + wait = WebDriverWait(driver, timeout=timeout) + else: + wait = page.wait + if loc[0] == 'xpath': return wait.until(ElementsByXpath(page, loc[1], mode, timeout)) else: if mode == 'single': - return DriverElement(wait.until(ec.presence_of_element_located(loc)), page, timeout) + return DriverElement(wait.until(ec.presence_of_element_located(loc)), page) elif mode == 'all': eles = wait.until(ec.presence_of_all_elements_located(loc)) - return [DriverElement(ele, page, timeout) for ele in eles] + return [DriverElement(ele, page) for ele in eles] except TimeoutException: return [] if mode == 'all' else None @@ -622,8 +621,6 @@ class ElementsByXpath(object): def __call__(self, ele_or_driver: Union[WebDriver, WebElement]) \ -> Union[str, DriverElement, None, List[str or DriverElement]]: - driver, the_node = ((ele_or_driver, 'document') if isinstance(ele_or_driver, WebDriver) - else (ele_or_driver.parent, ele_or_driver)) def get_nodes(node=None, xpath_txt=None, type_txt='7'): """用js通过xpath获取元素、节点或属性 @@ -669,12 +666,18 @@ class ElementsByXpath(object): """ return driver.execute_script(js, node) - # 把lxml元素对象包装成DriverElement对象并按需要返回第一个或全部 + if isinstance(ele_or_driver, WebDriver): + driver, the_node = ele_or_driver, 'document' + else: + driver, the_node = ele_or_driver.parent, ele_or_driver + + # 把lxml元素对象包装成DriverElement对象并按需要返回第一个或全部 if self.mode == 'single': try: e = get_nodes(the_node, xpath_txt=self.xpath, type_txt='9') + if isinstance(e, WebElement): - return DriverElement(e, self.page, self.timeout) + return DriverElement(e, self.page) elif isinstance(e, str): return format_html(e) else: @@ -685,7 +688,7 @@ class ElementsByXpath(object): return None elif self.mode == 'all': - return ([DriverElement(x, self.page, self.timeout) if isinstance(x, WebElement) + return ([DriverElement(x, self.page) if isinstance(x, WebElement) else format_html(x) for x in get_nodes(the_node, xpath_txt=self.xpath) if x != '\n']) diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index 9ea3ad5..924c778 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -13,6 +13,7 @@ from urllib.parse import quote from selenium.common.exceptions import NoAlertPresentException from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.remote.webelement import WebElement +from selenium.webdriver.support.wait import WebDriverWait from .common import str_to_loc, get_available_file_name, translate_loc, format_html from .driver_element import DriverElement, execute_driver_find @@ -24,9 +25,10 @@ class DriverPage(object): def __init__(self, driver: WebDriver, timeout: float = 10): """初始化函数,接收一个WebDriver对象,用来操作网页""" self._driver = driver - self.timeout = timeout + self._timeout = timeout self._url = None self._url_available = None + self._wait = None @property def driver(self) -> WebDriver: @@ -60,6 +62,22 @@ class DriverPage(object): """返回网页title""" return self.driver.title + @property + def timeout(self) -> float: + return self._timeout + + @timeout.setter + def timeout(self, second: float) -> None: + self._timeout = second + self._wait = None + + @property + def wait(self) -> WebDriverWait: + if self._wait is None: + self._wait = WebDriverWait(self.driver, timeout=self.timeout) + + return self._wait + def get_cookies(self, as_dict: bool = False) -> Union[list, dict]: """返回当前网站cookies""" if as_dict: @@ -165,7 +183,7 @@ class DriverPage(object): # 接收到WebElement对象打包成DriverElement对象返回 elif isinstance(loc_or_ele, WebElement): - return DriverElement(loc_or_ele, self, self.timeout) + return DriverElement(loc_or_ele, self) # 接收到的类型不正确,抛出异常 else: diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index 3ff1eae..a6fc180 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -336,8 +336,8 @@ class MixPage(Null, SessionPage, DriverPage): def ele(self, loc_or_ele: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement], mode: str = None, - timeout: float = None) -> Union[ - DriverElement, SessionElement, str, List[SessionElement], List[DriverElement]]: + timeout: float = None) \ + -> Union[DriverElement, SessionElement, str, List[SessionElement], List[DriverElement]]: """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n 示例: \n - 接收到元素对象时: \n diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index 793a3cd..47b3fe5 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -8,8 +8,7 @@ import re from typing import Union, List, Tuple from urllib.parse import urlparse, urljoin, urlunparse -from cssselect import SelectorSyntaxError -from lxml.etree import tostring, XPathEvalError +from lxml.etree import tostring from lxml.html import HtmlElement, fromstring from .common import DrissionElement, str_to_loc, translate_loc, format_html @@ -285,20 +284,20 @@ class SessionElement(DrissionElement): ele = self while ele: - ele_id = ele.attr('id') + # ele_id = ele.attr('id') - if ele_id: - return f'#{ele_id}{path_str}' if mode == 'css' else f'//{ele.tag}[@id="{ele_id}"]{path_str}' + # if ele_id: + # return f'#{ele_id}{path_str}' if mode == 'css' else f'//{ele.tag}[@id="{ele_id}"]{path_str}' + # else: + + if mode == 'css': + brothers = len(ele.eles(f'xpath:./preceding-sibling::*')) + path_str = f'>:nth-child({brothers + 1}){path_str}' else: + brothers = len(ele.eles(f'xpath:./preceding-sibling::{ele.tag}')) + path_str = f'/{ele.tag}[{brothers + 1}]{path_str}' if brothers > 0 else f'/{ele.tag}{path_str}' - if mode == 'css': - brothers = len(ele.eles(f'xpath:./preceding-sibling::*')) - path_str = f'>:nth-child({brothers + 1}){path_str}' - else: - brothers = len(ele.eles(f'xpath:./preceding-sibling::{ele.tag}')) - path_str = f'/{ele.tag}[{brothers + 1}]{path_str}' if brothers > 0 else f'/{ele.tag}{path_str}' - - ele = ele.parent + ele = ele.parent return path_str[1:] if mode == 'css' else path_str @@ -383,8 +382,11 @@ def execute_session_find(page_or_ele, elif mode == 'all': return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in ele if e != '\n'] - except XPathEvalError: - raise SyntaxError(f'Invalid xpath syntax. {loc}') + except Exception as e: - except SelectorSyntaxError: - raise SyntaxError(f'Invalid css selector syntax. {loc}') + if 'Invalid expression' in str(e): + raise SyntaxError(f'Invalid xpath syntax. {loc}') + elif 'Expected selector' in str(e): + raise SyntaxError(f'Invalid css selector syntax. {loc}') + + raise e diff --git a/DrissionPage/shadow_root_element.py b/DrissionPage/shadow_root_element.py index be6596c..92e5ac0 100644 --- a/DrissionPage/shadow_root_element.py +++ b/DrissionPage/shadow_root_element.py @@ -10,10 +10,9 @@ from .driver_element import execute_driver_find, DriverElement class ShadowRootElement(DrissionElement): - def __init__(self, inner_ele: WebElement, parent_ele: DriverElement, timeout: float = 10): + def __init__(self, inner_ele: WebElement, parent_ele: DriverElement): super().__init__(inner_ele, parent_ele.page) self.parent_ele = parent_ele - self.timeout = timeout def __repr__(self): return f'' @@ -29,7 +28,7 @@ class ShadowRootElement(DrissionElement): :param timeout: 超时时间 :return: DriverElement对象 """ - return self.ele(loc_or_str, mode, timeout or self.timeout) + return self.ele(loc_or_str, mode, timeout) @property def tag(self): @@ -107,8 +106,6 @@ class ShadowRootElement(DrissionElement): else: raise ValueError('Argument loc_or_str can only be tuple or str.') - timeout = timeout or self.timeout - if loc_or_str[0] == 'css selector': return execute_driver_find(self, loc_or_str, mode, timeout) elif loc_or_str[0] == 'text': @@ -192,18 +189,18 @@ class ShadowRootElement(DrissionElement): if text == txt: if mode == 'single': - return DriverElement(ele, self.page, self.timeout) + return DriverElement(ele, self.page) elif mode == 'all': - results.append(DriverElement(ele, self.page, self.timeout)) + results.append(DriverElement(ele, self.page)) # 模糊匹配 elif match == 'fuzzy': if text in txt: if mode == 'single': - return DriverElement(ele, self.page, self.timeout) + return DriverElement(ele, self.page) elif mode == 'all': - results.append(DriverElement(ele, self.page, self.timeout)) + results.append(DriverElement(ele, self.page)) return None if mode == 'single' else results diff --git a/README.en.md b/README.en.md index bc0c904..d4b43e1 100644 --- a/README.en.md +++ b/README.en.md @@ -1846,7 +1846,6 @@ Parameter Description: - ele: WebElement- WebElement object - page: DriverPage- the page object where the element is located -- timeout: float - Find the timeout of the element (it can be set separately each time the element is searched) @@ -2555,7 +2554,6 @@ Parameter Description: - parent_ele: DriverElement-the element to which the shadow-root is attached -- timeout: float-timeout diff --git a/requirements.txt b/requirements.txt index 2c2dd13..5b7920a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ selenium requests tldextract -lxml -cssselect \ No newline at end of file +lxml \ No newline at end of file