From bab4461b586968ef2ed44e9df21418a3ae251430 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sat, 20 Nov 2021 02:07:23 +0800 Subject: [PATCH] =?UTF-8?q?=E5=9F=BA=E6=9C=AC=E5=AE=8C=E6=88=90s=5Fele()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/base.py | 17 +++---- DrissionPage/common.py | 2 +- DrissionPage/config.py | 13 +++--- DrissionPage/configs.ini | 8 ++-- DrissionPage/drission.py | 4 +- DrissionPage/driver_element.py | 46 ++++++++++++------- DrissionPage/driver_page.py | 36 +++++++-------- DrissionPage/mix_page.py | 21 +++++---- DrissionPage/session_element.py | 70 ++++++++++++++++------------- DrissionPage/session_page.py | 29 +++++++----- DrissionPage/shadow_root_element.py | 21 +++++---- 11 files changed, 150 insertions(+), 117 deletions(-) diff --git a/DrissionPage/base.py b/DrissionPage/base.py index f22fde5..cfbb141 100644 --- a/DrissionPage/base.py +++ b/DrissionPage/base.py @@ -26,11 +26,12 @@ class BaseParser(object): timeout: float = None): return self.ele(loc_or_str, mode='all', timeout=timeout) - def s_eles(self, - loc_or_str: Union[Tuple[str, str], str], - timeout: float = None): - """查找并以SessionElement方式返回元素""" - return self.s_ele(loc_or_str, mode='all', timeout=timeout) + def s_eles(self, loc_or_str: Union[Tuple[str, str], str]): + """查找并以SessionElement方式返回元素 \n + :param loc_or_str: 定位符 + :return: SessionElement或属性、文本组成的列表 + """ + return self.s_ele(loc_or_str, mode='all') # ----------------以下属性或方法待后代实现---------------- @property @@ -38,7 +39,7 @@ class BaseParser(object): return @abstractmethod - def s_ele(self, loc_or_ele, mode='single', timeout=None): + def s_ele(self, loc_or_ele, mode='single'): pass @abstractmethod @@ -169,7 +170,7 @@ class DrissionElement(BaseElement): elif mode == 'text': node_txt = 'text()' else: - raise ValueError(f"Argument mode can only be 'node' ,'ele' or 'text', not '{mode}'.") + raise ValueError(f"mode参数只能是'node'、'ele'或'text',现在是:'{mode}'。") # 查找节点的方向 if direction == 'next': @@ -177,7 +178,7 @@ class DrissionElement(BaseElement): elif direction == 'prev': direction_txt = 'preceding' else: - raise ValueError(f"Argument direction can only be 'next' or 'prev', not '{direction}'.") + raise ValueError(f"direction参数只能是'next'或'prev',现在是:'{direction}'。") timeout = 0 if direction == 'prev' else .5 diff --git a/DrissionPage/common.py b/DrissionPage/common.py index 1920f08..e9f5c74 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -136,7 +136,7 @@ def _make_xpath_str(tag: str, arg: str, val: str, mode: str = 'fuzzy') -> str: return f"//*[{tag_name}contains({arg},{_make_search_str(val)})]" else: - raise ValueError("Argument mode can only be 'exact' or 'fuzzy'.") + raise ValueError("mode参数只能是'exact'或'fuzzy'。") def _make_search_str(search_str: str) -> str: diff --git a/DrissionPage/config.py b/DrissionPage/config.py index e92aa0f..857bdf6 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -657,7 +657,7 @@ def _dict_to_chrome_options(options: dict) -> Options: # 启动参数 if options.get('arguments', None): if not isinstance(options['arguments'], list): - raise Exception(f"Arguments need list,not {type(options['arguments'])}.") + raise Exception(f"参数必须为list,现在是:{type(options['arguments'])}。") for arg in options['arguments']: chrome_options.add_argument(arg) @@ -665,7 +665,7 @@ def _dict_to_chrome_options(options: dict) -> Options: # 加载插件 if options.get('extension_files', None): if not isinstance(options['extension_files'], list): - raise Exception(f'Extension files need list,not {type(options["extension_files"])}.') + raise Exception(f'extension_files必须是list,现在是:{type(options["extension_files"])}。') for arg in options['extension_files']: chrome_options.add_extension(arg) @@ -673,7 +673,7 @@ def _dict_to_chrome_options(options: dict) -> Options: # 扩展设置 if options.get('extensions', None): if not isinstance(options['extensions'], list): - raise Exception(f'Extensions need list,not {type(options["extensions"])}.') + raise Exception(f'extensions必须是list,现在是:{type(options["extensions"])}。') for arg in options['extensions']: chrome_options.add_encoded_extension(arg) @@ -681,7 +681,7 @@ def _dict_to_chrome_options(options: dict) -> Options: # 实验性质的设置参数 if options.get('experimental_options', None): if not isinstance(options['experimental_options'], dict): - raise Exception(f'Experimental options need dict,not {type(options["experimental_options"])}.') + raise Exception(f'experimental_options必须是dict,现在是:{type(options["experimental_options"])}。') for i in options['experimental_options']: chrome_options.add_experimental_option(i, options['experimental_options'][i]) @@ -763,7 +763,6 @@ def _cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict: attr_val = attr.lstrip().split('=') if key == 0: - # TODO: 检查 cookie_dict['name'] = attr_val[0] cookie_dict['value'] = attr_val[1] if len(attr_val) == 2 else '' else: @@ -772,7 +771,7 @@ def _cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict: return cookie_dict else: - raise TypeError + raise TypeError('cookie参数必须为Cookie、str或dict类型。') return cookie_dict @@ -792,6 +791,6 @@ def _cookies_to_tuple(cookies: Union[RequestsCookieJar, list, tuple, str, dict]) cookies = tuple({'name': cookie, 'value': cookies[cookie]} for cookie in cookies) else: - raise TypeError + raise TypeError('cookies参数必须为RequestsCookieJar、list、tuple、str或dict类型。') return cookies diff --git a/DrissionPage/configs.ini b/DrissionPage/configs.ini index b8411b3..b9d0d2c 100644 --- a/DrissionPage/configs.ini +++ b/DrissionPage/configs.ini @@ -1,10 +1,10 @@ [paths] -chromedriver_path = D:\python\Google Chrome\Chrome\chromedriver75.exe -tmp_path = D:\python\projects\DrissionPage\DrissionPage\tmp +chromedriver_path = +tmp_path = [chrome_options] -debugger_address = 127.0.0.1:9222 -binary_location = D:\python\Google Chrome\Chrome\chrome.exe +debugger_address = +binary_location = arguments = ['--no-sandbox', '--disable-gpu', '--ignore-certificate-errors', '--disable-infobars'] extensions = [] experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}, 'plugins.plugins_list': [{'enabled': False, 'name': 'Chrome PDF Viewer'}]}, 'useAutomationExtension': False, 'excludeSwitches': ['enable-automation']} diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 7ff65fe..3580612 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -84,7 +84,7 @@ class Drission(object): """ if self._driver is None: if not isinstance(self._driver_options, dict): - raise TypeError('Driver options invalid') + raise TypeError('无效的Driver配置。') options = _dict_to_chrome_options(self._driver_options) @@ -240,7 +240,7 @@ class Drission(object): url = extract(browser_domain) cookie_domain = f'{url.domain}.{url.suffix}' else: - raise ValueError('There is no domain name in the cookie or the browser has not visited a URL.') + raise ValueError('cookie中没有域名或浏览器未访问过URL。') cookie['domain'] = cookie_domain diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 53daca4..099b786 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -41,7 +41,7 @@ class DriverElement(DrissionElement): :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param mode: 'single' 或 'all',对应查找一个或全部 :param timeout: 超时时间 - :return: DriverElement对象 + :return: DriverElement对象或属性文本 """ return super().__call__(loc_or_str, mode, timeout) @@ -134,9 +134,14 @@ class DriverElement(DrissionElement): loc_str = f'{self.css_path}{loc_or_str[1]}' loc_or_str = loc_or_str[0], loc_str - return execute_driver_find(self, loc_or_str, mode, timeout) + return make_driver_ele(self, loc_or_str, mode, timeout) - def s_ele(self, loc_or_ele, mode='single', timeout=None): + def s_ele(self, loc_or_ele, mode='single'): + """查找元素以SessionElement形式返回,处理复杂页面时效率很高 \n + :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 + :param mode: 查找第一个或全部 + :return: SessionElement对象或属性、文本 + """ return make_session_ele(self, loc_or_ele, mode) def eles(self, @@ -168,7 +173,7 @@ class DriverElement(DrissionElement): txt5 = '''return path.substr(1);''' else: - raise ValueError(f"Argument mode can only be 'xpath' or 'css', not '{mode}'.") + raise ValueError(f"mode参数只能是'xpath'或'css',现在是:'{mode}'。") js = ''' function e(el) { @@ -459,7 +464,7 @@ class DriverElement(DrissionElement): self.run_script(f'arguments[0].removeAttribute("{attr}");') return True except: - raise False + return False def drag(self, x: int, y: int, speed: int = 40, shake: bool = True) -> bool: """拖拽当前元素到相对位置 \n @@ -490,7 +495,7 @@ class DriverElement(DrissionElement): elif isinstance(ele_or_loc, tuple): target_x, target_y = ele_or_loc else: - raise TypeError('Need DriverElement, WebElement object or coordinate information.') + raise TypeError('需要DriverElement、WebElement对象或坐标。') current_x = self.location['x'] + self.size['width'] // 2 current_y = self.location['y'] + self.size['height'] // 2 @@ -525,10 +530,10 @@ class DriverElement(DrissionElement): ActionChains(self.page.driver).move_to_element(self.inner_ele).perform() -def execute_driver_find(page_or_ele, - loc: Tuple[str, str], - mode: str = 'single', - timeout: float = None) -> Union[DriverElement, List[DriverElement], str, None]: +def make_driver_ele(page_or_ele, + loc: Union[str, Tuple[str, str]], + mode: str = 'single', + timeout: float = None) -> Union[DriverElement, List[DriverElement], str, None]: """执行driver模式元素的查找 \n 页面查找元素及元素查找下级元素皆使用此方法 \n :param page_or_ele: DriverPage对象或DriverElement对象 @@ -539,7 +544,7 @@ def execute_driver_find(page_or_ele, """ mode = mode or 'single' if mode not in ('single', 'all'): - raise ValueError(f"Argument mode can only be 'single' or 'all', not '{mode}'.") + raise ValueError(f"mode参数只能是'single'或'all',现在是:'{mode}'。") if isinstance(page_or_ele, BaseElement): page = page_or_ele.page @@ -555,6 +560,15 @@ def execute_driver_find(page_or_ele, page.wait_object._driver = driver wait = page.wait_object + # ---------------处理定位符--------------- + if isinstance(loc, str): + loc = str_to_loc(loc) + elif isinstance(loc, tuple): + loc = translate_loc(loc) + else: + raise ValueError("定位符必须为str或长度为2的tuple。") + + # ---------------执行查找----------------- try: # 使用xpath查找 if loc[0] == 'xpath': @@ -572,7 +586,7 @@ def execute_driver_find(page_or_ele, return [] if mode == 'all' else None except InvalidElementStateException: - raise ValueError(f'Invalid query syntax. {loc}') + raise ValueError(f'无效的查找语句:{loc}') class ElementsByXpath(object): @@ -681,7 +695,7 @@ class Select(object): :param ele: select 元素对象 """ if ele.tag != 'select': - raise TypeError(f"Select only works on 元素使用,现在是:{ele.tag}。") from selenium.webdriver.support.select import Select as sl self.inner_ele = ele @@ -821,7 +835,7 @@ class Select(object): def invert(self) -> None: """反选""" if not self.is_multi: - raise NotImplementedError("You may only deselect options of a multi-select") + raise NotImplementedError("只能对多项选框执行反选。") for i in self.options: i.click() @@ -839,7 +853,7 @@ def _wait_ele(page_or_ele, :return: 等待是否成功 """ if mode.lower() not in ('del', 'display', 'hidden'): - raise ValueError('Argument mode can only be "del", "display", "hidden"') + raise ValueError('mode参数只能是"del"、"display"或"hidden"。') if isinstance(page_or_ele, DrissionElement): # TODO: 是否要改为 BaseElement page = page_or_ele.page @@ -865,7 +879,7 @@ def _wait_ele(page_or_ele, pass else: - raise TypeError('The type of loc_or_ele can only be str, tuple, DriverElement, WebElement') + raise TypeError('loc_or_ele参数只能是str、tuple、DriverElement 或 WebElement类型') # 当传入参数是元素对象时 if is_ele: diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index c28bdd1..0c04a96 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -17,8 +17,8 @@ from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.support.wait import WebDriverWait from .base import BasePage -from .common import str_to_loc, get_available_file_name, translate_loc, format_html -from .driver_element import DriverElement, execute_driver_find, _wait_ele +from .common import get_available_file_name, format_html +from .driver_element import DriverElement, make_driver_ele, _wait_ele from .session_element import make_session_ele @@ -34,13 +34,13 @@ class DriverPage(BasePage): def __call__(self, loc_or_str: Union[Tuple[str, str], str, DriverElement, WebElement], mode: str = 'single', - timeout: float = None) -> Union[DriverElement, List[DriverElement]]: + timeout: float = None) -> Union[DriverElement, List[DriverElement], str]: """在内部查找元素 \n 例:ele = page('@id=ele_id') \n :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param mode: 'single' 或 'all',对应查找一个或全部 :param timeout: 超时时间 - :return: DriverElement对象 + :return: DriverElement对象或属性文本 """ return super().__call__(loc_or_str, mode, timeout) @@ -107,12 +107,7 @@ class DriverPage(BasePage): """ # 接收到字符串或元组,获取定位loc元组 if isinstance(loc_or_ele, (str, tuple)): - if isinstance(loc_or_ele, str): - loc_or_ele = str_to_loc(loc_or_ele) - else: - if len(loc_or_ele) != 2: - raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.") - loc_or_ele = translate_loc(loc_or_ele) + return make_driver_ele(self, loc_or_ele, mode, timeout) # 接收到DriverElement对象直接返回 elif isinstance(loc_or_ele, DriverElement): @@ -124,11 +119,14 @@ class DriverPage(BasePage): # 接收到的类型不正确,抛出异常 else: - raise ValueError('Argument loc_or_str can only be tuple, str, DriverElement, DriverElement.') + raise ValueError('loc_or_str参数只能是tuple、str、DriverElement 或 DriverElement类型。') - return execute_driver_find(self, loc_or_ele, mode, timeout) - - def s_ele(self, loc_or_ele, mode='single', timeout=None): + def s_ele(self, loc_or_ele, mode='single'): + """查找元素以SessionElement形式返回,处理复杂页面时效率很高 \n + :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 + :param mode: 查找第一个或全部 + :return: SessionElement对象或属性、文本 + """ return make_session_ele(self, loc_or_ele, mode) def eles(self, @@ -192,7 +190,7 @@ class DriverPage(BasePage): print(f'重试 {to_url}') if is_ok is False and show_errmsg: - raise err if err is not None else ConnectionError('Connect error.') + raise err if err is not None else ConnectionError('连接异常。') return is_ok @@ -295,7 +293,7 @@ class DriverPage(BasePage): elif isinstance(tab, (list, tuple)): page_handle = tab else: - raise TypeError('Argument num_or_handle can only be int, str, list or tuple.') + raise TypeError('num_or_handle参数只能是int、str、list 或 tuple类型。') for i in tabs: # 遍历所有标签页,关闭非保留的 if i not in page_handle: @@ -422,8 +420,8 @@ class DriverPage(BasePage): self.driver.execute_script(f"window.scrollBy({pixel},0);") else: - raise ValueError("Argument mode can only be " - "'top', 'bottom', 'half', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right'.") + raise ValueError("mode参数只能是'top', 'bottom', 'half', 'rightmost', " + "'leftmost', 'up', 'down', 'left', 'right'。") def refresh(self) -> None: """刷新当前页面""" @@ -447,7 +445,7 @@ class DriverPage(BasePage): else: if x < 0 or y < 0: - raise ValueError('Arguments x and y must greater than 0.') + raise ValueError('x 和 y参数必须大于0。') new_x = x or self.driver.get_window_size()['width'] new_y = y or self.driver.get_window_size()['height'] diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index 513558e..bee5da8 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -58,7 +58,7 @@ class MixPage(SessionPage, DriverPage, BasePage): :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param mode: 'single' 或 'all',对应查找一个或全部 :param timeout: 超时时间 - :return: DriverElement对象 + :return: 子元素对象或属性文本 """ return super().__call__(loc_or_str, mode, timeout) @@ -133,19 +133,24 @@ class MixPage(SessionPage, DriverPage, BasePage): elif self._mode == 'd': return super(SessionPage, self).ele(loc_or_ele, mode=mode, timeout=timeout) - def s_ele(self, loc_or_ele, mode='single', timeout=None): + def s_ele(self, loc_or_ele, mode='single') -> Union[SessionElement, List[SessionElement], List[str]]: + """查找元素以SessionElement形式返回,处理复杂页面时效率很高 \n + :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 + :param mode: 查找第一个或全部 + :return: SessionElement对象或属性、文本 + """ if self._mode == 's': return super().s_ele(loc_or_ele, mode=mode) elif self._mode == 'd': - return super(SessionPage, self).s_ele(loc_or_ele, mode=mode, timeout=timeout) + return super(SessionPage, self).s_ele(loc_or_ele, mode=mode) def eles(self, loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union[List[DriverElement], List[SessionElement]]: - """返回页面中所有符合条件的元素、属性或节点文本 \n + timeout: float = None) -> Union[List[DriverElement], List[SessionElement], List[str]]: + """返回页面中所有符合条件的元素、属性或节点文本 \n :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param timeout: 查找元素超时时间,d模式专用 - :return: 元素对象或属性、文本节点文本组成的列表 + :return: 元素对象或属性、文本组成的列表 """ return super(SessionPage, self).eles(loc_or_str, timeout=timeout) @@ -368,8 +373,8 @@ class MixPage(SessionPage, DriverPage, BasePage): path = download_path or self._drission.driver_options['experimental_options']['prefs'][ 'download.default_directory'] if not path: - raise + raise ValueError('未指定下载路径。') except: - raise IOError('Download path not found.') + raise IOError('无法找到下载路径。') return super().chrome_downloading(path) diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index 0d977f0..fa9c9a1 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -11,7 +11,7 @@ from urllib.parse import urlparse, urljoin, urlunparse from lxml.etree import tostring from lxml.html import HtmlElement, fromstring -from .base import DrissionElement +from .base import DrissionElement, BasePage, BaseElement from .common import str_to_loc, translate_loc, format_html @@ -31,7 +31,7 @@ class SessionElement(DrissionElement): :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param mode: 'single' 或 'all',对应查找一个或全部 :param timeout: 不起实际作用,用于和父类对应 - :return: SessionElement对象 + :return: SessionElement对象或属性文本 """ return super().__call__(loc_or_str, mode, timeout) @@ -158,7 +158,6 @@ class SessionElement(DrissionElement): element = self.page loc_or_str = loc_or_str[0], loc_str - return make_session_ele(element, loc_or_str, mode) def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout=None): @@ -169,8 +168,13 @@ class SessionElement(DrissionElement): """ return self.ele(loc_or_str, mode='all') - def s_ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None, timeout=None): - return self.ele(loc_or_str, mode=mode, timeout=timeout) + def s_ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None): + """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 \n + :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 + :param mode: 'single' 或 'all‘,对应查找一个或全部 + :return: SessionElement对象 + """ + return self.ele(loc_or_str, mode=mode) def _get_ele_path(self, mode) -> str: """获取css路径或xpath路径 @@ -217,37 +221,39 @@ class SessionElement(DrissionElement): return link -def make_session_ele(page_or_ele, +def make_session_ele(html_or_ele: Union[str, BaseElement, BasePage], loc: Union[str, Tuple[str, str]], mode: str = 'single', ) -> Union[SessionElement, List[SessionElement], str, None]: - """执行session模式元素的查找 \n - 页面查找元素及元素查找下级元素皆使用此方法 \n - :param page_or_ele: SessionPage对象或SessionElement对象 - :param loc: 元素定位元组 + """从接收到的对象或html文本中查找元素,返回SessionElement对象 \n + :param html_or_ele: html文本、BaseParser对象 + :param loc: 定位元组或字符串 :param mode: 'single' 或 'all',对应获取第一个或全部 - :return: 返回SessionElement元素或列表 + :return: 返回SessionElement元素或列表,或属性文本 """ mode = mode or 'single' if mode not in ('single', 'all'): - raise ValueError(f"Argument mode can only be 'single' or 'all', not '{mode}'.") + raise ValueError(f"mode参数只能是'single'或'all',现在是:'{mode}'。") # 根据传入对象类型获取页面对象和lxml元素对象 - type_str = str(type(page_or_ele)) - if isinstance(page_or_ele, str): # 直接传入html文本 + if isinstance(html_or_ele, SessionElement): # SessionElement + page = html_or_ele.page + html_or_ele = html_or_ele.inner_ele + # html_or_ele = fromstring(sub(r' ?', ' ', html_or_ele.response.text)) + + elif isinstance(html_or_ele, BasePage): # MixPage, DriverPage 或 SessionPage + page = html_or_ele + html_or_ele = fromstring(html_or_ele.html) + + elif isinstance(html_or_ele, str): # 直接传入html文本 page = None - page_or_ele = fromstring(page_or_ele) - elif type_str.endswith("SessionElement'>"): # SessionElement - page = page_or_ele.page - page_or_ele = page_or_ele.inner_ele - elif "Page" in type_str: # MixPage, DriverPage 或 SessionPage - page = page_or_ele - page_or_ele = fromstring(page_or_ele.html) - else: # DrissionElement 或 ShadowRootElement - page = page_or_ele.page - page_or_ele = fromstring(page_or_ele.html) - # else: # 传入的是SessionPage对象 - # page = page_or_ele - # page_or_ele = fromstring(sub(r' ?', ' ', page_or_ele.response.text)) + html_or_ele = fromstring(html_or_ele) + + elif isinstance(html_or_ele, BaseElement): # DrissionElement 或 ShadowRootElement + page = html_or_ele.page + html_or_ele = fromstring(html_or_ele.html) + + else: + raise TypeError('html_or_ele参数只能是元素、页面对象或html文本。') # ---------------处理定位符--------------- if isinstance(loc, str): @@ -257,12 +263,12 @@ def make_session_ele(page_or_ele, else: raise ValueError("定位符必须为str或长度为2的tuple。") - # ---------------执行搜索----------------- + # ---------------执行查找----------------- try: if loc[0] == 'xpath': # 用lxml内置方法获取lxml的元素对象列表 - ele = page_or_ele.xpath(loc[1]) + ele = html_or_ele.xpath(loc[1]) else: # 用css selector获取元素对象列表 - ele = page_or_ele.cssselect(loc[1]) + ele = html_or_ele.cssselect(loc[1]) if not isinstance(ele, list): # 结果不是列表,如数字 return ele @@ -282,8 +288,8 @@ def make_session_ele(page_or_ele, except Exception as e: if 'Invalid expression' in str(e): - raise SyntaxError(f'Invalid xpath syntax. {loc}') + raise SyntaxError(f'无效的xpath语句:{loc}') elif 'Expected selector' in str(e): - raise SyntaxError(f'Invalid css selector syntax. {loc}') + raise SyntaxError(f'无效的css select语句:{loc}') raise e diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index b4c2b8c..ce51f17 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -33,13 +33,13 @@ class SessionPage(BasePage): def __call__(self, loc_or_str: Union[Tuple[str, str], str, SessionElement], mode: str = 'single', - timeout: float = None) -> Union[SessionElement, List[SessionElement]]: + timeout: float = None) -> Union[SessionElement, List[SessionElement], str]: """在内部查找元素 \n 例:ele2 = ele1('@id=ele_id') \n :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param mode: 'single' 或 'all',对应查找一个或全部 :param timeout: 不起实际作用,用于和父类对应 - :return: SessionElement对象 + :return: SessionElement对象或属性文本 """ return super().__call__(loc_or_str, mode, timeout) @@ -94,7 +94,7 @@ class SessionPage(BasePage): else: if show_errmsg: - raise ConnectionError(f'{to_url}\nStatus code: {self._response.status_code}.') + raise ConnectionError(f'{to_url}\n连接状态码:{self._response.status_code}.') self._url_available = False @@ -120,8 +120,13 @@ class SessionPage(BasePage): """ return super().eles(loc_or_str, timeout) - def s_ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None, timeout=None): - return self.ele(loc_or_str, mode=mode, timeout=timeout) + def s_ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None): + """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n + :param loc_or_str: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 + :param mode: 'single' 或 'all‘,对应查找一个或全部 + :return: SessionElement对象 + """ + return self.ele(loc_or_str, mode=mode) def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]: """返回cookies \n @@ -180,7 +185,7 @@ class SessionPage(BasePage): print(f'重试 {to_url}') if not r and show_errmsg: - raise err if err is not None else ConnectionError('Connect error.') + raise err if err is not None else ConnectionError('连接异常。') return r @@ -232,7 +237,7 @@ class SessionPage(BasePage): else: if show_errmsg: - raise ConnectionError(f'Status code: {self._response.status_code}.') + raise ConnectionError(f'连接状态码:{self._response.status_code}.') self._url_available = False return self._url_available @@ -292,7 +297,7 @@ class SessionPage(BasePage): if not r.ok: if errmsg: - raise ConnectionError(f'Status code: {r.status_code}.') + raise ConnectionError(f'连接状态码:{r.status_code}.') return False, f'Status code: {r.status_code}.' @@ -361,7 +366,7 @@ class SessionPage(BasePage): pass else: - raise ValueError("Argument file_exists can only be 'skip', 'overwrite', 'rename'.") + raise ValueError("file_exists参数只能是'skip'、'overwrite'或'rename'。") # -------------------打印要下载的文件------------------- if msg: @@ -404,7 +409,7 @@ class SessionPage(BasePage): else: if full_path.stat().st_size == 0: if errmsg: - raise ValueError('File size is 0.') + raise ValueError('文件大小为0。') download_status, info = False, 'File size is 0.' @@ -456,11 +461,11 @@ class SessionPage(BasePage): """ if not url: if show_errmsg: - raise ValueError('url is empty.') + raise ValueError('URL为空。') return None, 'url is empty.' if mode not in ('get', 'post'): - raise ValueError("Argument mode can only be 'get' or 'post'.") + raise ValueError("mode参数只能是'get'或'post'。") url = quote(url, safe='/:&?=%;#@+!') diff --git a/DrissionPage/shadow_root_element.py b/DrissionPage/shadow_root_element.py index 39e68ee..d28ccf3 100644 --- a/DrissionPage/shadow_root_element.py +++ b/DrissionPage/shadow_root_element.py @@ -11,7 +11,7 @@ from selenium.webdriver.remote.webelement import WebElement from .base import BaseElement from .common import format_html -from .driver_element import execute_driver_find, DriverElement +from .driver_element import make_driver_ele, DriverElement from .session_element import make_session_ele @@ -26,13 +26,13 @@ class ShadowRootElement(BaseElement): def __call__(self, loc_or_str: Union[Tuple[str, str], str], mode: str = 'single', - timeout: float = None) -> Union[DriverElement, List[DriverElement]]: + timeout: float = None) -> Union[DriverElement, List[DriverElement], str]: """在内部查找元素 \n 例:ele2 = ele1('@id=ele_id') \n :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param mode: 'single' 或 'all',对应查找一个或全部 :param timeout: 超时时间 - :return: DriverElement对象 + :return: DriverElement对象或属性文本 """ return self.ele(loc_or_str, mode, timeout) @@ -81,16 +81,21 @@ class ShadowRootElement(BaseElement): loc_or_str = str_to_css_loc(loc_or_str) elif isinstance(loc_or_str, tuple) and len(loc_or_str) == 2: if loc_or_str[0] == 'xpath': - raise ValueError('不支持xpath') + raise ValueError('不支持xpath。') else: - raise ValueError('Argument loc_or_str can only be tuple or str.') + raise ValueError('loc_or_str参数只能是tuple或str类型。') if loc_or_str[0] == 'css selector': - return execute_driver_find(self, loc_or_str, mode, timeout) + return make_driver_ele(self, loc_or_str, mode, timeout) elif loc_or_str[0] == 'text': return self._find_eles_by_text(loc_or_str[1], loc_or_str[2], loc_or_str[3], mode) - def s_ele(self, loc_or_ele, mode='single', timeout=None): + def s_ele(self, loc_or_ele, mode='single'): + """查找元素以SessionElement形式返回,处理复杂页面时效率很高 \n + :param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串 + :param mode: 查找第一个或全部 + :return: SessionElement对象或属性、文本 + """ return make_session_ele(self, loc_or_ele, mode) def eles(self, @@ -213,7 +218,7 @@ def str_to_css_loc(loc: str) -> tuple: loc = f'text{loc[2:]}' elif loc.startswith(('x:', 'x=', 'xpath:', 'xpath=')): - raise ValueError('不支持xpath') + raise ValueError('不支持xpath。') # 根据属性查找 if loc.startswith('@'):