基本完成s_ele()

This commit is contained in:
g1879 2021-11-20 02:07:23 +08:00
parent 391d042635
commit bab4461b58
11 changed files with 150 additions and 117 deletions

View File

@ -26,11 +26,12 @@ class BaseParser(object):
timeout: float = None): timeout: float = None):
return self.ele(loc_or_str, mode='all', timeout=timeout) return self.ele(loc_or_str, mode='all', timeout=timeout)
def s_eles(self, def s_eles(self, loc_or_str: Union[Tuple[str, str], str]):
loc_or_str: Union[Tuple[str, str], str], """查找并以SessionElement方式返回元素 \n
timeout: float = None): :param loc_or_str: 定位符
"""查找并以SessionElement方式返回元素""" :return: SessionElement或属性文本组成的列表
return self.s_ele(loc_or_str, mode='all', timeout=timeout) """
return self.s_ele(loc_or_str, mode='all')
# ----------------以下属性或方法待后代实现---------------- # ----------------以下属性或方法待后代实现----------------
@property @property
@ -38,7 +39,7 @@ class BaseParser(object):
return return
@abstractmethod @abstractmethod
def s_ele(self, loc_or_ele, mode='single', timeout=None): def s_ele(self, loc_or_ele, mode='single'):
pass pass
@abstractmethod @abstractmethod
@ -169,7 +170,7 @@ class DrissionElement(BaseElement):
elif mode == 'text': elif mode == 'text':
node_txt = 'text()' node_txt = 'text()'
else: else:
raise ValueError(f"Argument mode can only be 'node' ,'ele' or 'text', not '{mode}'.") raise ValueError(f"mode参数只能是'node''ele''text',现在是:'{mode}'")
# 查找节点的方向 # 查找节点的方向
if direction == 'next': if direction == 'next':
@ -177,7 +178,7 @@ class DrissionElement(BaseElement):
elif direction == 'prev': elif direction == 'prev':
direction_txt = 'preceding' direction_txt = 'preceding'
else: else:
raise ValueError(f"Argument direction can only be 'next' or 'prev', not '{direction}'.") raise ValueError(f"direction参数只能是'next''prev',现在是:'{direction}'")
timeout = 0 if direction == 'prev' else .5 timeout = 0 if direction == 'prev' else .5

View File

@ -136,7 +136,7 @@ def _make_xpath_str(tag: str, arg: str, val: str, mode: str = 'fuzzy') -> str:
return f"//*[{tag_name}contains({arg},{_make_search_str(val)})]" return f"//*[{tag_name}contains({arg},{_make_search_str(val)})]"
else: else:
raise ValueError("Argument mode can only be 'exact' or 'fuzzy'.") raise ValueError("mode参数只能是'exact''fuzzy'")
def _make_search_str(search_str: str) -> str: def _make_search_str(search_str: str) -> str:

View File

@ -657,7 +657,7 @@ def _dict_to_chrome_options(options: dict) -> Options:
# 启动参数 # 启动参数
if options.get('arguments', None): if options.get('arguments', None):
if not isinstance(options['arguments'], list): if not isinstance(options['arguments'], list):
raise Exception(f"Arguments need listnot {type(options['arguments'])}.") raise Exception(f"参数必须为list现在是{type(options['arguments'])}")
for arg in options['arguments']: for arg in options['arguments']:
chrome_options.add_argument(arg) chrome_options.add_argument(arg)
@ -665,7 +665,7 @@ def _dict_to_chrome_options(options: dict) -> Options:
# 加载插件 # 加载插件
if options.get('extension_files', None): if options.get('extension_files', None):
if not isinstance(options['extension_files'], list): if not isinstance(options['extension_files'], list):
raise Exception(f'Extension files need listnot {type(options["extension_files"])}.') raise Exception(f'extension_files必须是list现在是{type(options["extension_files"])}')
for arg in options['extension_files']: for arg in options['extension_files']:
chrome_options.add_extension(arg) chrome_options.add_extension(arg)
@ -673,7 +673,7 @@ def _dict_to_chrome_options(options: dict) -> Options:
# 扩展设置 # 扩展设置
if options.get('extensions', None): if options.get('extensions', None):
if not isinstance(options['extensions'], list): if not isinstance(options['extensions'], list):
raise Exception(f'Extensions need listnot {type(options["extensions"])}.') raise Exception(f'extensions必须是list现在是{type(options["extensions"])}')
for arg in options['extensions']: for arg in options['extensions']:
chrome_options.add_encoded_extension(arg) chrome_options.add_encoded_extension(arg)
@ -681,7 +681,7 @@ def _dict_to_chrome_options(options: dict) -> Options:
# 实验性质的设置参数 # 实验性质的设置参数
if options.get('experimental_options', None): if options.get('experimental_options', None):
if not isinstance(options['experimental_options'], dict): if not isinstance(options['experimental_options'], dict):
raise Exception(f'Experimental options need dictnot {type(options["experimental_options"])}.') raise Exception(f'experimental_options必须是dict现在是{type(options["experimental_options"])}')
for i in options['experimental_options']: for i in options['experimental_options']:
chrome_options.add_experimental_option(i, options['experimental_options'][i]) chrome_options.add_experimental_option(i, options['experimental_options'][i])
@ -763,7 +763,6 @@ def _cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict:
attr_val = attr.lstrip().split('=') attr_val = attr.lstrip().split('=')
if key == 0: if key == 0:
# TODO: 检查
cookie_dict['name'] = attr_val[0] cookie_dict['name'] = attr_val[0]
cookie_dict['value'] = attr_val[1] if len(attr_val) == 2 else '' cookie_dict['value'] = attr_val[1] if len(attr_val) == 2 else ''
else: else:
@ -772,7 +771,7 @@ def _cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict:
return cookie_dict return cookie_dict
else: else:
raise TypeError raise TypeError('cookie参数必须为Cookie、str或dict类型。')
return cookie_dict return cookie_dict
@ -792,6 +791,6 @@ def _cookies_to_tuple(cookies: Union[RequestsCookieJar, list, tuple, str, dict])
cookies = tuple({'name': cookie, 'value': cookies[cookie]} for cookie in cookies) cookies = tuple({'name': cookie, 'value': cookies[cookie]} for cookie in cookies)
else: else:
raise TypeError raise TypeError('cookies参数必须为RequestsCookieJar、list、tuple、str或dict类型。')
return cookies return cookies

View File

@ -1,10 +1,10 @@
[paths] [paths]
chromedriver_path = D:\python\Google Chrome\Chrome\chromedriver75.exe chromedriver_path =
tmp_path = D:\python\projects\DrissionPage\DrissionPage\tmp tmp_path =
[chrome_options] [chrome_options]
debugger_address = 127.0.0.1:9222 debugger_address =
binary_location = D:\python\Google Chrome\Chrome\chrome.exe binary_location =
arguments = ['--no-sandbox', '--disable-gpu', '--ignore-certificate-errors', '--disable-infobars'] arguments = ['--no-sandbox', '--disable-gpu', '--ignore-certificate-errors', '--disable-infobars']
extensions = [] extensions = []
experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}, 'plugins.plugins_list': [{'enabled': False, 'name': 'Chrome PDF Viewer'}]}, 'useAutomationExtension': False, 'excludeSwitches': ['enable-automation']} experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}, 'plugins.plugins_list': [{'enabled': False, 'name': 'Chrome PDF Viewer'}]}, 'useAutomationExtension': False, 'excludeSwitches': ['enable-automation']}

View File

@ -84,7 +84,7 @@ class Drission(object):
""" """
if self._driver is None: if self._driver is None:
if not isinstance(self._driver_options, dict): if not isinstance(self._driver_options, dict):
raise TypeError('Driver options invalid') raise TypeError('无效的Driver配置。')
options = _dict_to_chrome_options(self._driver_options) options = _dict_to_chrome_options(self._driver_options)
@ -240,7 +240,7 @@ class Drission(object):
url = extract(browser_domain) url = extract(browser_domain)
cookie_domain = f'{url.domain}.{url.suffix}' cookie_domain = f'{url.domain}.{url.suffix}'
else: else:
raise ValueError('There is no domain name in the cookie or the browser has not visited a URL.') raise ValueError('cookie中没有域名或浏览器未访问过URL。')
cookie['domain'] = cookie_domain cookie['domain'] = cookie_domain

View File

@ -41,7 +41,7 @@ class DriverElement(DrissionElement):
:param loc_or_str: 元素的定位信息可以是loc元组或查询字符串 :param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
:param mode: 'single' 'all'对应查找一个或全部 :param mode: 'single' 'all'对应查找一个或全部
:param timeout: 超时时间 :param timeout: 超时时间
:return: DriverElement对象 :return: DriverElement对象或属性文本
""" """
return super().__call__(loc_or_str, mode, timeout) return super().__call__(loc_or_str, mode, timeout)
@ -134,9 +134,14 @@ class DriverElement(DrissionElement):
loc_str = f'{self.css_path}{loc_or_str[1]}' loc_str = f'{self.css_path}{loc_or_str[1]}'
loc_or_str = loc_or_str[0], loc_str loc_or_str = loc_or_str[0], loc_str
return execute_driver_find(self, loc_or_str, mode, timeout) return make_driver_ele(self, loc_or_str, mode, timeout)
def s_ele(self, loc_or_ele, mode='single', timeout=None): def s_ele(self, loc_or_ele, mode='single'):
"""查找元素以SessionElement形式返回处理复杂页面时效率很高 \n
:param loc_or_ele: 元素的定位信息可以是loc元组或查询字符串
:param mode: 查找第一个或全部
:return: SessionElement对象或属性文本
"""
return make_session_ele(self, loc_or_ele, mode) return make_session_ele(self, loc_or_ele, mode)
def eles(self, def eles(self,
@ -168,7 +173,7 @@ class DriverElement(DrissionElement):
txt5 = '''return path.substr(1);''' txt5 = '''return path.substr(1);'''
else: else:
raise ValueError(f"Argument mode can only be 'xpath' or 'css', not '{mode}'.") raise ValueError(f"mode参数只能是'xpath''css',现在是:'{mode}'")
js = ''' js = '''
function e(el) { function e(el) {
@ -459,7 +464,7 @@ class DriverElement(DrissionElement):
self.run_script(f'arguments[0].removeAttribute("{attr}");') self.run_script(f'arguments[0].removeAttribute("{attr}");')
return True return True
except: except:
raise False return False
def drag(self, x: int, y: int, speed: int = 40, shake: bool = True) -> bool: def drag(self, x: int, y: int, speed: int = 40, shake: bool = True) -> bool:
"""拖拽当前元素到相对位置 \n """拖拽当前元素到相对位置 \n
@ -490,7 +495,7 @@ class DriverElement(DrissionElement):
elif isinstance(ele_or_loc, tuple): elif isinstance(ele_or_loc, tuple):
target_x, target_y = ele_or_loc target_x, target_y = ele_or_loc
else: else:
raise TypeError('Need DriverElement, WebElement object or coordinate information.') raise TypeError('需要DriverElement、WebElement对象或坐标。')
current_x = self.location['x'] + self.size['width'] // 2 current_x = self.location['x'] + self.size['width'] // 2
current_y = self.location['y'] + self.size['height'] // 2 current_y = self.location['y'] + self.size['height'] // 2
@ -525,10 +530,10 @@ class DriverElement(DrissionElement):
ActionChains(self.page.driver).move_to_element(self.inner_ele).perform() ActionChains(self.page.driver).move_to_element(self.inner_ele).perform()
def execute_driver_find(page_or_ele, def make_driver_ele(page_or_ele,
loc: Tuple[str, str], loc: Union[str, Tuple[str, str]],
mode: str = 'single', mode: str = 'single',
timeout: float = None) -> Union[DriverElement, List[DriverElement], str, None]: timeout: float = None) -> Union[DriverElement, List[DriverElement], str, None]:
"""执行driver模式元素的查找 \n """执行driver模式元素的查找 \n
页面查找元素及元素查找下级元素皆使用此方法 \n 页面查找元素及元素查找下级元素皆使用此方法 \n
:param page_or_ele: DriverPage对象或DriverElement对象 :param page_or_ele: DriverPage对象或DriverElement对象
@ -539,7 +544,7 @@ def execute_driver_find(page_or_ele,
""" """
mode = mode or 'single' mode = mode or 'single'
if mode not in ('single', 'all'): if mode not in ('single', 'all'):
raise ValueError(f"Argument mode can only be 'single' or 'all', not '{mode}'.") raise ValueError(f"mode参数只能是'single''all',现在是:'{mode}'")
if isinstance(page_or_ele, BaseElement): if isinstance(page_or_ele, BaseElement):
page = page_or_ele.page page = page_or_ele.page
@ -555,6 +560,15 @@ def execute_driver_find(page_or_ele,
page.wait_object._driver = driver page.wait_object._driver = driver
wait = page.wait_object wait = page.wait_object
# ---------------处理定位符---------------
if isinstance(loc, str):
loc = str_to_loc(loc)
elif isinstance(loc, tuple):
loc = translate_loc(loc)
else:
raise ValueError("定位符必须为str或长度为2的tuple。")
# ---------------执行查找-----------------
try: try:
# 使用xpath查找 # 使用xpath查找
if loc[0] == 'xpath': if loc[0] == 'xpath':
@ -572,7 +586,7 @@ def execute_driver_find(page_or_ele,
return [] if mode == 'all' else None return [] if mode == 'all' else None
except InvalidElementStateException: except InvalidElementStateException:
raise ValueError(f'Invalid query syntax. {loc}') raise ValueError(f'无效的查找语句:{loc}')
class ElementsByXpath(object): class ElementsByXpath(object):
@ -681,7 +695,7 @@ class Select(object):
:param ele: select 元素对象 :param ele: select 元素对象
""" """
if ele.tag != 'select': if ele.tag != 'select':
raise TypeError(f"Select only works on <select> elements, not on {ele.tag}") raise TypeError(f"select方法只能在<select>元素使用,现在是:{ele.tag}")
from selenium.webdriver.support.select import Select as sl from selenium.webdriver.support.select import Select as sl
self.inner_ele = ele self.inner_ele = ele
@ -821,7 +835,7 @@ class Select(object):
def invert(self) -> None: def invert(self) -> None:
"""反选""" """反选"""
if not self.is_multi: if not self.is_multi:
raise NotImplementedError("You may only deselect options of a multi-select") raise NotImplementedError("只能对多项选框执行反选。")
for i in self.options: for i in self.options:
i.click() i.click()
@ -839,7 +853,7 @@ def _wait_ele(page_or_ele,
:return: 等待是否成功 :return: 等待是否成功
""" """
if mode.lower() not in ('del', 'display', 'hidden'): if mode.lower() not in ('del', 'display', 'hidden'):
raise ValueError('Argument mode can only be "del", "display", "hidden"') raise ValueError('mode参数只能是"del""display""hidden"')
if isinstance(page_or_ele, DrissionElement): # TODO: 是否要改为 BaseElement if isinstance(page_or_ele, DrissionElement): # TODO: 是否要改为 BaseElement
page = page_or_ele.page page = page_or_ele.page
@ -865,7 +879,7 @@ def _wait_ele(page_or_ele,
pass pass
else: else:
raise TypeError('The type of loc_or_ele can only be str, tuple, DriverElement, WebElement') raise TypeError('loc_or_ele参数只能是str、tuple、DriverElement 或 WebElement类型')
# 当传入参数是元素对象时 # 当传入参数是元素对象时
if is_ele: if is_ele:

View File

@ -17,8 +17,8 @@ from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support.wait import WebDriverWait
from .base import BasePage from .base import BasePage
from .common import str_to_loc, get_available_file_name, translate_loc, format_html from .common import get_available_file_name, format_html
from .driver_element import DriverElement, execute_driver_find, _wait_ele from .driver_element import DriverElement, make_driver_ele, _wait_ele
from .session_element import make_session_ele from .session_element import make_session_ele
@ -34,13 +34,13 @@ class DriverPage(BasePage):
def __call__(self, def __call__(self,
loc_or_str: Union[Tuple[str, str], str, DriverElement, WebElement], loc_or_str: Union[Tuple[str, str], str, DriverElement, WebElement],
mode: str = 'single', mode: str = 'single',
timeout: float = None) -> Union[DriverElement, List[DriverElement]]: timeout: float = None) -> Union[DriverElement, List[DriverElement], str]:
"""在内部查找元素 \n """在内部查找元素 \n
ele = page('@id=ele_id') \n ele = page('@id=ele_id') \n
:param loc_or_str: 元素的定位信息可以是loc元组或查询字符串 :param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
:param mode: 'single' 'all'对应查找一个或全部 :param mode: 'single' 'all'对应查找一个或全部
:param timeout: 超时时间 :param timeout: 超时时间
:return: DriverElement对象 :return: DriverElement对象或属性文本
""" """
return super().__call__(loc_or_str, mode, timeout) return super().__call__(loc_or_str, mode, timeout)
@ -107,12 +107,7 @@ class DriverPage(BasePage):
""" """
# 接收到字符串或元组获取定位loc元组 # 接收到字符串或元组获取定位loc元组
if isinstance(loc_or_ele, (str, tuple)): if isinstance(loc_or_ele, (str, tuple)):
if isinstance(loc_or_ele, str): return make_driver_ele(self, loc_or_ele, mode, timeout)
loc_or_ele = str_to_loc(loc_or_ele)
else:
if len(loc_or_ele) != 2:
raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.")
loc_or_ele = translate_loc(loc_or_ele)
# 接收到DriverElement对象直接返回 # 接收到DriverElement对象直接返回
elif isinstance(loc_or_ele, DriverElement): elif isinstance(loc_or_ele, DriverElement):
@ -124,11 +119,14 @@ class DriverPage(BasePage):
# 接收到的类型不正确,抛出异常 # 接收到的类型不正确,抛出异常
else: else:
raise ValueError('Argument loc_or_str can only be tuple, str, DriverElement, DriverElement.') raise ValueError('loc_or_str参数只能是tuple、str、DriverElement 或 DriverElement类型。')
return execute_driver_find(self, loc_or_ele, mode, timeout) def s_ele(self, loc_or_ele, mode='single'):
"""查找元素以SessionElement形式返回处理复杂页面时效率很高 \n
def s_ele(self, loc_or_ele, mode='single', timeout=None): :param loc_or_ele: 元素的定位信息可以是loc元组或查询字符串
:param mode: 查找第一个或全部
:return: SessionElement对象或属性文本
"""
return make_session_ele(self, loc_or_ele, mode) return make_session_ele(self, loc_or_ele, mode)
def eles(self, def eles(self,
@ -192,7 +190,7 @@ class DriverPage(BasePage):
print(f'重试 {to_url}') print(f'重试 {to_url}')
if is_ok is False and show_errmsg: if is_ok is False and show_errmsg:
raise err if err is not None else ConnectionError('Connect error.') raise err if err is not None else ConnectionError('连接异常。')
return is_ok return is_ok
@ -295,7 +293,7 @@ class DriverPage(BasePage):
elif isinstance(tab, (list, tuple)): elif isinstance(tab, (list, tuple)):
page_handle = tab page_handle = tab
else: else:
raise TypeError('Argument num_or_handle can only be int, str, list or tuple.') raise TypeError('num_or_handle参数只能是int、str、list 或 tuple类型。')
for i in tabs: # 遍历所有标签页,关闭非保留的 for i in tabs: # 遍历所有标签页,关闭非保留的
if i not in page_handle: if i not in page_handle:
@ -422,8 +420,8 @@ class DriverPage(BasePage):
self.driver.execute_script(f"window.scrollBy({pixel},0);") self.driver.execute_script(f"window.scrollBy({pixel},0);")
else: else:
raise ValueError("Argument mode can only be " raise ValueError("mode参数只能是'top', 'bottom', 'half', 'rightmost', "
"'top', 'bottom', 'half', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right'.") "'leftmost', 'up', 'down', 'left', 'right'")
def refresh(self) -> None: def refresh(self) -> None:
"""刷新当前页面""" """刷新当前页面"""
@ -447,7 +445,7 @@ class DriverPage(BasePage):
else: else:
if x < 0 or y < 0: if x < 0 or y < 0:
raise ValueError('Arguments x and y must greater than 0.') raise ValueError('x 和 y参数必须大于0。')
new_x = x or self.driver.get_window_size()['width'] new_x = x or self.driver.get_window_size()['width']
new_y = y or self.driver.get_window_size()['height'] new_y = y or self.driver.get_window_size()['height']

View File

@ -58,7 +58,7 @@ class MixPage(SessionPage, DriverPage, BasePage):
:param loc_or_str: 元素的定位信息可以是loc元组或查询字符串 :param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
:param mode: 'single' 'all'对应查找一个或全部 :param mode: 'single' 'all'对应查找一个或全部
:param timeout: 超时时间 :param timeout: 超时时间
:return: DriverElement对象 :return: 子元素对象或属性文本
""" """
return super().__call__(loc_or_str, mode, timeout) return super().__call__(loc_or_str, mode, timeout)
@ -133,19 +133,24 @@ class MixPage(SessionPage, DriverPage, BasePage):
elif self._mode == 'd': elif self._mode == 'd':
return super(SessionPage, self).ele(loc_or_ele, mode=mode, timeout=timeout) return super(SessionPage, self).ele(loc_or_ele, mode=mode, timeout=timeout)
def s_ele(self, loc_or_ele, mode='single', timeout=None): def s_ele(self, loc_or_ele, mode='single') -> Union[SessionElement, List[SessionElement], List[str]]:
"""查找元素以SessionElement形式返回处理复杂页面时效率很高 \n
:param loc_or_ele: 元素的定位信息可以是loc元组或查询字符串
:param mode: 查找第一个或全部
:return: SessionElement对象或属性文本
"""
if self._mode == 's': if self._mode == 's':
return super().s_ele(loc_or_ele, mode=mode) return super().s_ele(loc_or_ele, mode=mode)
elif self._mode == 'd': elif self._mode == 'd':
return super(SessionPage, self).s_ele(loc_or_ele, mode=mode, timeout=timeout) return super(SessionPage, self).s_ele(loc_or_ele, mode=mode)
def eles(self, def eles(self,
loc_or_str: Union[Tuple[str, str], str], loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> Union[List[DriverElement], List[SessionElement]]: timeout: float = None) -> Union[List[DriverElement], List[SessionElement], List[str]]:
"""返回页面中所有符合条件的元素、属性或节点文本 \n """返回页面中所有符合条件的元素、属性或节点文本 \n
:param loc_or_str: 元素的定位信息可以是loc元组或查询字符串 :param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
:param timeout: 查找元素超时时间d模式专用 :param timeout: 查找元素超时时间d模式专用
:return: 元素对象或属性文本节点文本组成的列表 :return: 元素对象或属性文本组成的列表
""" """
return super(SessionPage, self).eles(loc_or_str, timeout=timeout) return super(SessionPage, self).eles(loc_or_str, timeout=timeout)
@ -368,8 +373,8 @@ class MixPage(SessionPage, DriverPage, BasePage):
path = download_path or self._drission.driver_options['experimental_options']['prefs'][ path = download_path or self._drission.driver_options['experimental_options']['prefs'][
'download.default_directory'] 'download.default_directory']
if not path: if not path:
raise raise ValueError('未指定下载路径。')
except: except:
raise IOError('Download path not found.') raise IOError('无法找到下载路径。')
return super().chrome_downloading(path) return super().chrome_downloading(path)

View File

@ -11,7 +11,7 @@ from urllib.parse import urlparse, urljoin, urlunparse
from lxml.etree import tostring from lxml.etree import tostring
from lxml.html import HtmlElement, fromstring from lxml.html import HtmlElement, fromstring
from .base import DrissionElement from .base import DrissionElement, BasePage, BaseElement
from .common import str_to_loc, translate_loc, format_html from .common import str_to_loc, translate_loc, format_html
@ -31,7 +31,7 @@ class SessionElement(DrissionElement):
:param loc_or_str: 元素的定位信息可以是loc元组或查询字符串 :param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
:param mode: 'single' 'all'对应查找一个或全部 :param mode: 'single' 'all'对应查找一个或全部
:param timeout: 不起实际作用用于和父类对应 :param timeout: 不起实际作用用于和父类对应
:return: SessionElement对象 :return: SessionElement对象或属性文本
""" """
return super().__call__(loc_or_str, mode, timeout) return super().__call__(loc_or_str, mode, timeout)
@ -158,7 +158,6 @@ class SessionElement(DrissionElement):
element = self.page element = self.page
loc_or_str = loc_or_str[0], loc_str loc_or_str = loc_or_str[0], loc_str
return make_session_ele(element, loc_or_str, mode) return make_session_ele(element, loc_or_str, mode)
def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout=None): def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout=None):
@ -169,8 +168,13 @@ class SessionElement(DrissionElement):
""" """
return self.ele(loc_or_str, mode='all') return self.ele(loc_or_str, mode='all')
def s_ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None, timeout=None): def s_ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None):
return self.ele(loc_or_str, mode=mode, timeout=timeout) """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 \n
:param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
:param mode: 'single' 'all对应查找一个或全部
:return: SessionElement对象
"""
return self.ele(loc_or_str, mode=mode)
def _get_ele_path(self, mode) -> str: def _get_ele_path(self, mode) -> str:
"""获取css路径或xpath路径 """获取css路径或xpath路径
@ -217,37 +221,39 @@ class SessionElement(DrissionElement):
return link return link
def make_session_ele(page_or_ele, def make_session_ele(html_or_ele: Union[str, BaseElement, BasePage],
loc: Union[str, Tuple[str, str]], loc: Union[str, Tuple[str, str]],
mode: str = 'single', ) -> Union[SessionElement, List[SessionElement], str, None]: mode: str = 'single', ) -> Union[SessionElement, List[SessionElement], str, None]:
"""执行session模式元素的查找 \n """从接收到的对象或html文本中查找元素返回SessionElement对象 \n
页面查找元素及元素查找下级元素皆使用此方法 \n :param html_or_ele: html文本BaseParser对象
:param page_or_ele: SessionPage对象或SessionElement对象 :param loc: 定位元组或字符串
:param loc: 元素定位元组
:param mode: 'single' 'all'对应获取第一个或全部 :param mode: 'single' 'all'对应获取第一个或全部
:return: 返回SessionElement元素或列表 :return: 返回SessionElement元素或列表或属性文本
""" """
mode = mode or 'single' mode = mode or 'single'
if mode not in ('single', 'all'): if mode not in ('single', 'all'):
raise ValueError(f"Argument mode can only be 'single' or 'all', not '{mode}'.") raise ValueError(f"mode参数只能是'single''all',现在是:'{mode}'")
# 根据传入对象类型获取页面对象和lxml元素对象 # 根据传入对象类型获取页面对象和lxml元素对象
type_str = str(type(page_or_ele)) if isinstance(html_or_ele, SessionElement): # SessionElement
if isinstance(page_or_ele, str): # 直接传入html文本 page = html_or_ele.page
html_or_ele = html_or_ele.inner_ele
# html_or_ele = fromstring(sub(r'&nbsp;?', '&nbsp;', html_or_ele.response.text))
elif isinstance(html_or_ele, BasePage): # MixPage, DriverPage 或 SessionPage
page = html_or_ele
html_or_ele = fromstring(html_or_ele.html)
elif isinstance(html_or_ele, str): # 直接传入html文本
page = None page = None
page_or_ele = fromstring(page_or_ele) html_or_ele = fromstring(html_or_ele)
elif type_str.endswith("SessionElement'>"): # SessionElement
page = page_or_ele.page elif isinstance(html_or_ele, BaseElement): # DrissionElement 或 ShadowRootElement
page_or_ele = page_or_ele.inner_ele page = html_or_ele.page
elif "Page" in type_str: # MixPage, DriverPage 或 SessionPage html_or_ele = fromstring(html_or_ele.html)
page = page_or_ele
page_or_ele = fromstring(page_or_ele.html) else:
else: # DrissionElement 或 ShadowRootElement raise TypeError('html_or_ele参数只能是元素、页面对象或html文本。')
page = page_or_ele.page
page_or_ele = fromstring(page_or_ele.html)
# else: # 传入的是SessionPage对象
# page = page_or_ele
# page_or_ele = fromstring(sub(r'&nbsp;?', '&nbsp;', page_or_ele.response.text))
# ---------------处理定位符--------------- # ---------------处理定位符---------------
if isinstance(loc, str): if isinstance(loc, str):
@ -257,12 +263,12 @@ def make_session_ele(page_or_ele,
else: else:
raise ValueError("定位符必须为str或长度为2的tuple。") raise ValueError("定位符必须为str或长度为2的tuple。")
# ---------------执行搜索----------------- # ---------------执行查找-----------------
try: try:
if loc[0] == 'xpath': # 用lxml内置方法获取lxml的元素对象列表 if loc[0] == 'xpath': # 用lxml内置方法获取lxml的元素对象列表
ele = page_or_ele.xpath(loc[1]) ele = html_or_ele.xpath(loc[1])
else: # 用css selector获取元素对象列表 else: # 用css selector获取元素对象列表
ele = page_or_ele.cssselect(loc[1]) ele = html_or_ele.cssselect(loc[1])
if not isinstance(ele, list): # 结果不是列表,如数字 if not isinstance(ele, list): # 结果不是列表,如数字
return ele return ele
@ -282,8 +288,8 @@ def make_session_ele(page_or_ele,
except Exception as e: except Exception as e:
if 'Invalid expression' in str(e): if 'Invalid expression' in str(e):
raise SyntaxError(f'Invalid xpath syntax. {loc}') raise SyntaxError(f'无效的xpath语句{loc}')
elif 'Expected selector' in str(e): elif 'Expected selector' in str(e):
raise SyntaxError(f'Invalid css selector syntax. {loc}') raise SyntaxError(f'无效的css select语句{loc}')
raise e raise e

View File

@ -33,13 +33,13 @@ class SessionPage(BasePage):
def __call__(self, def __call__(self,
loc_or_str: Union[Tuple[str, str], str, SessionElement], loc_or_str: Union[Tuple[str, str], str, SessionElement],
mode: str = 'single', mode: str = 'single',
timeout: float = None) -> Union[SessionElement, List[SessionElement]]: timeout: float = None) -> Union[SessionElement, List[SessionElement], str]:
"""在内部查找元素 \n """在内部查找元素 \n
ele2 = ele1('@id=ele_id') \n ele2 = ele1('@id=ele_id') \n
:param loc_or_str: 元素的定位信息可以是loc元组或查询字符串 :param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
:param mode: 'single' 'all'对应查找一个或全部 :param mode: 'single' 'all'对应查找一个或全部
:param timeout: 不起实际作用用于和父类对应 :param timeout: 不起实际作用用于和父类对应
:return: SessionElement对象 :return: SessionElement对象或属性文本
""" """
return super().__call__(loc_or_str, mode, timeout) return super().__call__(loc_or_str, mode, timeout)
@ -94,7 +94,7 @@ class SessionPage(BasePage):
else: else:
if show_errmsg: if show_errmsg:
raise ConnectionError(f'{to_url}\nStatus code: {self._response.status_code}.') raise ConnectionError(f'{to_url}\n连接状态码:{self._response.status_code}.')
self._url_available = False self._url_available = False
@ -120,8 +120,13 @@ class SessionPage(BasePage):
""" """
return super().eles(loc_or_str, timeout) return super().eles(loc_or_str, timeout)
def s_ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None, timeout=None): def s_ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None):
return self.ele(loc_or_str, mode=mode, timeout=timeout) """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n
:param loc_or_str: 元素的定位信息可以是元素对象loc元组或查询字符串
:param mode: 'single' 'all对应查找一个或全部
:return: SessionElement对象
"""
return self.ele(loc_or_str, mode=mode)
def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]: def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]:
"""返回cookies \n """返回cookies \n
@ -180,7 +185,7 @@ class SessionPage(BasePage):
print(f'重试 {to_url}') print(f'重试 {to_url}')
if not r and show_errmsg: if not r and show_errmsg:
raise err if err is not None else ConnectionError('Connect error.') raise err if err is not None else ConnectionError('连接异常。')
return r return r
@ -232,7 +237,7 @@ class SessionPage(BasePage):
else: else:
if show_errmsg: if show_errmsg:
raise ConnectionError(f'Status code: {self._response.status_code}.') raise ConnectionError(f'连接状态码:{self._response.status_code}.')
self._url_available = False self._url_available = False
return self._url_available return self._url_available
@ -292,7 +297,7 @@ class SessionPage(BasePage):
if not r.ok: if not r.ok:
if errmsg: if errmsg:
raise ConnectionError(f'Status code: {r.status_code}.') raise ConnectionError(f'连接状态码:{r.status_code}.')
return False, f'Status code: {r.status_code}.' return False, f'Status code: {r.status_code}.'
@ -361,7 +366,7 @@ class SessionPage(BasePage):
pass pass
else: else:
raise ValueError("Argument file_exists can only be 'skip', 'overwrite', 'rename'.") raise ValueError("file_exists参数只能是'skip''overwrite''rename'")
# -------------------打印要下载的文件------------------- # -------------------打印要下载的文件-------------------
if msg: if msg:
@ -404,7 +409,7 @@ class SessionPage(BasePage):
else: else:
if full_path.stat().st_size == 0: if full_path.stat().st_size == 0:
if errmsg: if errmsg:
raise ValueError('File size is 0.') raise ValueError('文件大小为0。')
download_status, info = False, 'File size is 0.' download_status, info = False, 'File size is 0.'
@ -456,11 +461,11 @@ class SessionPage(BasePage):
""" """
if not url: if not url:
if show_errmsg: if show_errmsg:
raise ValueError('url is empty.') raise ValueError('URL为空。')
return None, 'url is empty.' return None, 'url is empty.'
if mode not in ('get', 'post'): if mode not in ('get', 'post'):
raise ValueError("Argument mode can only be 'get' or 'post'.") raise ValueError("mode参数只能是'get''post'")
url = quote(url, safe='/:&?=%;#@+!') url = quote(url, safe='/:&?=%;#@+!')

View File

@ -11,7 +11,7 @@ from selenium.webdriver.remote.webelement import WebElement
from .base import BaseElement from .base import BaseElement
from .common import format_html from .common import format_html
from .driver_element import execute_driver_find, DriverElement from .driver_element import make_driver_ele, DriverElement
from .session_element import make_session_ele from .session_element import make_session_ele
@ -26,13 +26,13 @@ class ShadowRootElement(BaseElement):
def __call__(self, def __call__(self,
loc_or_str: Union[Tuple[str, str], str], loc_or_str: Union[Tuple[str, str], str],
mode: str = 'single', mode: str = 'single',
timeout: float = None) -> Union[DriverElement, List[DriverElement]]: timeout: float = None) -> Union[DriverElement, List[DriverElement], str]:
"""在内部查找元素 \n """在内部查找元素 \n
ele2 = ele1('@id=ele_id') \n ele2 = ele1('@id=ele_id') \n
:param loc_or_str: 元素的定位信息可以是loc元组或查询字符串 :param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
:param mode: 'single' 'all'对应查找一个或全部 :param mode: 'single' 'all'对应查找一个或全部
:param timeout: 超时时间 :param timeout: 超时时间
:return: DriverElement对象 :return: DriverElement对象或属性文本
""" """
return self.ele(loc_or_str, mode, timeout) return self.ele(loc_or_str, mode, timeout)
@ -81,16 +81,21 @@ class ShadowRootElement(BaseElement):
loc_or_str = str_to_css_loc(loc_or_str) loc_or_str = str_to_css_loc(loc_or_str)
elif isinstance(loc_or_str, tuple) and len(loc_or_str) == 2: elif isinstance(loc_or_str, tuple) and len(loc_or_str) == 2:
if loc_or_str[0] == 'xpath': if loc_or_str[0] == 'xpath':
raise ValueError('不支持xpath') raise ValueError('不支持xpath')
else: else:
raise ValueError('Argument loc_or_str can only be tuple or str.') raise ValueError('loc_or_str参数只能是tuple或str类型。')
if loc_or_str[0] == 'css selector': if loc_or_str[0] == 'css selector':
return execute_driver_find(self, loc_or_str, mode, timeout) return make_driver_ele(self, loc_or_str, mode, timeout)
elif loc_or_str[0] == 'text': elif loc_or_str[0] == 'text':
return self._find_eles_by_text(loc_or_str[1], loc_or_str[2], loc_or_str[3], mode) return self._find_eles_by_text(loc_or_str[1], loc_or_str[2], loc_or_str[3], mode)
def s_ele(self, loc_or_ele, mode='single', timeout=None): def s_ele(self, loc_or_ele, mode='single'):
"""查找元素以SessionElement形式返回处理复杂页面时效率很高 \n
:param loc_or_ele: 元素的定位信息可以是loc元组或查询字符串
:param mode: 查找第一个或全部
:return: SessionElement对象或属性文本
"""
return make_session_ele(self, loc_or_ele, mode) return make_session_ele(self, loc_or_ele, mode)
def eles(self, def eles(self,
@ -213,7 +218,7 @@ def str_to_css_loc(loc: str) -> tuple:
loc = f'text{loc[2:]}' loc = f'text{loc[2:]}'
elif loc.startswith(('x:', 'x=', 'xpath:', 'xpath=')): elif loc.startswith(('x:', 'x=', 'xpath:', 'xpath=')):
raise ValueError('不支持xpath') raise ValueError('不支持xpath')
# 根据属性查找 # 根据属性查找
if loc.startswith('@'): if loc.startswith('@'):