mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
基本完成s_ele()
This commit is contained in:
parent
391d042635
commit
bab4461b58
@ -26,11 +26,12 @@ class BaseParser(object):
|
||||
timeout: float = None):
|
||||
return self.ele(loc_or_str, mode='all', timeout=timeout)
|
||||
|
||||
def s_eles(self,
|
||||
loc_or_str: Union[Tuple[str, str], str],
|
||||
timeout: float = None):
|
||||
"""查找并以SessionElement方式返回元素"""
|
||||
return self.s_ele(loc_or_str, mode='all', timeout=timeout)
|
||||
def s_eles(self, loc_or_str: Union[Tuple[str, str], str]):
|
||||
"""查找并以SessionElement方式返回元素 \n
|
||||
:param loc_or_str: 定位符
|
||||
:return: SessionElement或属性、文本组成的列表
|
||||
"""
|
||||
return self.s_ele(loc_or_str, mode='all')
|
||||
|
||||
# ----------------以下属性或方法待后代实现----------------
|
||||
@property
|
||||
@ -38,7 +39,7 @@ class BaseParser(object):
|
||||
return
|
||||
|
||||
@abstractmethod
|
||||
def s_ele(self, loc_or_ele, mode='single', timeout=None):
|
||||
def s_ele(self, loc_or_ele, mode='single'):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
@ -169,7 +170,7 @@ class DrissionElement(BaseElement):
|
||||
elif mode == 'text':
|
||||
node_txt = 'text()'
|
||||
else:
|
||||
raise ValueError(f"Argument mode can only be 'node' ,'ele' or 'text', not '{mode}'.")
|
||||
raise ValueError(f"mode参数只能是'node'、'ele'或'text',现在是:'{mode}'。")
|
||||
|
||||
# 查找节点的方向
|
||||
if direction == 'next':
|
||||
@ -177,7 +178,7 @@ class DrissionElement(BaseElement):
|
||||
elif direction == 'prev':
|
||||
direction_txt = 'preceding'
|
||||
else:
|
||||
raise ValueError(f"Argument direction can only be 'next' or 'prev', not '{direction}'.")
|
||||
raise ValueError(f"direction参数只能是'next'或'prev',现在是:'{direction}'。")
|
||||
|
||||
timeout = 0 if direction == 'prev' else .5
|
||||
|
||||
|
@ -136,7 +136,7 @@ def _make_xpath_str(tag: str, arg: str, val: str, mode: str = 'fuzzy') -> str:
|
||||
return f"//*[{tag_name}contains({arg},{_make_search_str(val)})]"
|
||||
|
||||
else:
|
||||
raise ValueError("Argument mode can only be 'exact' or 'fuzzy'.")
|
||||
raise ValueError("mode参数只能是'exact'或'fuzzy'。")
|
||||
|
||||
|
||||
def _make_search_str(search_str: str) -> str:
|
||||
|
@ -657,7 +657,7 @@ def _dict_to_chrome_options(options: dict) -> Options:
|
||||
# 启动参数
|
||||
if options.get('arguments', None):
|
||||
if not isinstance(options['arguments'], list):
|
||||
raise Exception(f"Arguments need list,not {type(options['arguments'])}.")
|
||||
raise Exception(f"参数必须为list,现在是:{type(options['arguments'])}。")
|
||||
|
||||
for arg in options['arguments']:
|
||||
chrome_options.add_argument(arg)
|
||||
@ -665,7 +665,7 @@ def _dict_to_chrome_options(options: dict) -> Options:
|
||||
# 加载插件
|
||||
if options.get('extension_files', None):
|
||||
if not isinstance(options['extension_files'], list):
|
||||
raise Exception(f'Extension files need list,not {type(options["extension_files"])}.')
|
||||
raise Exception(f'extension_files必须是list,现在是:{type(options["extension_files"])}。')
|
||||
|
||||
for arg in options['extension_files']:
|
||||
chrome_options.add_extension(arg)
|
||||
@ -673,7 +673,7 @@ def _dict_to_chrome_options(options: dict) -> Options:
|
||||
# 扩展设置
|
||||
if options.get('extensions', None):
|
||||
if not isinstance(options['extensions'], list):
|
||||
raise Exception(f'Extensions need list,not {type(options["extensions"])}.')
|
||||
raise Exception(f'extensions必须是list,现在是:{type(options["extensions"])}。')
|
||||
|
||||
for arg in options['extensions']:
|
||||
chrome_options.add_encoded_extension(arg)
|
||||
@ -681,7 +681,7 @@ def _dict_to_chrome_options(options: dict) -> Options:
|
||||
# 实验性质的设置参数
|
||||
if options.get('experimental_options', None):
|
||||
if not isinstance(options['experimental_options'], dict):
|
||||
raise Exception(f'Experimental options need dict,not {type(options["experimental_options"])}.')
|
||||
raise Exception(f'experimental_options必须是dict,现在是:{type(options["experimental_options"])}。')
|
||||
|
||||
for i in options['experimental_options']:
|
||||
chrome_options.add_experimental_option(i, options['experimental_options'][i])
|
||||
@ -763,7 +763,6 @@ def _cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict:
|
||||
attr_val = attr.lstrip().split('=')
|
||||
|
||||
if key == 0:
|
||||
# TODO: 检查
|
||||
cookie_dict['name'] = attr_val[0]
|
||||
cookie_dict['value'] = attr_val[1] if len(attr_val) == 2 else ''
|
||||
else:
|
||||
@ -772,7 +771,7 @@ def _cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict:
|
||||
return cookie_dict
|
||||
|
||||
else:
|
||||
raise TypeError
|
||||
raise TypeError('cookie参数必须为Cookie、str或dict类型。')
|
||||
|
||||
return cookie_dict
|
||||
|
||||
@ -792,6 +791,6 @@ def _cookies_to_tuple(cookies: Union[RequestsCookieJar, list, tuple, str, dict])
|
||||
cookies = tuple({'name': cookie, 'value': cookies[cookie]} for cookie in cookies)
|
||||
|
||||
else:
|
||||
raise TypeError
|
||||
raise TypeError('cookies参数必须为RequestsCookieJar、list、tuple、str或dict类型。')
|
||||
|
||||
return cookies
|
||||
|
@ -1,10 +1,10 @@
|
||||
[paths]
|
||||
chromedriver_path = D:\python\Google Chrome\Chrome\chromedriver75.exe
|
||||
tmp_path = D:\python\projects\DrissionPage\DrissionPage\tmp
|
||||
chromedriver_path =
|
||||
tmp_path =
|
||||
|
||||
[chrome_options]
|
||||
debugger_address = 127.0.0.1:9222
|
||||
binary_location = D:\python\Google Chrome\Chrome\chrome.exe
|
||||
debugger_address =
|
||||
binary_location =
|
||||
arguments = ['--no-sandbox', '--disable-gpu', '--ignore-certificate-errors', '--disable-infobars']
|
||||
extensions = []
|
||||
experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}, 'plugins.plugins_list': [{'enabled': False, 'name': 'Chrome PDF Viewer'}]}, 'useAutomationExtension': False, 'excludeSwitches': ['enable-automation']}
|
||||
|
@ -84,7 +84,7 @@ class Drission(object):
|
||||
"""
|
||||
if self._driver is None:
|
||||
if not isinstance(self._driver_options, dict):
|
||||
raise TypeError('Driver options invalid')
|
||||
raise TypeError('无效的Driver配置。')
|
||||
|
||||
options = _dict_to_chrome_options(self._driver_options)
|
||||
|
||||
@ -240,7 +240,7 @@ class Drission(object):
|
||||
url = extract(browser_domain)
|
||||
cookie_domain = f'{url.domain}.{url.suffix}'
|
||||
else:
|
||||
raise ValueError('There is no domain name in the cookie or the browser has not visited a URL.')
|
||||
raise ValueError('cookie中没有域名或浏览器未访问过URL。')
|
||||
|
||||
cookie['domain'] = cookie_domain
|
||||
|
||||
|
@ -41,7 +41,7 @@ class DriverElement(DrissionElement):
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:param timeout: 超时时间
|
||||
:return: DriverElement对象
|
||||
:return: DriverElement对象或属性文本
|
||||
"""
|
||||
return super().__call__(loc_or_str, mode, timeout)
|
||||
|
||||
@ -134,9 +134,14 @@ class DriverElement(DrissionElement):
|
||||
loc_str = f'{self.css_path}{loc_or_str[1]}'
|
||||
|
||||
loc_or_str = loc_or_str[0], loc_str
|
||||
return execute_driver_find(self, loc_or_str, mode, timeout)
|
||||
return make_driver_ele(self, loc_or_str, mode, timeout)
|
||||
|
||||
def s_ele(self, loc_or_ele, mode='single', timeout=None):
|
||||
def s_ele(self, loc_or_ele, mode='single'):
|
||||
"""查找元素以SessionElement形式返回,处理复杂页面时效率很高 \n
|
||||
:param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 查找第一个或全部
|
||||
:return: SessionElement对象或属性、文本
|
||||
"""
|
||||
return make_session_ele(self, loc_or_ele, mode)
|
||||
|
||||
def eles(self,
|
||||
@ -168,7 +173,7 @@ class DriverElement(DrissionElement):
|
||||
txt5 = '''return path.substr(1);'''
|
||||
|
||||
else:
|
||||
raise ValueError(f"Argument mode can only be 'xpath' or 'css', not '{mode}'.")
|
||||
raise ValueError(f"mode参数只能是'xpath'或'css',现在是:'{mode}'。")
|
||||
|
||||
js = '''
|
||||
function e(el) {
|
||||
@ -459,7 +464,7 @@ class DriverElement(DrissionElement):
|
||||
self.run_script(f'arguments[0].removeAttribute("{attr}");')
|
||||
return True
|
||||
except:
|
||||
raise False
|
||||
return False
|
||||
|
||||
def drag(self, x: int, y: int, speed: int = 40, shake: bool = True) -> bool:
|
||||
"""拖拽当前元素到相对位置 \n
|
||||
@ -490,7 +495,7 @@ class DriverElement(DrissionElement):
|
||||
elif isinstance(ele_or_loc, tuple):
|
||||
target_x, target_y = ele_or_loc
|
||||
else:
|
||||
raise TypeError('Need DriverElement, WebElement object or coordinate information.')
|
||||
raise TypeError('需要DriverElement、WebElement对象或坐标。')
|
||||
|
||||
current_x = self.location['x'] + self.size['width'] // 2
|
||||
current_y = self.location['y'] + self.size['height'] // 2
|
||||
@ -525,10 +530,10 @@ class DriverElement(DrissionElement):
|
||||
ActionChains(self.page.driver).move_to_element(self.inner_ele).perform()
|
||||
|
||||
|
||||
def execute_driver_find(page_or_ele,
|
||||
loc: Tuple[str, str],
|
||||
mode: str = 'single',
|
||||
timeout: float = None) -> Union[DriverElement, List[DriverElement], str, None]:
|
||||
def make_driver_ele(page_or_ele,
|
||||
loc: Union[str, Tuple[str, str]],
|
||||
mode: str = 'single',
|
||||
timeout: float = None) -> Union[DriverElement, List[DriverElement], str, None]:
|
||||
"""执行driver模式元素的查找 \n
|
||||
页面查找元素及元素查找下级元素皆使用此方法 \n
|
||||
:param page_or_ele: DriverPage对象或DriverElement对象
|
||||
@ -539,7 +544,7 @@ def execute_driver_find(page_or_ele,
|
||||
"""
|
||||
mode = mode or 'single'
|
||||
if mode not in ('single', 'all'):
|
||||
raise ValueError(f"Argument mode can only be 'single' or 'all', not '{mode}'.")
|
||||
raise ValueError(f"mode参数只能是'single'或'all',现在是:'{mode}'。")
|
||||
|
||||
if isinstance(page_or_ele, BaseElement):
|
||||
page = page_or_ele.page
|
||||
@ -555,6 +560,15 @@ def execute_driver_find(page_or_ele,
|
||||
page.wait_object._driver = driver
|
||||
wait = page.wait_object
|
||||
|
||||
# ---------------处理定位符---------------
|
||||
if isinstance(loc, str):
|
||||
loc = str_to_loc(loc)
|
||||
elif isinstance(loc, tuple):
|
||||
loc = translate_loc(loc)
|
||||
else:
|
||||
raise ValueError("定位符必须为str或长度为2的tuple。")
|
||||
|
||||
# ---------------执行查找-----------------
|
||||
try:
|
||||
# 使用xpath查找
|
||||
if loc[0] == 'xpath':
|
||||
@ -572,7 +586,7 @@ def execute_driver_find(page_or_ele,
|
||||
return [] if mode == 'all' else None
|
||||
|
||||
except InvalidElementStateException:
|
||||
raise ValueError(f'Invalid query syntax. {loc}')
|
||||
raise ValueError(f'无效的查找语句:{loc}')
|
||||
|
||||
|
||||
class ElementsByXpath(object):
|
||||
@ -681,7 +695,7 @@ class Select(object):
|
||||
:param ele: select 元素对象
|
||||
"""
|
||||
if ele.tag != 'select':
|
||||
raise TypeError(f"Select only works on <select> elements, not on {ele.tag}")
|
||||
raise TypeError(f"select方法只能在<select>元素使用,现在是:{ele.tag}。")
|
||||
|
||||
from selenium.webdriver.support.select import Select as sl
|
||||
self.inner_ele = ele
|
||||
@ -821,7 +835,7 @@ class Select(object):
|
||||
def invert(self) -> None:
|
||||
"""反选"""
|
||||
if not self.is_multi:
|
||||
raise NotImplementedError("You may only deselect options of a multi-select")
|
||||
raise NotImplementedError("只能对多项选框执行反选。")
|
||||
|
||||
for i in self.options:
|
||||
i.click()
|
||||
@ -839,7 +853,7 @@ def _wait_ele(page_or_ele,
|
||||
:return: 等待是否成功
|
||||
"""
|
||||
if mode.lower() not in ('del', 'display', 'hidden'):
|
||||
raise ValueError('Argument mode can only be "del", "display", "hidden"')
|
||||
raise ValueError('mode参数只能是"del"、"display"或"hidden"。')
|
||||
|
||||
if isinstance(page_or_ele, DrissionElement): # TODO: 是否要改为 BaseElement
|
||||
page = page_or_ele.page
|
||||
@ -865,7 +879,7 @@ def _wait_ele(page_or_ele,
|
||||
pass
|
||||
|
||||
else:
|
||||
raise TypeError('The type of loc_or_ele can only be str, tuple, DriverElement, WebElement')
|
||||
raise TypeError('loc_or_ele参数只能是str、tuple、DriverElement 或 WebElement类型')
|
||||
|
||||
# 当传入参数是元素对象时
|
||||
if is_ele:
|
||||
|
@ -17,8 +17,8 @@ from selenium.webdriver.remote.webelement import WebElement
|
||||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
|
||||
from .base import BasePage
|
||||
from .common import str_to_loc, get_available_file_name, translate_loc, format_html
|
||||
from .driver_element import DriverElement, execute_driver_find, _wait_ele
|
||||
from .common import get_available_file_name, format_html
|
||||
from .driver_element import DriverElement, make_driver_ele, _wait_ele
|
||||
from .session_element import make_session_ele
|
||||
|
||||
|
||||
@ -34,13 +34,13 @@ class DriverPage(BasePage):
|
||||
def __call__(self,
|
||||
loc_or_str: Union[Tuple[str, str], str, DriverElement, WebElement],
|
||||
mode: str = 'single',
|
||||
timeout: float = None) -> Union[DriverElement, List[DriverElement]]:
|
||||
timeout: float = None) -> Union[DriverElement, List[DriverElement], str]:
|
||||
"""在内部查找元素 \n
|
||||
例:ele = page('@id=ele_id') \n
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:param timeout: 超时时间
|
||||
:return: DriverElement对象
|
||||
:return: DriverElement对象或属性文本
|
||||
"""
|
||||
return super().__call__(loc_or_str, mode, timeout)
|
||||
|
||||
@ -107,12 +107,7 @@ class DriverPage(BasePage):
|
||||
"""
|
||||
# 接收到字符串或元组,获取定位loc元组
|
||||
if isinstance(loc_or_ele, (str, tuple)):
|
||||
if isinstance(loc_or_ele, str):
|
||||
loc_or_ele = str_to_loc(loc_or_ele)
|
||||
else:
|
||||
if len(loc_or_ele) != 2:
|
||||
raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.")
|
||||
loc_or_ele = translate_loc(loc_or_ele)
|
||||
return make_driver_ele(self, loc_or_ele, mode, timeout)
|
||||
|
||||
# 接收到DriverElement对象直接返回
|
||||
elif isinstance(loc_or_ele, DriverElement):
|
||||
@ -124,11 +119,14 @@ class DriverPage(BasePage):
|
||||
|
||||
# 接收到的类型不正确,抛出异常
|
||||
else:
|
||||
raise ValueError('Argument loc_or_str can only be tuple, str, DriverElement, DriverElement.')
|
||||
raise ValueError('loc_or_str参数只能是tuple、str、DriverElement 或 DriverElement类型。')
|
||||
|
||||
return execute_driver_find(self, loc_or_ele, mode, timeout)
|
||||
|
||||
def s_ele(self, loc_or_ele, mode='single', timeout=None):
|
||||
def s_ele(self, loc_or_ele, mode='single'):
|
||||
"""查找元素以SessionElement形式返回,处理复杂页面时效率很高 \n
|
||||
:param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 查找第一个或全部
|
||||
:return: SessionElement对象或属性、文本
|
||||
"""
|
||||
return make_session_ele(self, loc_or_ele, mode)
|
||||
|
||||
def eles(self,
|
||||
@ -192,7 +190,7 @@ class DriverPage(BasePage):
|
||||
print(f'重试 {to_url}')
|
||||
|
||||
if is_ok is False and show_errmsg:
|
||||
raise err if err is not None else ConnectionError('Connect error.')
|
||||
raise err if err is not None else ConnectionError('连接异常。')
|
||||
|
||||
return is_ok
|
||||
|
||||
@ -295,7 +293,7 @@ class DriverPage(BasePage):
|
||||
elif isinstance(tab, (list, tuple)):
|
||||
page_handle = tab
|
||||
else:
|
||||
raise TypeError('Argument num_or_handle can only be int, str, list or tuple.')
|
||||
raise TypeError('num_or_handle参数只能是int、str、list 或 tuple类型。')
|
||||
|
||||
for i in tabs: # 遍历所有标签页,关闭非保留的
|
||||
if i not in page_handle:
|
||||
@ -422,8 +420,8 @@ class DriverPage(BasePage):
|
||||
self.driver.execute_script(f"window.scrollBy({pixel},0);")
|
||||
|
||||
else:
|
||||
raise ValueError("Argument mode can only be "
|
||||
"'top', 'bottom', 'half', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right'.")
|
||||
raise ValueError("mode参数只能是'top', 'bottom', 'half', 'rightmost', "
|
||||
"'leftmost', 'up', 'down', 'left', 'right'。")
|
||||
|
||||
def refresh(self) -> None:
|
||||
"""刷新当前页面"""
|
||||
@ -447,7 +445,7 @@ class DriverPage(BasePage):
|
||||
|
||||
else:
|
||||
if x < 0 or y < 0:
|
||||
raise ValueError('Arguments x and y must greater than 0.')
|
||||
raise ValueError('x 和 y参数必须大于0。')
|
||||
|
||||
new_x = x or self.driver.get_window_size()['width']
|
||||
new_y = y or self.driver.get_window_size()['height']
|
||||
|
@ -58,7 +58,7 @@ class MixPage(SessionPage, DriverPage, BasePage):
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:param timeout: 超时时间
|
||||
:return: DriverElement对象
|
||||
:return: 子元素对象或属性文本
|
||||
"""
|
||||
return super().__call__(loc_or_str, mode, timeout)
|
||||
|
||||
@ -133,19 +133,24 @@ class MixPage(SessionPage, DriverPage, BasePage):
|
||||
elif self._mode == 'd':
|
||||
return super(SessionPage, self).ele(loc_or_ele, mode=mode, timeout=timeout)
|
||||
|
||||
def s_ele(self, loc_or_ele, mode='single', timeout=None):
|
||||
def s_ele(self, loc_or_ele, mode='single') -> Union[SessionElement, List[SessionElement], List[str]]:
|
||||
"""查找元素以SessionElement形式返回,处理复杂页面时效率很高 \n
|
||||
:param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 查找第一个或全部
|
||||
:return: SessionElement对象或属性、文本
|
||||
"""
|
||||
if self._mode == 's':
|
||||
return super().s_ele(loc_or_ele, mode=mode)
|
||||
elif self._mode == 'd':
|
||||
return super(SessionPage, self).s_ele(loc_or_ele, mode=mode, timeout=timeout)
|
||||
return super(SessionPage, self).s_ele(loc_or_ele, mode=mode)
|
||||
|
||||
def eles(self,
|
||||
loc_or_str: Union[Tuple[str, str], str],
|
||||
timeout: float = None) -> Union[List[DriverElement], List[SessionElement]]:
|
||||
"""返回页面中所有符合条件的元素、属性或节点文本 \n
|
||||
timeout: float = None) -> Union[List[DriverElement], List[SessionElement], List[str]]:
|
||||
"""返回页面中所有符合条件的元素、属性或节点文本 \n
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param timeout: 查找元素超时时间,d模式专用
|
||||
:return: 元素对象或属性、文本节点文本组成的列表
|
||||
:return: 元素对象或属性、文本组成的列表
|
||||
"""
|
||||
return super(SessionPage, self).eles(loc_or_str, timeout=timeout)
|
||||
|
||||
@ -368,8 +373,8 @@ class MixPage(SessionPage, DriverPage, BasePage):
|
||||
path = download_path or self._drission.driver_options['experimental_options']['prefs'][
|
||||
'download.default_directory']
|
||||
if not path:
|
||||
raise
|
||||
raise ValueError('未指定下载路径。')
|
||||
except:
|
||||
raise IOError('Download path not found.')
|
||||
raise IOError('无法找到下载路径。')
|
||||
|
||||
return super().chrome_downloading(path)
|
||||
|
@ -11,7 +11,7 @@ from urllib.parse import urlparse, urljoin, urlunparse
|
||||
from lxml.etree import tostring
|
||||
from lxml.html import HtmlElement, fromstring
|
||||
|
||||
from .base import DrissionElement
|
||||
from .base import DrissionElement, BasePage, BaseElement
|
||||
from .common import str_to_loc, translate_loc, format_html
|
||||
|
||||
|
||||
@ -31,7 +31,7 @@ class SessionElement(DrissionElement):
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:param timeout: 不起实际作用,用于和父类对应
|
||||
:return: SessionElement对象
|
||||
:return: SessionElement对象或属性文本
|
||||
"""
|
||||
return super().__call__(loc_or_str, mode, timeout)
|
||||
|
||||
@ -158,7 +158,6 @@ class SessionElement(DrissionElement):
|
||||
element = self.page
|
||||
|
||||
loc_or_str = loc_or_str[0], loc_str
|
||||
|
||||
return make_session_ele(element, loc_or_str, mode)
|
||||
|
||||
def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout=None):
|
||||
@ -169,8 +168,13 @@ class SessionElement(DrissionElement):
|
||||
"""
|
||||
return self.ele(loc_or_str, mode='all')
|
||||
|
||||
def s_ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None, timeout=None):
|
||||
return self.ele(loc_or_str, mode=mode, timeout=timeout)
|
||||
def s_ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None):
|
||||
"""返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 \n
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:return: SessionElement对象
|
||||
"""
|
||||
return self.ele(loc_or_str, mode=mode)
|
||||
|
||||
def _get_ele_path(self, mode) -> str:
|
||||
"""获取css路径或xpath路径
|
||||
@ -217,37 +221,39 @@ class SessionElement(DrissionElement):
|
||||
return link
|
||||
|
||||
|
||||
def make_session_ele(page_or_ele,
|
||||
def make_session_ele(html_or_ele: Union[str, BaseElement, BasePage],
|
||||
loc: Union[str, Tuple[str, str]],
|
||||
mode: str = 'single', ) -> Union[SessionElement, List[SessionElement], str, None]:
|
||||
"""执行session模式元素的查找 \n
|
||||
页面查找元素及元素查找下级元素皆使用此方法 \n
|
||||
:param page_or_ele: SessionPage对象或SessionElement对象
|
||||
:param loc: 元素定位元组
|
||||
"""从接收到的对象或html文本中查找元素,返回SessionElement对象 \n
|
||||
:param html_or_ele: html文本、BaseParser对象
|
||||
:param loc: 定位元组或字符串
|
||||
:param mode: 'single' 或 'all',对应获取第一个或全部
|
||||
:return: 返回SessionElement元素或列表
|
||||
:return: 返回SessionElement元素或列表,或属性文本
|
||||
"""
|
||||
mode = mode or 'single'
|
||||
if mode not in ('single', 'all'):
|
||||
raise ValueError(f"Argument mode can only be 'single' or 'all', not '{mode}'.")
|
||||
raise ValueError(f"mode参数只能是'single'或'all',现在是:'{mode}'。")
|
||||
|
||||
# 根据传入对象类型获取页面对象和lxml元素对象
|
||||
type_str = str(type(page_or_ele))
|
||||
if isinstance(page_or_ele, str): # 直接传入html文本
|
||||
if isinstance(html_or_ele, SessionElement): # SessionElement
|
||||
page = html_or_ele.page
|
||||
html_or_ele = html_or_ele.inner_ele
|
||||
# html_or_ele = fromstring(sub(r' ?', ' ', html_or_ele.response.text))
|
||||
|
||||
elif isinstance(html_or_ele, BasePage): # MixPage, DriverPage 或 SessionPage
|
||||
page = html_or_ele
|
||||
html_or_ele = fromstring(html_or_ele.html)
|
||||
|
||||
elif isinstance(html_or_ele, str): # 直接传入html文本
|
||||
page = None
|
||||
page_or_ele = fromstring(page_or_ele)
|
||||
elif type_str.endswith("SessionElement'>"): # SessionElement
|
||||
page = page_or_ele.page
|
||||
page_or_ele = page_or_ele.inner_ele
|
||||
elif "Page" in type_str: # MixPage, DriverPage 或 SessionPage
|
||||
page = page_or_ele
|
||||
page_or_ele = fromstring(page_or_ele.html)
|
||||
else: # DrissionElement 或 ShadowRootElement
|
||||
page = page_or_ele.page
|
||||
page_or_ele = fromstring(page_or_ele.html)
|
||||
# else: # 传入的是SessionPage对象
|
||||
# page = page_or_ele
|
||||
# page_or_ele = fromstring(sub(r' ?', ' ', page_or_ele.response.text))
|
||||
html_or_ele = fromstring(html_or_ele)
|
||||
|
||||
elif isinstance(html_or_ele, BaseElement): # DrissionElement 或 ShadowRootElement
|
||||
page = html_or_ele.page
|
||||
html_or_ele = fromstring(html_or_ele.html)
|
||||
|
||||
else:
|
||||
raise TypeError('html_or_ele参数只能是元素、页面对象或html文本。')
|
||||
|
||||
# ---------------处理定位符---------------
|
||||
if isinstance(loc, str):
|
||||
@ -257,12 +263,12 @@ def make_session_ele(page_or_ele,
|
||||
else:
|
||||
raise ValueError("定位符必须为str或长度为2的tuple。")
|
||||
|
||||
# ---------------执行搜索-----------------
|
||||
# ---------------执行查找-----------------
|
||||
try:
|
||||
if loc[0] == 'xpath': # 用lxml内置方法获取lxml的元素对象列表
|
||||
ele = page_or_ele.xpath(loc[1])
|
||||
ele = html_or_ele.xpath(loc[1])
|
||||
else: # 用css selector获取元素对象列表
|
||||
ele = page_or_ele.cssselect(loc[1])
|
||||
ele = html_or_ele.cssselect(loc[1])
|
||||
|
||||
if not isinstance(ele, list): # 结果不是列表,如数字
|
||||
return ele
|
||||
@ -282,8 +288,8 @@ def make_session_ele(page_or_ele,
|
||||
|
||||
except Exception as e:
|
||||
if 'Invalid expression' in str(e):
|
||||
raise SyntaxError(f'Invalid xpath syntax. {loc}')
|
||||
raise SyntaxError(f'无效的xpath语句:{loc}')
|
||||
elif 'Expected selector' in str(e):
|
||||
raise SyntaxError(f'Invalid css selector syntax. {loc}')
|
||||
raise SyntaxError(f'无效的css select语句:{loc}')
|
||||
|
||||
raise e
|
||||
|
@ -33,13 +33,13 @@ class SessionPage(BasePage):
|
||||
def __call__(self,
|
||||
loc_or_str: Union[Tuple[str, str], str, SessionElement],
|
||||
mode: str = 'single',
|
||||
timeout: float = None) -> Union[SessionElement, List[SessionElement]]:
|
||||
timeout: float = None) -> Union[SessionElement, List[SessionElement], str]:
|
||||
"""在内部查找元素 \n
|
||||
例:ele2 = ele1('@id=ele_id') \n
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:param timeout: 不起实际作用,用于和父类对应
|
||||
:return: SessionElement对象
|
||||
:return: SessionElement对象或属性文本
|
||||
"""
|
||||
return super().__call__(loc_or_str, mode, timeout)
|
||||
|
||||
@ -94,7 +94,7 @@ class SessionPage(BasePage):
|
||||
|
||||
else:
|
||||
if show_errmsg:
|
||||
raise ConnectionError(f'{to_url}\nStatus code: {self._response.status_code}.')
|
||||
raise ConnectionError(f'{to_url}\n连接状态码:{self._response.status_code}.')
|
||||
|
||||
self._url_available = False
|
||||
|
||||
@ -120,8 +120,13 @@ class SessionPage(BasePage):
|
||||
"""
|
||||
return super().eles(loc_or_str, timeout)
|
||||
|
||||
def s_ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None, timeout=None):
|
||||
return self.ele(loc_or_str, mode=mode, timeout=timeout)
|
||||
def s_ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None):
|
||||
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n
|
||||
:param loc_or_str: 元素的定位信息,可以是元素对象,loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:return: SessionElement对象
|
||||
"""
|
||||
return self.ele(loc_or_str, mode=mode)
|
||||
|
||||
def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]:
|
||||
"""返回cookies \n
|
||||
@ -180,7 +185,7 @@ class SessionPage(BasePage):
|
||||
print(f'重试 {to_url}')
|
||||
|
||||
if not r and show_errmsg:
|
||||
raise err if err is not None else ConnectionError('Connect error.')
|
||||
raise err if err is not None else ConnectionError('连接异常。')
|
||||
|
||||
return r
|
||||
|
||||
@ -232,7 +237,7 @@ class SessionPage(BasePage):
|
||||
|
||||
else:
|
||||
if show_errmsg:
|
||||
raise ConnectionError(f'Status code: {self._response.status_code}.')
|
||||
raise ConnectionError(f'连接状态码:{self._response.status_code}.')
|
||||
self._url_available = False
|
||||
|
||||
return self._url_available
|
||||
@ -292,7 +297,7 @@ class SessionPage(BasePage):
|
||||
|
||||
if not r.ok:
|
||||
if errmsg:
|
||||
raise ConnectionError(f'Status code: {r.status_code}.')
|
||||
raise ConnectionError(f'连接状态码:{r.status_code}.')
|
||||
|
||||
return False, f'Status code: {r.status_code}.'
|
||||
|
||||
@ -361,7 +366,7 @@ class SessionPage(BasePage):
|
||||
pass
|
||||
|
||||
else:
|
||||
raise ValueError("Argument file_exists can only be 'skip', 'overwrite', 'rename'.")
|
||||
raise ValueError("file_exists参数只能是'skip'、'overwrite'或'rename'。")
|
||||
|
||||
# -------------------打印要下载的文件-------------------
|
||||
if msg:
|
||||
@ -404,7 +409,7 @@ class SessionPage(BasePage):
|
||||
else:
|
||||
if full_path.stat().st_size == 0:
|
||||
if errmsg:
|
||||
raise ValueError('File size is 0.')
|
||||
raise ValueError('文件大小为0。')
|
||||
|
||||
download_status, info = False, 'File size is 0.'
|
||||
|
||||
@ -456,11 +461,11 @@ class SessionPage(BasePage):
|
||||
"""
|
||||
if not url:
|
||||
if show_errmsg:
|
||||
raise ValueError('url is empty.')
|
||||
raise ValueError('URL为空。')
|
||||
return None, 'url is empty.'
|
||||
|
||||
if mode not in ('get', 'post'):
|
||||
raise ValueError("Argument mode can only be 'get' or 'post'.")
|
||||
raise ValueError("mode参数只能是'get'或'post'。")
|
||||
|
||||
url = quote(url, safe='/:&?=%;#@+!')
|
||||
|
||||
|
@ -11,7 +11,7 @@ from selenium.webdriver.remote.webelement import WebElement
|
||||
|
||||
from .base import BaseElement
|
||||
from .common import format_html
|
||||
from .driver_element import execute_driver_find, DriverElement
|
||||
from .driver_element import make_driver_ele, DriverElement
|
||||
from .session_element import make_session_ele
|
||||
|
||||
|
||||
@ -26,13 +26,13 @@ class ShadowRootElement(BaseElement):
|
||||
def __call__(self,
|
||||
loc_or_str: Union[Tuple[str, str], str],
|
||||
mode: str = 'single',
|
||||
timeout: float = None) -> Union[DriverElement, List[DriverElement]]:
|
||||
timeout: float = None) -> Union[DriverElement, List[DriverElement], str]:
|
||||
"""在内部查找元素 \n
|
||||
例:ele2 = ele1('@id=ele_id') \n
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:param timeout: 超时时间
|
||||
:return: DriverElement对象
|
||||
:return: DriverElement对象或属性文本
|
||||
"""
|
||||
return self.ele(loc_or_str, mode, timeout)
|
||||
|
||||
@ -81,16 +81,21 @@ class ShadowRootElement(BaseElement):
|
||||
loc_or_str = str_to_css_loc(loc_or_str)
|
||||
elif isinstance(loc_or_str, tuple) and len(loc_or_str) == 2:
|
||||
if loc_or_str[0] == 'xpath':
|
||||
raise ValueError('不支持xpath')
|
||||
raise ValueError('不支持xpath。')
|
||||
else:
|
||||
raise ValueError('Argument loc_or_str can only be tuple or str.')
|
||||
raise ValueError('loc_or_str参数只能是tuple或str类型。')
|
||||
|
||||
if loc_or_str[0] == 'css selector':
|
||||
return execute_driver_find(self, loc_or_str, mode, timeout)
|
||||
return make_driver_ele(self, loc_or_str, mode, timeout)
|
||||
elif loc_or_str[0] == 'text':
|
||||
return self._find_eles_by_text(loc_or_str[1], loc_or_str[2], loc_or_str[3], mode)
|
||||
|
||||
def s_ele(self, loc_or_ele, mode='single', timeout=None):
|
||||
def s_ele(self, loc_or_ele, mode='single'):
|
||||
"""查找元素以SessionElement形式返回,处理复杂页面时效率很高 \n
|
||||
:param loc_or_ele: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param mode: 查找第一个或全部
|
||||
:return: SessionElement对象或属性、文本
|
||||
"""
|
||||
return make_session_ele(self, loc_or_ele, mode)
|
||||
|
||||
def eles(self,
|
||||
@ -213,7 +218,7 @@ def str_to_css_loc(loc: str) -> tuple:
|
||||
loc = f'text{loc[2:]}'
|
||||
|
||||
elif loc.startswith(('x:', 'x=', 'xpath:', 'xpath=')):
|
||||
raise ValueError('不支持xpath')
|
||||
raise ValueError('不支持xpath。')
|
||||
|
||||
# 根据属性查找
|
||||
if loc.startswith('@'):
|
||||
|
Loading…
x
Reference in New Issue
Block a user