mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
添加s_ele(),未完成
This commit is contained in:
parent
4241ac000d
commit
391d042635
@ -6,7 +6,7 @@
|
||||
"""
|
||||
from abc import abstractmethod
|
||||
from re import sub
|
||||
from typing import Union
|
||||
from typing import Union, Tuple
|
||||
|
||||
from lxml.html import HtmlElement
|
||||
from selenium.webdriver.remote.webelement import WebElement
|
||||
@ -21,14 +21,26 @@ class BaseParser(object):
|
||||
timeout: float = None):
|
||||
return self.ele(loc_or_str, mode, timeout)
|
||||
|
||||
def eles(self, loc_or_str, timeout):
|
||||
def eles(self,
         loc_or_str: Union[Tuple[str, str], str],
         timeout: float = None):
    """Return every match for the given locator.

    Thin wrapper that forwards to ``self.ele`` with ``mode='all'``.

    :param loc_or_str: a two-string locator tuple or a query string
    :param timeout: lookup timeout, forwarded unchanged to ``self.ele``
    :return: whatever ``self.ele`` returns in ``'all'`` mode
    """
    return self.ele(loc_or_str, mode='all', timeout=timeout)
|
||||
|
||||
def s_eles(self,
           loc_or_str: Union[Tuple[str, str], str],
           timeout: float = None):
    """Find elements and return them as SessionElement objects.

    Thin wrapper that forwards to ``self.s_ele`` with ``mode='all'``.

    :param loc_or_str: a two-string locator tuple or a query string
    :param timeout: lookup timeout, forwarded unchanged to ``self.s_ele``
    :return: whatever ``self.s_ele`` returns in ``'all'`` mode
    """
    return self.s_ele(loc_or_str, mode='all', timeout=timeout)
|
||||
|
||||
# ----------------以下属性或方法待后代实现----------------
|
||||
@property
def html(self):
    """Placeholder ``html`` property.

    This base version yields ``None``; it sits in the section the file marks
    as left for descendants to implement.
    """
    return None
|
||||
|
||||
@abstractmethod
def s_ele(self, loc_or_ele, mode='single', timeout=None):
    """Abstract lookup hook used by ``s_eles``; subclasses supply the body.

    :param loc_or_ele: locator (or element) identifying what to find
    :param mode: ``'single'`` by default; ``s_eles`` calls this with ``'all'``
    :param timeout: lookup timeout; semantics are defined by the implementation
    :return: nothing in this base version
    """
    pass
|
||||
|
||||
@abstractmethod
def ele(self, loc_or_ele, mode='single', timeout=None):
    """Abstract lookup hook used by ``eles``; subclasses supply the body.

    :param loc_or_ele: locator (or element) identifying what to find
    :param mode: ``'single'`` by default; ``eles`` calls this with ``'all'``
    :param timeout: lookup timeout; semantics are defined by the implementation
    :return: nothing in this base version
    """
    pass
|
||||
@ -50,8 +62,8 @@ class BaseElement(BaseParser):
|
||||
"""返回后一个兄弟元素"""
|
||||
return self.nexts()
|
||||
|
||||
def eles(self, loc_or_str, timeout):
|
||||
return super().eles(loc_or_str, timeout)
|
||||
# def eles(self, loc_or_str, timeout):
|
||||
# return super().eles(loc_or_str, timeout)
|
||||
|
||||
# ----------------以下属性或方法由后代实现----------------
|
||||
@property
|
||||
@ -251,8 +263,8 @@ class BasePage(BaseParser):
|
||||
"""返回当前访问的url有效性"""
|
||||
return self._url_available
|
||||
|
||||
def eles(self, loc_or_str, timeout):
|
||||
return super().eles(loc_or_str, timeout)
|
||||
# def eles(self, loc_or_str, timeout):
|
||||
# return super().eles(loc_or_str, timeout)
|
||||
|
||||
# ----------------以下属性或方法由后代实现----------------
|
||||
@property
|
||||
|
@ -158,7 +158,11 @@ def _make_search_str(search_str: str) -> str:
|
||||
|
||||
|
||||
def format_html(text: str, trans: bool = True) -> str:
|
||||
"""处理html编码字符"""
|
||||
"""处理html编码字符 \n
|
||||
:param text: html文本
|
||||
:param trans: 是否转码
|
||||
:return: 格式化后的html文本
|
||||
"""
|
||||
if not text:
|
||||
return text
|
||||
|
||||
@ -173,8 +177,10 @@ def translate_loc(loc: tuple) -> tuple:
|
||||
:param loc: By类型的loc元组
|
||||
:return: css selector或xpath类型的loc元组
|
||||
"""
|
||||
if len(loc) != 2:
|
||||
raise ValueError('定位符长度必须为2。')
|
||||
|
||||
loc_by = 'xpath'
|
||||
loc_str = None
|
||||
|
||||
if loc[0] == 'xpath':
|
||||
loc_str = loc[1]
|
||||
@ -201,6 +207,9 @@ def translate_loc(loc: tuple) -> tuple:
|
||||
elif loc[0] == 'partial link text':
|
||||
loc_str = f'//a[contains(text(),"{loc[1]}")]'
|
||||
|
||||
else:
|
||||
raise ValueError('无法识别的定位符。')
|
||||
|
||||
return loc_by, loc_str
|
||||
|
||||
|
||||
|
@ -1,19 +1,19 @@
|
||||
[paths]
|
||||
chromedriver_path =
|
||||
tmp_path =
|
||||
chromedriver_path = D:\python\Google Chrome\Chrome\chromedriver75.exe
|
||||
tmp_path = D:\python\projects\DrissionPage\DrissionPage\tmp
|
||||
|
||||
[chrome_options]
|
||||
debugger_address =
|
||||
binary_location =
|
||||
debugger_address = 127.0.0.1:9222
|
||||
binary_location = D:\python\Google Chrome\Chrome\chrome.exe
|
||||
arguments = ['--no-sandbox', '--disable-gpu', '--ignore-certificate-errors', '--disable-infobars']
|
||||
extensions = []
|
||||
experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}, 'plugins.plugins_list': [{'enabled': False, 'name': 'Chrome PDF Viewer'}]}, 'useAutomationExtension': False, 'excludeSwitches': ['enable-automation']}
|
||||
|
||||
[session_options]
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Connection": "keep-alive",
|
||||
"Accept-Charset": "GB2312,utf-8;q=0.7,*;q=0.7"
|
||||
}
|
||||
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Connection": "keep-alive",
|
||||
"Accept-Charset": "GB2312,utf-8;q=0.7,*;q=0.7"
|
||||
}
|
||||
|
||||
|
@ -18,6 +18,7 @@ from selenium.webdriver.support.wait import WebDriverWait
|
||||
|
||||
from .base import DrissionElement, BaseElement
|
||||
from .common import str_to_loc, get_available_file_name, translate_loc, format_html
|
||||
from .session_element import make_session_ele
|
||||
|
||||
|
||||
class DriverElement(DrissionElement):
|
||||
@ -123,16 +124,7 @@ class DriverElement(DrissionElement):
|
||||
:param timeout: 查找元素超时时间
|
||||
:return: DriverElement对象
|
||||
"""
|
||||
if isinstance(loc_or_str, (str, tuple)):
|
||||
if isinstance(loc_or_str, str):
|
||||
loc_or_str = str_to_loc(loc_or_str)
|
||||
else:
|
||||
if len(loc_or_str) != 2:
|
||||
raise ValueError("Len of loc_or_str must be 2 when it's a tuple.")
|
||||
loc_or_str = translate_loc(loc_or_str)
|
||||
else:
|
||||
raise ValueError('Argument loc_or_str can only be tuple or str.')
|
||||
|
||||
loc_or_str = str_to_loc(loc_or_str) if isinstance(loc_or_str, str) else translate_loc(loc_or_str)
|
||||
loc_str = loc_or_str[1]
|
||||
|
||||
if loc_or_str[0] == 'xpath' and loc_or_str[1].lstrip().startswith('/'):
|
||||
@ -142,9 +134,11 @@ class DriverElement(DrissionElement):
|
||||
loc_str = f'{self.css_path}{loc_or_str[1]}'
|
||||
|
||||
loc_or_str = loc_or_str[0], loc_str
|
||||
|
||||
return execute_driver_find(self, loc_or_str, mode, timeout)
|
||||
|
||||
def s_ele(self, loc_or_ele, mode='single', timeout=None):
    """Look up ``loc_or_ele`` through ``make_session_ele`` with this element as the root.

    :param loc_or_ele: locator handed straight to ``make_session_ele``
    :param mode: ``'single'`` or ``'all'``
    :param timeout: accepted to match the base signature; not used here
    :return: the result of ``make_session_ele``
    """
    return make_session_ele(self, loc_or_ele, mode)
|
||||
|
||||
def eles(self,
|
||||
loc_or_str: Union[Tuple[str, str], str],
|
||||
timeout: float = None):
|
||||
|
@ -19,6 +19,7 @@ from selenium.webdriver.support.wait import WebDriverWait
|
||||
from .base import BasePage
|
||||
from .common import str_to_loc, get_available_file_name, translate_loc, format_html
|
||||
from .driver_element import DriverElement, execute_driver_find, _wait_ele
|
||||
from .session_element import make_session_ele
|
||||
|
||||
|
||||
class DriverPage(BasePage):
|
||||
@ -127,6 +128,9 @@ class DriverPage(BasePage):
|
||||
|
||||
return execute_driver_find(self, loc_or_ele, mode, timeout)
|
||||
|
||||
def s_ele(self, loc_or_ele, mode='single', timeout=None):
    """Look up ``loc_or_ele`` through ``make_session_ele`` with this page as the root.

    :param loc_or_ele: locator handed straight to ``make_session_ele``
    :param mode: ``'single'`` or ``'all'``
    :param timeout: accepted to match the base signature; not used here
    :return: the result of ``make_session_ele``
    """
    return make_session_ele(self, loc_or_ele, mode)
|
||||
|
||||
def eles(self,
|
||||
loc_or_str: Union[Tuple[str, str], str],
|
||||
timeout: float = None) -> List[DriverElement]:
|
||||
@ -135,9 +139,6 @@ class DriverPage(BasePage):
|
||||
:param timeout: 查找元素超时时间
|
||||
:return: DriverElement对象组成的列表
|
||||
"""
|
||||
if not isinstance(loc_or_str, (tuple, str)):
|
||||
raise TypeError('Type of loc_or_str can only be tuple or str.')
|
||||
|
||||
return super().eles(loc_or_str, timeout)
|
||||
|
||||
def get_cookies(self, as_dict: bool = False) -> Union[list, dict]:
|
||||
|
@ -133,6 +133,12 @@ class MixPage(SessionPage, DriverPage, BasePage):
|
||||
elif self._mode == 'd':
|
||||
return super(SessionPage, self).ele(loc_or_ele, mode=mode, timeout=timeout)
|
||||
|
||||
def s_ele(self, loc_or_ele, mode='single', timeout=None):
    """Dispatch the session-style lookup according to the current page mode.

    :param loc_or_ele: locator forwarded to the selected parent implementation
    :param mode: ``'single'`` or ``'all'``
    :param timeout: forwarded only on the ``'d'`` branch
    :return: the parent implementation's result, or ``None`` when
        ``self._mode`` is neither ``'s'`` nor ``'d'``
    """
    if self._mode == 's':
        # Next class in the MRO after this one.
        return super().s_ele(loc_or_ele, mode=mode)
    elif self._mode == 'd':
        # Skip past SessionPage in the MRO to reach the driver-side implementation.
        return super(SessionPage, self).s_ele(loc_or_ele, mode=mode, timeout=timeout)
    return None
|
||||
|
||||
def eles(self,
|
||||
loc_or_str: Union[Tuple[str, str], str],
|
||||
timeout: float = None) -> Union[List[DriverElement], List[SessionElement]]:
|
||||
|
@ -145,16 +145,7 @@ class SessionElement(DrissionElement):
|
||||
:param timeout: 不起实际作用,用于和父类对应
|
||||
:return: SessionElement对象
|
||||
"""
|
||||
if isinstance(loc_or_str, (str, tuple)):
|
||||
if isinstance(loc_or_str, str):
|
||||
loc_or_str = str_to_loc(loc_or_str)
|
||||
else:
|
||||
if len(loc_or_str) != 2:
|
||||
raise ValueError("Len of loc_or_str must be 2 when it's a tuple.")
|
||||
loc_or_str = translate_loc(loc_or_str)
|
||||
else:
|
||||
raise ValueError('Argument loc_or_str can only be tuple or str.')
|
||||
|
||||
loc_or_str = str_to_loc(loc_or_str) if isinstance(loc_or_str, str) else translate_loc(loc_or_str)
|
||||
element = self
|
||||
loc_str = loc_or_str[1]
|
||||
|
||||
@ -168,7 +159,7 @@ class SessionElement(DrissionElement):
|
||||
|
||||
loc_or_str = loc_or_str[0], loc_str
|
||||
|
||||
return execute_session_find(element, loc_or_str, mode)
|
||||
return make_session_ele(element, loc_or_str, mode)
|
||||
|
||||
def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout=None):
|
||||
"""返回当前元素下级所有符合条件的子元素、属性或节点文本 \n
|
||||
@ -178,6 +169,9 @@ class SessionElement(DrissionElement):
|
||||
"""
|
||||
return self.ele(loc_or_str, mode='all')
|
||||
|
||||
def s_ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None, timeout=None):
    """Session-style lookup; simply forwards to ``self.ele``.

    :param loc_or_str: a two-string locator tuple or a query string
    :param mode: lookup mode, passed through unchanged (``None`` by default)
    :param timeout: passed through unchanged to ``self.ele``
    :return: whatever ``self.ele`` returns
    """
    return self.ele(loc_or_str, mode=mode, timeout=timeout)
|
||||
|
||||
def _get_ele_path(self, mode) -> str:
|
||||
"""获取css路径或xpath路径
|
||||
:param mode: 'css' 或 'xpath'
|
||||
@ -223,9 +217,9 @@ class SessionElement(DrissionElement):
|
||||
return link
|
||||
|
||||
|
||||
def execute_session_find(page_or_ele,
|
||||
loc: Tuple[str, str],
|
||||
mode: str = 'single', ) -> Union[SessionElement, List[SessionElement], str, None]:
|
||||
def make_session_ele(page_or_ele,
|
||||
loc: Union[str, Tuple[str, str]],
|
||||
mode: str = 'single', ) -> Union[SessionElement, List[SessionElement], str, None]:
|
||||
"""执行session模式元素的查找 \n
|
||||
页面查找元素及元素查找下级元素皆使用此方法 \n
|
||||
:param page_or_ele: SessionPage对象或SessionElement对象
|
||||
@ -238,30 +232,44 @@ def execute_session_find(page_or_ele,
|
||||
raise ValueError(f"Argument mode can only be 'single' or 'all', not '{mode}'.")
|
||||
|
||||
# 根据传入对象类型获取页面对象和lxml元素对象
|
||||
if isinstance(page_or_ele, SessionElement):
|
||||
type_str = str(type(page_or_ele))
|
||||
if isinstance(page_or_ele, str): # 直接传入html文本
|
||||
page = None
|
||||
page_or_ele = fromstring(page_or_ele)
|
||||
elif type_str.endswith("SessionElement'>"): # SessionElement
|
||||
page = page_or_ele.page
|
||||
page_or_ele = page_or_ele.inner_ele
|
||||
else: # 传入的是SessionPage对象
|
||||
elif "Page" in type_str: # MixPage, DriverPage 或 SessionPage
|
||||
page = page_or_ele
|
||||
page_or_ele = fromstring(sub(r' ?', ' ', page_or_ele.response.text))
|
||||
page_or_ele = fromstring(page_or_ele.html)
|
||||
else: # DrissionElement 或 ShadowRootElement
|
||||
page = page_or_ele.page
|
||||
page_or_ele = fromstring(page_or_ele.html)
|
||||
# else: # 传入的是SessionPage对象
|
||||
# page = page_or_ele
|
||||
# page_or_ele = fromstring(sub(r'&nbsp;?', '&nbsp;', page_or_ele.response.text))
|
||||
|
||||
# ---------------处理定位符---------------
|
||||
if isinstance(loc, str):
|
||||
loc = str_to_loc(loc)
|
||||
elif isinstance(loc, tuple):
|
||||
loc = translate_loc(loc)
|
||||
else:
|
||||
raise ValueError("定位符必须为str或长度为2的tuple。")
|
||||
|
||||
# ---------------执行搜索-----------------
|
||||
try:
|
||||
# 用lxml内置方法获取lxml的元素对象列表
|
||||
if loc[0] == 'xpath':
|
||||
if loc[0] == 'xpath': # 用lxml内置方法获取lxml的元素对象列表
|
||||
ele = page_or_ele.xpath(loc[1])
|
||||
|
||||
# 用css selector获取元素对象列表
|
||||
else:
|
||||
else: # 用css selector获取元素对象列表
|
||||
ele = page_or_ele.cssselect(loc[1])
|
||||
|
||||
# 结果不是列表,如数字
|
||||
if not isinstance(ele, list):
|
||||
if not isinstance(ele, list): # 结果不是列表,如数字
|
||||
return ele
|
||||
|
||||
# 把lxml元素对象包装成SessionElement对象并按需要返回第一个或全部
|
||||
if mode == 'single':
|
||||
ele = ele[0] if ele else None
|
||||
|
||||
if isinstance(ele, HtmlElement):
|
||||
return SessionElement(ele, page)
|
||||
elif isinstance(ele, str):
|
||||
@ -273,7 +281,6 @@ def execute_session_find(page_or_ele,
|
||||
return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in ele if e != '\n']
|
||||
|
||||
except Exception as e:
|
||||
|
||||
if 'Invalid expression' in str(e):
|
||||
raise SyntaxError(f'Invalid xpath syntax. {loc}')
|
||||
elif 'Expected selector' in str(e):
|
||||
|
@ -16,9 +16,9 @@ from requests import Session, Response
|
||||
from tldextract import extract
|
||||
|
||||
from .base import BasePage
|
||||
from .common import str_to_loc, translate_loc, get_available_file_name, format_html
|
||||
from .common import get_available_file_name, format_html
|
||||
from .config import _cookie_to_dict
|
||||
from .session_element import SessionElement, execute_session_find
|
||||
from .session_element import SessionElement, make_session_ele
|
||||
|
||||
|
||||
class SessionPage(BasePage):
|
||||
@ -102,41 +102,27 @@ class SessionPage(BasePage):
|
||||
|
||||
def ele(self,
|
||||
loc_or_ele: Union[Tuple[str, str], str, SessionElement],
|
||||
mode: str = None, timeout=None) -> Union[SessionElement, List[SessionElement], str, None]:
|
||||
mode: str = None,
|
||||
timeout=None) -> Union[SessionElement, List[SessionElement], str, None]:
|
||||
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n
|
||||
:param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串
|
||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||
:param timeout: 不起实际作用,用于和父类对应
|
||||
:return: SessionElement对象
|
||||
"""
|
||||
if isinstance(loc_or_ele, (str, tuple)):
|
||||
if isinstance(loc_or_ele, str):
|
||||
loc_or_ele = str_to_loc(loc_or_ele)
|
||||
else:
|
||||
if len(loc_or_ele) != 2:
|
||||
raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.")
|
||||
loc_or_ele = translate_loc(loc_or_ele)
|
||||
return loc_or_ele if isinstance(loc_or_ele, SessionElement) else make_session_ele(self, loc_or_ele, mode)
|
||||
|
||||
elif isinstance(loc_or_ele, SessionElement):
|
||||
return loc_or_ele
|
||||
|
||||
else:
|
||||
raise ValueError('Argument loc_or_str can only be tuple, str, SessionElement, Element.')
|
||||
|
||||
return execute_session_find(self, loc_or_ele, mode)
|
||||
|
||||
def eles(self,
|
||||
loc_or_str: Union[Tuple[str, str], str], timeout=None) -> List[SessionElement]:
|
||||
def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout=None) -> List[SessionElement]:
|
||||
"""返回页面中所有符合条件的元素、属性或节点文本 \n
|
||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||
:param timeout: 不起实际作用,用于和父类对应
|
||||
:return: SessionElement对象组成的列表
|
||||
"""
|
||||
if not isinstance(loc_or_str, (tuple, str)):
|
||||
raise TypeError('Type of loc_or_str can only be tuple or str.')
|
||||
|
||||
return super().eles(loc_or_str, timeout)
|
||||
|
||||
def s_ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None, timeout=None):
    """Session-style lookup on the page; simply forwards to ``self.ele``.

    :param loc_or_str: a two-string locator tuple or a query string
    :param mode: lookup mode, passed through unchanged (``None`` by default)
    :param timeout: passed through unchanged to ``self.ele``
    :return: whatever ``self.ele`` returns
    """
    return self.ele(loc_or_str, mode=mode, timeout=timeout)
|
||||
|
||||
def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]:
|
||||
"""返回cookies \n
|
||||
:param as_dict: 是否以字典方式返回
|
||||
|
@ -12,6 +12,7 @@ from selenium.webdriver.remote.webelement import WebElement
|
||||
from .base import BaseElement
|
||||
from .common import format_html
|
||||
from .driver_element import execute_driver_find, DriverElement
|
||||
from .session_element import make_session_ele
|
||||
|
||||
|
||||
class ShadowRootElement(BaseElement):
|
||||
@ -89,6 +90,9 @@ class ShadowRootElement(BaseElement):
|
||||
elif loc_or_str[0] == 'text':
|
||||
return self._find_eles_by_text(loc_or_str[1], loc_or_str[2], loc_or_str[3], mode)
|
||||
|
||||
def s_ele(self, loc_or_ele, mode='single', timeout=None):
    """Look up ``loc_or_ele`` through ``make_session_ele`` with this shadow root as the root.

    :param loc_or_ele: locator handed straight to ``make_session_ele``
    :param mode: ``'single'`` or ``'all'``
    :param timeout: accepted to match the base signature; not used here
    :return: the result of ``make_session_ele``
    """
    return make_session_ele(self, loc_or_ele, mode)
|
||||
|
||||
def eles(self,
|
||||
loc_or_str: Union[Tuple[str, str], str],
|
||||
timeout: float = None) -> List[DriverElement]:
|
||||
|
Loading…
x
Reference in New Issue
Block a user