添加s_ele(),未完成

This commit is contained in:
g1879 2021-11-19 21:48:00 +08:00
parent 4241ac000d
commit 391d042635
9 changed files with 99 additions and 80 deletions

View File

@ -6,7 +6,7 @@
"""
from abc import abstractmethod
from re import sub
from typing import Union
from typing import Union, Tuple
from lxml.html import HtmlElement
from selenium.webdriver.remote.webelement import WebElement
@ -21,14 +21,26 @@ class BaseParser(object):
timeout: float = None):
return self.ele(loc_or_str, mode, timeout)
def eles(self, loc_or_str, timeout):
def eles(self,
         loc_or_str: Union[Tuple[str, str], str],
         timeout: float = None):
    """Return all matches for a locator by delegating to ``ele()`` in 'all' mode \n
    :param loc_or_str: locator tuple or query string, handed to ``ele()`` unchanged
    :param timeout: handed to ``ele()`` unchanged
    :return: whatever ``ele()`` returns when called with ``mode='all'``
    """
    # The concrete lookup lives in the subclass's ele(); this wrapper only pins the mode.
    return self.ele(loc_or_str, timeout=timeout, mode='all')
def s_eles(self,
           loc_or_str: Union[Tuple[str, str], str],
           timeout: float = None):
    """Find all matching elements and return them in SessionElement form \n
    :param loc_or_str: locator tuple or query string, handed to ``s_ele()`` unchanged
    :param timeout: handed to ``s_ele()`` unchanged
    :return: whatever ``s_ele()`` returns when called with ``mode='all'``
    """
    # Same pattern as eles(): the subclass's s_ele() does the work, this wrapper pins the mode.
    return self.s_ele(loc_or_str, timeout=timeout, mode='all')
# ----------------以下属性或方法待后代实现----------------
@property
def html(self):
    """Placeholder for the html text of the object; this base version yields None.

    Listed under the section of members left for descendants to implement.
    """
    return None
@abstractmethod
def s_ele(self, loc_or_ele, mode='single', timeout=None):
    """Abstract hook: look up element(s) and return them in SessionElement form \n
    :param loc_or_ele: locator (tuple or query string) or an element object
    :param mode: 'single' or 'all'
    :param timeout: lookup timeout; implementations may ignore it
    :return: defined by the implementing subclass; this base body returns None
    """
@abstractmethod
def ele(self, loc_or_ele, mode='single', timeout=None):
    """Abstract hook: look up element(s) for a locator \n
    :param loc_or_ele: locator (tuple or query string) or an element object
    :param mode: 'single' or 'all'
    :param timeout: lookup timeout; implementations may ignore it
    :return: defined by the implementing subclass; this base body returns None
    """
@ -50,8 +62,8 @@ class BaseElement(BaseParser):
"""返回后一个兄弟元素"""
return self.nexts()
def eles(self, loc_or_str, timeout):
return super().eles(loc_or_str, timeout)
# def eles(self, loc_or_str, timeout):
# return super().eles(loc_or_str, timeout)
# ----------------以下属性或方法由后代实现----------------
@property
@ -251,8 +263,8 @@ class BasePage(BaseParser):
"""返回当前访问的url有效性"""
return self._url_available
def eles(self, loc_or_str, timeout):
return super().eles(loc_or_str, timeout)
# def eles(self, loc_or_str, timeout):
# return super().eles(loc_or_str, timeout)
# ----------------以下属性或方法由后代实现----------------
@property

View File

@ -158,7 +158,11 @@ def _make_search_str(search_str: str) -> str:
def format_html(text: str, trans: bool = True) -> str:
"""处理html编码字符"""
"""处理html编码字符 \n
:param text: html文本
:param trans: 是否转码
:return: 格式化后的html文本
"""
if not text:
return text
@ -173,8 +177,10 @@ def translate_loc(loc: tuple) -> tuple:
:param loc: By类型的loc元组
:return: css selector或xpath类型的loc元组
"""
if len(loc) != 2:
raise ValueError('定位符长度必须为2。')
loc_by = 'xpath'
loc_str = None
if loc[0] == 'xpath':
loc_str = loc[1]
@ -201,6 +207,9 @@ def translate_loc(loc: tuple) -> tuple:
elif loc[0] == 'partial link text':
loc_str = f'//a[contains(text(),"{loc[1]}")]'
else:
raise ValueError('无法识别的定位符。')
return loc_by, loc_str

View File

@ -1,19 +1,19 @@
[paths]
chromedriver_path =
tmp_path =
chromedriver_path = D:\python\Google Chrome\Chrome\chromedriver75.exe
tmp_path = D:\python\projects\DrissionPage\DrissionPage\tmp
[chrome_options]
debugger_address =
binary_location =
debugger_address = 127.0.0.1:9222
binary_location = D:\python\Google Chrome\Chrome\chrome.exe
arguments = ['--no-sandbox', '--disable-gpu', '--ignore-certificate-errors', '--disable-infobars']
extensions = []
experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}, 'plugins.plugins_list': [{'enabled': False, 'name': 'Chrome PDF Viewer'}]}, 'useAutomationExtension': False, 'excludeSwitches': ['enable-automation']}
[session_options]
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Connection": "keep-alive",
"Accept-Charset": "GB2312,utf-8;q=0.7,*;q=0.7"
}
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Connection": "keep-alive",
"Accept-Charset": "GB2312,utf-8;q=0.7,*;q=0.7"
}

View File

@ -18,6 +18,7 @@ from selenium.webdriver.support.wait import WebDriverWait
from .base import DrissionElement, BaseElement
from .common import str_to_loc, get_available_file_name, translate_loc, format_html
from .session_element import make_session_ele
class DriverElement(DrissionElement):
@ -123,16 +124,7 @@ class DriverElement(DrissionElement):
:param timeout: 查找元素超时时间
:return: DriverElement对象
"""
if isinstance(loc_or_str, (str, tuple)):
if isinstance(loc_or_str, str):
loc_or_str = str_to_loc(loc_or_str)
else:
if len(loc_or_str) != 2:
raise ValueError("Len of loc_or_str must be 2 when it's a tuple.")
loc_or_str = translate_loc(loc_or_str)
else:
raise ValueError('Argument loc_or_str can only be tuple or str.')
loc_or_str = str_to_loc(loc_or_str) if isinstance(loc_or_str, str) else translate_loc(loc_or_str)
loc_str = loc_or_str[1]
if loc_or_str[0] == 'xpath' and loc_or_str[1].lstrip().startswith('/'):
@ -142,9 +134,11 @@ class DriverElement(DrissionElement):
loc_str = f'{self.css_path}{loc_or_str[1]}'
loc_or_str = loc_or_str[0], loc_str
return execute_driver_find(self, loc_or_str, mode, timeout)
def s_ele(self, loc_or_ele, mode='single', timeout=None):
    """Look up sub-element(s) of this element via ``make_session_ele`` \n
    :param loc_or_ele: locator handed to ``make_session_ele`` unchanged
    :param mode: 'single' or 'all'
    :param timeout: accepted for signature parity with the base class; not used here
    :return: the result of ``make_session_ele(self, loc_or_ele, mode)``
    """
    # NOTE(review): make_session_ele appears to re-parse this element's html for the
    # lookup, which would explain why no timeout is forwarded - confirm.
    found = make_session_ele(self, loc_or_ele, mode)
    return found
def eles(self,
loc_or_str: Union[Tuple[str, str], str],
timeout: float = None):

View File

@ -19,6 +19,7 @@ from selenium.webdriver.support.wait import WebDriverWait
from .base import BasePage
from .common import str_to_loc, get_available_file_name, translate_loc, format_html
from .driver_element import DriverElement, execute_driver_find, _wait_ele
from .session_element import make_session_ele
class DriverPage(BasePage):
@ -127,6 +128,9 @@ class DriverPage(BasePage):
return execute_driver_find(self, loc_or_ele, mode, timeout)
def s_ele(self, loc_or_ele, mode='single', timeout=None):
    """Look up element(s) in this page via ``make_session_ele`` \n
    :param loc_or_ele: locator handed to ``make_session_ele`` unchanged
    :param mode: 'single' or 'all'
    :param timeout: accepted for signature parity with the base class; not used here
    :return: the result of ``make_session_ele(self, loc_or_ele, mode)``
    """
    # NOTE(review): make_session_ele appears to parse the page's html text for the
    # lookup, which would explain why no timeout is forwarded - confirm.
    found = make_session_ele(self, loc_or_ele, mode)
    return found
def eles(self,
loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> List[DriverElement]:
@ -135,9 +139,6 @@ class DriverPage(BasePage):
:param timeout: 查找元素超时时间
:return: DriverElement对象组成的列表
"""
if not isinstance(loc_or_str, (tuple, str)):
raise TypeError('Type of loc_or_str can only be tuple or str.')
return super().eles(loc_or_str, timeout)
def get_cookies(self, as_dict: bool = False) -> Union[list, dict]:

View File

@ -133,6 +133,12 @@ class MixPage(SessionPage, DriverPage, BasePage):
elif self._mode == 'd':
return super(SessionPage, self).ele(loc_or_ele, mode=mode, timeout=timeout)
def s_ele(self, loc_or_ele, mode='single', timeout=None):
    """Dispatch the SessionElement-style lookup according to the page's current mode \n
    :param loc_or_ele: locator or element, handed to the selected parent implementation
    :param mode: 'single' or 'all'
    :param timeout: forwarded only on the 'd' branch
    :return: result of the selected parent ``s_ele``; None when the mode is neither 's' nor 'd'
    """
    if self._mode == 's':
        # Plain super() picks the first parent in the MRO.
        return super().s_ele(loc_or_ele, mode=mode)

    if self._mode == 'd':
        # Starting the MRO search after SessionPage reaches the driver-side implementation.
        return super(SessionPage, self).s_ele(loc_or_ele, mode=mode, timeout=timeout)

    return None
def eles(self,
loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> Union[List[DriverElement], List[SessionElement]]:

View File

@ -145,16 +145,7 @@ class SessionElement(DrissionElement):
:param timeout: 不起实际作用用于和父类对应
:return: SessionElement对象
"""
if isinstance(loc_or_str, (str, tuple)):
if isinstance(loc_or_str, str):
loc_or_str = str_to_loc(loc_or_str)
else:
if len(loc_or_str) != 2:
raise ValueError("Len of loc_or_str must be 2 when it's a tuple.")
loc_or_str = translate_loc(loc_or_str)
else:
raise ValueError('Argument loc_or_str can only be tuple or str.')
loc_or_str = str_to_loc(loc_or_str) if isinstance(loc_or_str, str) else translate_loc(loc_or_str)
element = self
loc_str = loc_or_str[1]
@ -168,7 +159,7 @@ class SessionElement(DrissionElement):
loc_or_str = loc_or_str[0], loc_str
return execute_session_find(element, loc_or_str, mode)
return make_session_ele(element, loc_or_str, mode)
def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout=None):
"""返回当前元素下级所有符合条件的子元素、属性或节点文本 \n
@ -178,6 +169,9 @@ class SessionElement(DrissionElement):
"""
return self.ele(loc_or_str, mode='all')
def s_ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None, timeout=None):
    """SessionElement-style lookup; simply forwards to this object's own ``ele()`` \n
    :param loc_or_str: locator tuple or query string
    :param mode: handed to ``ele()`` unchanged (None when omitted)
    :param timeout: handed to ``ele()`` unchanged
    :return: whatever ``ele()`` returns
    """
    return self.ele(loc_or_str, timeout=timeout, mode=mode)
def _get_ele_path(self, mode) -> str:
"""获取css路径或xpath路径
:param mode: 'css' 'xpath'
@ -223,9 +217,9 @@ class SessionElement(DrissionElement):
return link
def execute_session_find(page_or_ele,
loc: Tuple[str, str],
mode: str = 'single', ) -> Union[SessionElement, List[SessionElement], str, None]:
def make_session_ele(page_or_ele,
loc: Union[str, Tuple[str, str]],
mode: str = 'single', ) -> Union[SessionElement, List[SessionElement], str, None]:
"""执行session模式元素的查找 \n
页面查找元素及元素查找下级元素皆使用此方法 \n
:param page_or_ele: SessionPage对象或SessionElement对象
@ -238,30 +232,44 @@ def execute_session_find(page_or_ele,
raise ValueError(f"Argument mode can only be 'single' or 'all', not '{mode}'.")
# 根据传入对象类型获取页面对象和lxml元素对象
if isinstance(page_or_ele, SessionElement):
type_str = str(type(page_or_ele))
if isinstance(page_or_ele, str): # 直接传入html文本
page = None
page_or_ele = fromstring(page_or_ele)
elif type_str.endswith("SessionElement'>"): # SessionElement
page = page_or_ele.page
page_or_ele = page_or_ele.inner_ele
else: # 传入的是SessionPage对象
elif "Page" in type_str: # MixPage, DriverPage 或 SessionPage
page = page_or_ele
page_or_ele = fromstring(sub(r' ?', ' ', page_or_ele.response.text))
page_or_ele = fromstring(page_or_ele.html)
else: # DrissionElement 或 ShadowRootElement
page = page_or_ele.page
page_or_ele = fromstring(page_or_ele.html)
# else: # 传入的是SessionPage对象
# page = page_or_ele
# page_or_ele = fromstring(sub(r' ?', ' ', page_or_ele.response.text))
# ---------------处理定位符---------------
if isinstance(loc, str):
loc = str_to_loc(loc)
elif isinstance(loc, tuple):
loc = translate_loc(loc)
else:
raise ValueError("定位符必须为str或长度为2的tuple。")
# ---------------执行搜索-----------------
try:
# 用lxml内置方法获取lxml的元素对象列表
if loc[0] == 'xpath':
if loc[0] == 'xpath': # 用lxml内置方法获取lxml的元素对象列表
ele = page_or_ele.xpath(loc[1])
# 用css selector获取元素对象列表
else:
else: # 用css selector获取元素对象列表
ele = page_or_ele.cssselect(loc[1])
# 结果不是列表,如数字
if not isinstance(ele, list):
if not isinstance(ele, list): # 结果不是列表,如数字
return ele
# 把lxml元素对象包装成SessionElement对象并按需要返回第一个或全部
if mode == 'single':
ele = ele[0] if ele else None
if isinstance(ele, HtmlElement):
return SessionElement(ele, page)
elif isinstance(ele, str):
@ -273,7 +281,6 @@ def execute_session_find(page_or_ele,
return [SessionElement(e, page) if isinstance(e, HtmlElement) else e for e in ele if e != '\n']
except Exception as e:
if 'Invalid expression' in str(e):
raise SyntaxError(f'Invalid xpath syntax. {loc}')
elif 'Expected selector' in str(e):

View File

@ -16,9 +16,9 @@ from requests import Session, Response
from tldextract import extract
from .base import BasePage
from .common import str_to_loc, translate_loc, get_available_file_name, format_html
from .common import get_available_file_name, format_html
from .config import _cookie_to_dict
from .session_element import SessionElement, execute_session_find
from .session_element import SessionElement, make_session_ele
class SessionPage(BasePage):
@ -102,41 +102,27 @@ class SessionPage(BasePage):
def ele(self,
loc_or_ele: Union[Tuple[str, str], str, SessionElement],
mode: str = None, timeout=None) -> Union[SessionElement, List[SessionElement], str, None]:
mode: str = None,
timeout=None) -> Union[SessionElement, List[SessionElement], str, None]:
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n
:param loc_or_ele: 元素的定位信息可以是元素对象loc元组或查询字符串
:param mode: 'single' 'all对应查找一个或全部
:param timeout: 不起实际作用用于和父类对应
:return: SessionElement对象
"""
if isinstance(loc_or_ele, (str, tuple)):
if isinstance(loc_or_ele, str):
loc_or_ele = str_to_loc(loc_or_ele)
else:
if len(loc_or_ele) != 2:
raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.")
loc_or_ele = translate_loc(loc_or_ele)
return loc_or_ele if isinstance(loc_or_ele, SessionElement) else make_session_ele(self, loc_or_ele, mode)
elif isinstance(loc_or_ele, SessionElement):
return loc_or_ele
else:
raise ValueError('Argument loc_or_str can only be tuple, str, SessionElement, Element.')
return execute_session_find(self, loc_or_ele, mode)
def eles(self,
loc_or_str: Union[Tuple[str, str], str], timeout=None) -> List[SessionElement]:
def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout=None) -> List[SessionElement]:
"""返回页面中所有符合条件的元素、属性或节点文本 \n
:param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
:param timeout: 不起实际作用用于和父类对应
:return: SessionElement对象组成的列表
"""
if not isinstance(loc_or_str, (tuple, str)):
raise TypeError('Type of loc_or_str can only be tuple or str.')
return super().eles(loc_or_str, timeout)
def s_ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None, timeout=None):
    """SessionElement-style lookup; simply forwards to this page's own ``ele()`` \n
    :param loc_or_str: locator tuple or query string
    :param mode: handed to ``ele()`` unchanged (None when omitted)
    :param timeout: handed to ``ele()`` unchanged
    :return: whatever ``ele()`` returns
    """
    return self.ele(loc_or_str, timeout=timeout, mode=mode)
def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]:
"""返回cookies \n
:param as_dict: 是否以字典方式返回

View File

@ -12,6 +12,7 @@ from selenium.webdriver.remote.webelement import WebElement
from .base import BaseElement
from .common import format_html
from .driver_element import execute_driver_find, DriverElement
from .session_element import make_session_ele
class ShadowRootElement(BaseElement):
@ -89,6 +90,9 @@ class ShadowRootElement(BaseElement):
elif loc_or_str[0] == 'text':
return self._find_eles_by_text(loc_or_str[1], loc_or_str[2], loc_or_str[3], mode)
def s_ele(self, loc_or_ele, mode='single', timeout=None):
    """Look up element(s) under this shadow root via ``make_session_ele`` \n
    :param loc_or_ele: locator handed to ``make_session_ele`` unchanged
    :param mode: 'single' or 'all'
    :param timeout: accepted for signature parity with the base class; not used here
    :return: the result of ``make_session_ele(self, loc_or_ele, mode)``
    """
    found = make_session_ele(self, loc_or_ele, mode)
    return found
def eles(self,
loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> List[DriverElement]: