mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
translate_loc_to_xpath()改名translate_loc();
get_loc_from_str()改名str_to_loc(); 增加format_html()
This commit is contained in:
parent
7dc36efc7c
commit
78437f604e
@ -5,24 +5,25 @@
|
|||||||
@File : common.py
|
@File : common.py
|
||||||
"""
|
"""
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
|
from html import unescape
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from re import split as re_SPLIT
|
from re import split as re_SPLIT
|
||||||
from shutil import rmtree
|
from shutil import rmtree
|
||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
from lxml.etree import _Element
|
from lxml.html import HtmlElement
|
||||||
from selenium.webdriver.remote.webelement import WebElement
|
from selenium.webdriver.remote.webelement import WebElement
|
||||||
|
|
||||||
|
|
||||||
class DrissionElement(object):
|
class DrissionElement(object):
|
||||||
"""SessionElement和DriverElement的基类"""
|
"""SessionElement和DriverElement的基类"""
|
||||||
|
|
||||||
def __init__(self, ele: Union[WebElement, _Element], page=None):
|
def __init__(self, ele: Union[WebElement, HtmlElement], page=None):
|
||||||
self._inner_ele = ele
|
self._inner_ele = ele
|
||||||
self.page = page
|
self.page = page
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def inner_ele(self) -> Union[WebElement, _Element]:
|
def inner_ele(self) -> Union[WebElement, HtmlElement]:
|
||||||
return self._inner_ele
|
return self._inner_ele
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -74,7 +75,7 @@ class DrissionElement(object):
|
|||||||
# pass
|
# pass
|
||||||
|
|
||||||
|
|
||||||
def get_loc_from_str(loc: str) -> tuple:
|
def str_to_loc(loc: str) -> tuple:
|
||||||
"""处理元素查找语句 \n
|
"""处理元素查找语句 \n
|
||||||
查找方式:属性、tag name及属性、文本、xpath、css selector \n
|
查找方式:属性、tag name及属性、文本、xpath、css selector \n
|
||||||
=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||||
@ -182,7 +183,11 @@ def _make_search_str(search_str: str) -> str:
|
|||||||
return search_str
|
return search_str
|
||||||
|
|
||||||
|
|
||||||
def translate_loc_to_xpath(loc: tuple) -> tuple:
|
def format_html(text: str) -> str:
|
||||||
|
return unescape(text).replace('\xa0', ' ')
|
||||||
|
|
||||||
|
|
||||||
|
def translate_loc(loc: tuple) -> tuple:
|
||||||
"""把By类型的loc元组转换为css selector或xpath类型的 \n
|
"""把By类型的loc元组转换为css selector或xpath类型的 \n
|
||||||
:param loc: By类型的loc元组
|
:param loc: By类型的loc元组
|
||||||
:return: css selector或xpath类型的loc元组
|
:return: css selector或xpath类型的loc元组
|
||||||
|
@ -14,7 +14,7 @@ from selenium.common.exceptions import NoAlertPresentException
|
|||||||
from selenium.webdriver.chrome.webdriver import WebDriver
|
from selenium.webdriver.chrome.webdriver import WebDriver
|
||||||
from selenium.webdriver.remote.webelement import WebElement
|
from selenium.webdriver.remote.webelement import WebElement
|
||||||
|
|
||||||
from .common import get_loc_from_str, get_available_file_name, translate_loc_to_xpath
|
from .common import str_to_loc, get_available_file_name, translate_loc, format_html
|
||||||
from .driver_element import DriverElement, execute_driver_find
|
from .driver_element import DriverElement, execute_driver_find
|
||||||
|
|
||||||
|
|
||||||
@ -43,7 +43,7 @@ class DriverPage(object):
|
|||||||
@property
|
@property
|
||||||
def html(self) -> str:
|
def html(self) -> str:
|
||||||
"""返回页面html文本"""
|
"""返回页面html文本"""
|
||||||
return self.driver.find_element_by_xpath("//*").get_attribute("outerHTML")
|
return format_html(self.driver.find_element_by_xpath("//*").get_attribute("outerHTML"))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def url_available(self) -> bool:
|
def url_available(self) -> bool:
|
||||||
@ -139,11 +139,11 @@ class DriverPage(object):
|
|||||||
# 接收到字符串或元组,获取定位loc元组
|
# 接收到字符串或元组,获取定位loc元组
|
||||||
if isinstance(loc_or_ele, (str, tuple)):
|
if isinstance(loc_or_ele, (str, tuple)):
|
||||||
if isinstance(loc_or_ele, str):
|
if isinstance(loc_or_ele, str):
|
||||||
loc_or_ele = get_loc_from_str(loc_or_ele)
|
loc_or_ele = str_to_loc(loc_or_ele)
|
||||||
else:
|
else:
|
||||||
if len(loc_or_ele) != 2:
|
if len(loc_or_ele) != 2:
|
||||||
raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.")
|
raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.")
|
||||||
loc_or_ele = translate_loc_to_xpath(loc_or_ele)
|
loc_or_ele = translate_loc(loc_or_ele)
|
||||||
|
|
||||||
if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
|
if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
|
||||||
loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'
|
loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'
|
||||||
@ -219,7 +219,7 @@ class DriverPage(object):
|
|||||||
elif isinstance(loc_or_ele, WebElement):
|
elif isinstance(loc_or_ele, WebElement):
|
||||||
is_ele = True
|
is_ele = True
|
||||||
elif isinstance(loc_or_ele, str):
|
elif isinstance(loc_or_ele, str):
|
||||||
loc_or_ele = get_loc_from_str(loc_or_ele)
|
loc_or_ele = str_to_loc(loc_or_ele)
|
||||||
elif isinstance(loc_or_ele, tuple):
|
elif isinstance(loc_or_ele, tuple):
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
|
@ -16,7 +16,7 @@ from urllib.parse import urlparse, quote, unquote
|
|||||||
|
|
||||||
from requests import Session, Response
|
from requests import Session, Response
|
||||||
|
|
||||||
from .common import get_loc_from_str, translate_loc_to_xpath, get_available_file_name
|
from .common import str_to_loc, translate_loc, get_available_file_name, format_html
|
||||||
from .config import OptionsManager
|
from .config import OptionsManager
|
||||||
from .session_element import SessionElement, execute_session_find
|
from .session_element import SessionElement, execute_session_find
|
||||||
|
|
||||||
@ -65,7 +65,7 @@ class SessionPage(object):
|
|||||||
@property
|
@property
|
||||||
def html(self) -> str:
|
def html(self) -> str:
|
||||||
"""返回页面html文本"""
|
"""返回页面html文本"""
|
||||||
return self.response.text
|
return format_html(self.response.text)
|
||||||
|
|
||||||
def ele(self,
|
def ele(self,
|
||||||
loc_or_ele: Union[Tuple[str, str], str, SessionElement],
|
loc_or_ele: Union[Tuple[str, str], str, SessionElement],
|
||||||
@ -98,11 +98,12 @@ class SessionPage(object):
|
|||||||
"""
|
"""
|
||||||
if isinstance(loc_or_ele, (str, tuple)):
|
if isinstance(loc_or_ele, (str, tuple)):
|
||||||
if isinstance(loc_or_ele, str):
|
if isinstance(loc_or_ele, str):
|
||||||
loc_or_ele = get_loc_from_str(loc_or_ele)
|
loc_or_ele = str_to_loc(loc_or_ele)
|
||||||
else:
|
else:
|
||||||
if len(loc_or_ele) != 2:
|
if len(loc_or_ele) != 2:
|
||||||
raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.")
|
raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.")
|
||||||
loc_or_ele = translate_loc_to_xpath(loc_or_ele)
|
loc_or_ele = translate_loc(loc_or_ele)
|
||||||
|
|
||||||
if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
|
if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
|
||||||
loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'
|
loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'
|
||||||
|
|
||||||
|
@ -1,12 +1,11 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- coding:utf-8 -*-
|
# -*- coding:utf-8 -*-
|
||||||
from html import unescape
|
|
||||||
from re import split as re_SPLIT
|
from re import split as re_SPLIT
|
||||||
from typing import Union, Any
|
from typing import Union, Any
|
||||||
|
|
||||||
from selenium.webdriver.remote.webelement import WebElement
|
from selenium.webdriver.remote.webelement import WebElement
|
||||||
|
|
||||||
from .common import DrissionElement
|
from .common import DrissionElement, format_html
|
||||||
from .driver_element import execute_driver_find
|
from .driver_element import execute_driver_find
|
||||||
|
|
||||||
|
|
||||||
@ -31,7 +30,7 @@ class ShadowRootElement(DrissionElement):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def html(self):
|
def html(self):
|
||||||
return unescape(self.inner_ele.get_attribute('innerHTML')).replace('\xa0', ' ')
|
return format_html(self.inner_ele.get_attribute('innerHTML'))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def parent(self):
|
def parent(self):
|
||||||
@ -87,7 +86,7 @@ class ShadowRootElement(DrissionElement):
|
|||||||
:return: DriverElement对象
|
:return: DriverElement对象
|
||||||
"""
|
"""
|
||||||
if isinstance(loc_or_str, str):
|
if isinstance(loc_or_str, str):
|
||||||
loc_or_str = get_css_from_str(loc_or_str)
|
loc_or_str = str_to_css_loc(loc_or_str)
|
||||||
elif isinstance(loc_or_str, tuple) and len(loc_or_str) == 2:
|
elif isinstance(loc_or_str, tuple) and len(loc_or_str) == 2:
|
||||||
if loc_or_str[0] == 'xpath':
|
if loc_or_str[0] == 'xpath':
|
||||||
raise ValueError('不支持xpath')
|
raise ValueError('不支持xpath')
|
||||||
@ -189,7 +188,7 @@ class ShadowRootElement(DrissionElement):
|
|||||||
return None if mode == 'single' else results
|
return None if mode == 'single' else results
|
||||||
|
|
||||||
|
|
||||||
def get_css_from_str(loc: str) -> tuple:
|
def str_to_css_loc(loc: str) -> tuple:
|
||||||
"""处理元素查找语句 \n
|
"""处理元素查找语句 \n
|
||||||
查找方式:属性、tag name及属性、文本、css selector \n
|
查找方式:属性、tag name及属性、文本、css selector \n
|
||||||
=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||||
@ -212,6 +211,7 @@ def get_css_from_str(loc: str) -> tuple:
|
|||||||
# 根据属性查找
|
# 根据属性查找
|
||||||
if loc.startswith('@'):
|
if loc.startswith('@'):
|
||||||
r = re_SPLIT(r'([:=])', loc[1:], maxsplit=1)
|
r = re_SPLIT(r'([:=])', loc[1:], maxsplit=1)
|
||||||
|
|
||||||
if len(r) == 3:
|
if len(r) == 3:
|
||||||
mode = '=' if r[1] == '=' else '*='
|
mode = '=' if r[1] == '=' else '*='
|
||||||
loc_str = f'*[{r[0]}{mode}{r[2]}]'
|
loc_str = f'*[{r[0]}{mode}{r[2]}]'
|
||||||
@ -225,6 +225,7 @@ def get_css_from_str(loc: str) -> tuple:
|
|||||||
else:
|
else:
|
||||||
at_lst = loc[4:].split('@', maxsplit=1)
|
at_lst = loc[4:].split('@', maxsplit=1)
|
||||||
r = re_SPLIT(r'([:=])', at_lst[1], maxsplit=1)
|
r = re_SPLIT(r'([:=])', at_lst[1], maxsplit=1)
|
||||||
|
|
||||||
if len(r) == 3:
|
if len(r) == 3:
|
||||||
if r[0] == 'text()':
|
if r[0] == 'text()':
|
||||||
match = 'exact' if r[1] == '=' else 'fuzzy'
|
match = 'exact' if r[1] == '=' else 'fuzzy'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user