translate_loc_to_xpath()改名translate_loc();

get_loc_from_str()改名str_to_loc();
增加format_html()
This commit is contained in:
g1879 2020-11-12 18:04:02 +08:00
parent 7dc36efc7c
commit 78437f604e
4 changed files with 26 additions and 19 deletions

View File

@ -5,24 +5,25 @@
@File : common.py @File : common.py
""" """
from abc import abstractmethod from abc import abstractmethod
from html import unescape
from pathlib import Path from pathlib import Path
from re import split as re_SPLIT from re import split as re_SPLIT
from shutil import rmtree from shutil import rmtree
from typing import Union from typing import Union
from lxml.etree import _Element from lxml.html import HtmlElement
from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.remote.webelement import WebElement
class DrissionElement(object): class DrissionElement(object):
"""SessionElement和DriverElement的基类""" """SessionElement和DriverElement的基类"""
def __init__(self, ele: Union[WebElement, _Element], page=None): def __init__(self, ele: Union[WebElement, HtmlElement], page=None):
self._inner_ele = ele self._inner_ele = ele
self.page = page self.page = page
@property @property
def inner_ele(self) -> Union[WebElement, _Element]: def inner_ele(self) -> Union[WebElement, HtmlElement]:
return self._inner_ele return self._inner_ele
@property @property
@ -74,7 +75,7 @@ class DrissionElement(object):
# pass # pass
def get_loc_from_str(loc: str) -> tuple: def str_to_loc(loc: str) -> tuple:
"""处理元素查找语句 \n """处理元素查找语句 \n
查找方式属性tag name及属性文本xpathcss selector \n 查找方式属性tag name及属性文本xpathcss selector \n
=表示精确匹配:表示模糊匹配无控制字符串时默认搜索该字符串 \n =表示精确匹配:表示模糊匹配无控制字符串时默认搜索该字符串 \n
@ -182,7 +183,11 @@ def _make_search_str(search_str: str) -> str:
return search_str return search_str
def translate_loc_to_xpath(loc: tuple) -> tuple: def format_html(text: str) -> str:
return unescape(text).replace('\xa0', ' ')
def translate_loc(loc: tuple) -> tuple:
"""把By类型的loc元组转换为css selector或xpath类型的 \n """把By类型的loc元组转换为css selector或xpath类型的 \n
:param loc: By类型的loc元组 :param loc: By类型的loc元组
:return: css selector或xpath类型的loc元组 :return: css selector或xpath类型的loc元组

View File

@ -14,7 +14,7 @@ from selenium.common.exceptions import NoAlertPresentException
from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.remote.webelement import WebElement
from .common import get_loc_from_str, get_available_file_name, translate_loc_to_xpath from .common import str_to_loc, get_available_file_name, translate_loc, format_html
from .driver_element import DriverElement, execute_driver_find from .driver_element import DriverElement, execute_driver_find
@ -43,7 +43,7 @@ class DriverPage(object):
@property @property
def html(self) -> str: def html(self) -> str:
"""返回页面html文本""" """返回页面html文本"""
return self.driver.find_element_by_xpath("//*").get_attribute("outerHTML") return format_html(self.driver.find_element_by_xpath("//*").get_attribute("outerHTML"))
@property @property
def url_available(self) -> bool: def url_available(self) -> bool:
@ -139,11 +139,11 @@ class DriverPage(object):
# 接收到字符串或元组获取定位loc元组 # 接收到字符串或元组获取定位loc元组
if isinstance(loc_or_ele, (str, tuple)): if isinstance(loc_or_ele, (str, tuple)):
if isinstance(loc_or_ele, str): if isinstance(loc_or_ele, str):
loc_or_ele = get_loc_from_str(loc_or_ele) loc_or_ele = str_to_loc(loc_or_ele)
else: else:
if len(loc_or_ele) != 2: if len(loc_or_ele) != 2:
raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.") raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.")
loc_or_ele = translate_loc_to_xpath(loc_or_ele) loc_or_ele = translate_loc(loc_or_ele)
if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')): if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}' loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'
@ -219,7 +219,7 @@ class DriverPage(object):
elif isinstance(loc_or_ele, WebElement): elif isinstance(loc_or_ele, WebElement):
is_ele = True is_ele = True
elif isinstance(loc_or_ele, str): elif isinstance(loc_or_ele, str):
loc_or_ele = get_loc_from_str(loc_or_ele) loc_or_ele = str_to_loc(loc_or_ele)
elif isinstance(loc_or_ele, tuple): elif isinstance(loc_or_ele, tuple):
pass pass
else: else:

View File

@ -16,7 +16,7 @@ from urllib.parse import urlparse, quote, unquote
from requests import Session, Response from requests import Session, Response
from .common import get_loc_from_str, translate_loc_to_xpath, get_available_file_name from .common import str_to_loc, translate_loc, get_available_file_name, format_html
from .config import OptionsManager from .config import OptionsManager
from .session_element import SessionElement, execute_session_find from .session_element import SessionElement, execute_session_find
@ -65,7 +65,7 @@ class SessionPage(object):
@property @property
def html(self) -> str: def html(self) -> str:
"""返回页面html文本""" """返回页面html文本"""
return self.response.text return format_html(self.response.text)
def ele(self, def ele(self,
loc_or_ele: Union[Tuple[str, str], str, SessionElement], loc_or_ele: Union[Tuple[str, str], str, SessionElement],
@ -98,11 +98,12 @@ class SessionPage(object):
""" """
if isinstance(loc_or_ele, (str, tuple)): if isinstance(loc_or_ele, (str, tuple)):
if isinstance(loc_or_ele, str): if isinstance(loc_or_ele, str):
loc_or_ele = get_loc_from_str(loc_or_ele) loc_or_ele = str_to_loc(loc_or_ele)
else: else:
if len(loc_or_ele) != 2: if len(loc_or_ele) != 2:
raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.") raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.")
loc_or_ele = translate_loc_to_xpath(loc_or_ele) loc_or_ele = translate_loc(loc_or_ele)
if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')): if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}' loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'

View File

@ -1,12 +1,11 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding:utf-8 -*- # -*- coding:utf-8 -*-
from html import unescape
from re import split as re_SPLIT from re import split as re_SPLIT
from typing import Union, Any from typing import Union, Any
from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.remote.webelement import WebElement
from .common import DrissionElement from .common import DrissionElement, format_html
from .driver_element import execute_driver_find from .driver_element import execute_driver_find
@ -31,7 +30,7 @@ class ShadowRootElement(DrissionElement):
@property @property
def html(self): def html(self):
return unescape(self.inner_ele.get_attribute('innerHTML')).replace('\xa0', ' ') return format_html(self.inner_ele.get_attribute('innerHTML'))
@property @property
def parent(self): def parent(self):
@ -87,7 +86,7 @@ class ShadowRootElement(DrissionElement):
:return: DriverElement对象 :return: DriverElement对象
""" """
if isinstance(loc_or_str, str): if isinstance(loc_or_str, str):
loc_or_str = get_css_from_str(loc_or_str) loc_or_str = str_to_css_loc(loc_or_str)
elif isinstance(loc_or_str, tuple) and len(loc_or_str) == 2: elif isinstance(loc_or_str, tuple) and len(loc_or_str) == 2:
if loc_or_str[0] == 'xpath': if loc_or_str[0] == 'xpath':
raise ValueError('不支持xpath') raise ValueError('不支持xpath')
@ -189,7 +188,7 @@ class ShadowRootElement(DrissionElement):
return None if mode == 'single' else results return None if mode == 'single' else results
def get_css_from_str(loc: str) -> tuple: def str_to_css_loc(loc: str) -> tuple:
"""处理元素查找语句 \n """处理元素查找语句 \n
查找方式属性tag name及属性文本css selector \n 查找方式属性tag name及属性文本css selector \n
=表示精确匹配:表示模糊匹配无控制字符串时默认搜索该字符串 \n =表示精确匹配:表示模糊匹配无控制字符串时默认搜索该字符串 \n
@ -212,6 +211,7 @@ def get_css_from_str(loc: str) -> tuple:
# 根据属性查找 # 根据属性查找
if loc.startswith('@'): if loc.startswith('@'):
r = re_SPLIT(r'([:=])', loc[1:], maxsplit=1) r = re_SPLIT(r'([:=])', loc[1:], maxsplit=1)
if len(r) == 3: if len(r) == 3:
mode = '=' if r[1] == '=' else '*=' mode = '=' if r[1] == '=' else '*='
loc_str = f'*[{r[0]}{mode}{r[2]}]' loc_str = f'*[{r[0]}{mode}{r[2]}]'
@ -225,6 +225,7 @@ def get_css_from_str(loc: str) -> tuple:
else: else:
at_lst = loc[4:].split('@', maxsplit=1) at_lst = loc[4:].split('@', maxsplit=1)
r = re_SPLIT(r'([:=])', at_lst[1], maxsplit=1) r = re_SPLIT(r'([:=])', at_lst[1], maxsplit=1)
if len(r) == 3: if len(r) == 3:
if r[0] == 'text()': if r[0] == 'text()':
match = 'exact' if r[1] == '=' else 'fuzzy' match = 'exact' if r[1] == '=' else 'fuzzy'