translate_loc_to_xpath()改名translate_loc()；

get_loc_from_str()改名str_to_loc()；get_css_from_str()改名str_to_css_loc()；增加format_html()
2024-12-10 04:00:23 +08:00 · 2020-11-12 18:04:02 +08:00 · 2020-11-12 18:04:02 +08:00 · 78437f604e
commit 78437f604e
parent 7dc36efc7c
4 changed files with 26 additions and 19 deletions
--- a/DrissionPage/common.py
+++ b/DrissionPage/common.py
@ -5,24 +5,25 @@
@File    :   common.py
 """
 from abc import abstractmethod
+from html import unescape
 from pathlib import Path
 from re import split as re_SPLIT
 from shutil import rmtree
 from typing import Union

-from lxml.etree import _Element
+from lxml.html import HtmlElement
 from selenium.webdriver.remote.webelement import WebElement


 class DrissionElement(object):
    """SessionElement和DriverElement的基类"""

-    def __init__(self, ele: Union[WebElement, _Element], page=None):
+    def __init__(self, ele: Union[WebElement, HtmlElement], page=None):
        self._inner_ele = ele
        self.page = page

    @property
-    def inner_ele(self) -> Union[WebElement, _Element]:
+    def inner_ele(self) -> Union[WebElement, HtmlElement]:
        return self._inner_ele

    @property
@ -74,7 +75,7 @@ class DrissionElement(object):
    #     pass


-def get_loc_from_str(loc: str) -> tuple:
+def str_to_loc(loc: str) -> tuple:
    """处理元素查找语句                                                \n
    查找方式：属性、tag name及属性、文本、xpath、css selector            \n
    =表示精确匹配，:表示模糊匹配，无控制字符串时默认搜索该字符串             \n
@ -182,7 +183,11 @@ def _make_search_str(search_str: str) -> str:
    return search_str


-def translate_loc_to_xpath(loc: tuple) -> tuple:
+def format_html(text: str) -> str:
+    return unescape(text).replace('\xa0', ' ')
+
+
+def translate_loc(loc: tuple) -> tuple:
    """把By类型的loc元组转换为css selector或xpath类型的  \n
    :param loc: By类型的loc元组
    :return: css selector或xpath类型的loc元组
--- a/DrissionPage/driver_page.py
+++ b/DrissionPage/driver_page.py
@ -14,7 +14,7 @@ from selenium.common.exceptions import NoAlertPresentException
 from selenium.webdriver.chrome.webdriver import WebDriver
 from selenium.webdriver.remote.webelement import WebElement

-from .common import get_loc_from_str, get_available_file_name, translate_loc_to_xpath
+from .common import str_to_loc, get_available_file_name, translate_loc, format_html
 from .driver_element import DriverElement, execute_driver_find


@ -43,7 +43,7 @@ class DriverPage(object):
    @property
    def html(self) -> str:
        """返回页面html文本"""
-        return self.driver.find_element_by_xpath("//*").get_attribute("outerHTML")
+        return format_html(self.driver.find_element_by_xpath("//*").get_attribute("outerHTML"))

    @property
    def url_available(self) -> bool:
@ -139,11 +139,11 @@ class DriverPage(object):
        # 接收到字符串或元组，获取定位loc元组
        if isinstance(loc_or_ele, (str, tuple)):
            if isinstance(loc_or_ele, str):
-                loc_or_ele = get_loc_from_str(loc_or_ele)
+                loc_or_ele = str_to_loc(loc_or_ele)
            else:
                if len(loc_or_ele) != 2:
                    raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.")
-                loc_or_ele = translate_loc_to_xpath(loc_or_ele)
+                loc_or_ele = translate_loc(loc_or_ele)

            if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
                loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'
@ -219,7 +219,7 @@ class DriverPage(object):
        elif isinstance(loc_or_ele, WebElement):
            is_ele = True
        elif isinstance(loc_or_ele, str):
-            loc_or_ele = get_loc_from_str(loc_or_ele)
+            loc_or_ele = str_to_loc(loc_or_ele)
        elif isinstance(loc_or_ele, tuple):
            pass
        else:
--- a/DrissionPage/session_page.py
+++ b/DrissionPage/session_page.py
@ -16,7 +16,7 @@ from urllib.parse import urlparse, quote, unquote

 from requests import Session, Response

-from .common import get_loc_from_str, translate_loc_to_xpath, get_available_file_name
+from .common import str_to_loc, translate_loc, get_available_file_name, format_html
 from .config import OptionsManager
 from .session_element import SessionElement, execute_session_find

@ -65,7 +65,7 @@ class SessionPage(object):
    @property
    def html(self) -> str:
        """返回页面html文本"""
-        return self.response.text
+        return format_html(self.response.text)

    def ele(self,
            loc_or_ele: Union[Tuple[str, str], str, SessionElement],
@ -98,11 +98,12 @@ class SessionPage(object):
        """
        if isinstance(loc_or_ele, (str, tuple)):
            if isinstance(loc_or_ele, str):
-                loc_or_ele = get_loc_from_str(loc_or_ele)
+                loc_or_ele = str_to_loc(loc_or_ele)
            else:
                if len(loc_or_ele) != 2:
                    raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.")
-                loc_or_ele = translate_loc_to_xpath(loc_or_ele)
+                loc_or_ele = translate_loc(loc_or_ele)
+
            if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')):
                loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}'

--- a/DrissionPage/shadow_root_element.py
+++ b/DrissionPage/shadow_root_element.py
@ -1,12 +1,11 @@
 #!/usr/bin/env python
 # -*- coding:utf-8 -*-
-from html import unescape
 from re import split as re_SPLIT
 from typing import Union, Any

 from selenium.webdriver.remote.webelement import WebElement

-from .common import DrissionElement
+from .common import DrissionElement, format_html
 from .driver_element import execute_driver_find


@ -31,7 +30,7 @@ class ShadowRootElement(DrissionElement):

    @property
    def html(self):
-        return unescape(self.inner_ele.get_attribute('innerHTML')).replace('\xa0', ' ')
+        return format_html(self.inner_ele.get_attribute('innerHTML'))

    @property
    def parent(self):
@ -87,7 +86,7 @@ class ShadowRootElement(DrissionElement):
        :return: DriverElement对象
        """
        if isinstance(loc_or_str, str):
-            loc_or_str = get_css_from_str(loc_or_str)
+            loc_or_str = str_to_css_loc(loc_or_str)
        elif isinstance(loc_or_str, tuple) and len(loc_or_str) == 2:
            if loc_or_str[0] == 'xpath':
                raise ValueError('不支持xpath')
@ -189,7 +188,7 @@ class ShadowRootElement(DrissionElement):
        return None if mode == 'single' else results


-def get_css_from_str(loc: str) -> tuple:
+def str_to_css_loc(loc: str) -> tuple:
    """处理元素查找语句                                                \n
    查找方式：属性、tag name及属性、文本、css selector                   \n
    =表示精确匹配，:表示模糊匹配，无控制字符串时默认搜索该字符串             \n
@ -212,6 +211,7 @@ def get_css_from_str(loc: str) -> tuple:
    # 根据属性查找
    if loc.startswith('@'):
        r = re_SPLIT(r'([:=])', loc[1:], maxsplit=1)
+
        if len(r) == 3:
            mode = '=' if r[1] == '=' else '*='
            loc_str = f'*[{r[0]}{mode}{r[2]}]'
@ -225,6 +225,7 @@ def get_css_from_str(loc: str) -> tuple:
        else:
            at_lst = loc[4:].split('@', maxsplit=1)
            r = re_SPLIT(r'([:=])', at_lst[1], maxsplit=1)
+
            if len(r) == 3:
                if r[0] == 'text()':
                    match = 'exact' if r[1] == '=' else 'fuzzy'