优化定位语法

2024-12-10 04:00:23 +08:00 · 2021-12-07 23:11:03 +08:00 · 2021-12-07 23:11:03 +08:00 · ef9a48adf5
commit ef9a48adf5
parent 1fa392802d
1 changed files with 33 additions and 69 deletions
--- a/DrissionPage/common.py
+++ b/DrissionPage/common.py
@ -6,7 +6,7 @@
 """
 from html import unescape
 from pathlib import Path
-from re import split, search, sub, findall
+from re import split, search, sub
 from shutil import rmtree
 from typing import Union
 from zipfile import ZipFile
@ -106,14 +106,11 @@ def str_to_loc(loc: str) -> tuple:
        loc = f'text{loc[2:]}'

    # 根据属性查找
-    if loc.startswith('@'):
+    if loc.startswith('@@'):
+        loc_str = _make_xpath_str('*', loc)  # TODO: 改成多属性
+
+    elif loc.startswith('@'):
        loc_str = _make_xpath_str('*', loc)
-        # r = split(r'([:=])', loc[1:], maxsplit=1)
-        # if len(r) == 3:
-        #     mode = 'exact' if r[1] == '=' else 'fuzzy'
-        #     loc_str = _make_xpath_str('*', f'@{r[0]}', r[2], mode)
-        # else:
-        #     loc_str = f'//*[@{loc[1:]}]'

    # 根据tag name查找
    elif loc.startswith(('tag:', 'tag=')):
@ -121,25 +118,16 @@ def str_to_loc(loc: str) -> tuple:
        if at_ind == -1:
            loc_str = f'//*[name()="{loc[4:]}"]'
        else:
-            loc_str = _make_xpath_str(loc[4:at_ind], loc[at_ind:])
-        # if '@' not in loc[4:]:
-        #     loc_str = f'//*[name()="{loc[4:]}"]'
-        # else:
-        #     at_lst = loc[4:].split('@', maxsplit=1)
-        #     r = split(r'([:=])', at_lst[1], maxsplit=1)
-        #     if len(r) == 3:
-        #         mode = 'exact' if r[1] == '=' else 'fuzzy'
-        #         arg_str = 'text()' if r[0] in ('text()', 'tx()') else f'@{r[0]}'
-        #         loc_str = _make_xpath_str(at_lst[0], arg_str, r[2], mode)
-        #     else:
-        #         loc_str = f'//*[name()="{at_lst[0]}" and @{r[0]}]'
+            if loc[4:].startswith('@@'):  # TODO: 改成多属性
+                loc_str = _make_xpath_str(loc[4:at_ind], loc[at_ind:])
+            else:
+                loc_str = _make_xpath_str(loc[4:at_ind], loc[at_ind:])

    # 根据文本查找
-    elif loc.startswith(('text:', 'text=')):
-        if len(loc) > 5:
-            loc_str = _make_xpath_str('*', f'@text(){loc[4:]}')
-        else:
-            loc_str = '//*[not(text())]'
+    elif loc.startswith('text:'):
+        loc_str = f'//*[text()={_make_search_str(loc[5:])}]'
+    elif loc.startswith('text='):
+        loc_str = f'//*/text()[contains(., {_make_search_str(loc[5:])})]/..'

    # 用xpath查找
    elif loc.startswith(('xpath:', 'xpath=')):
@ -151,63 +139,39 @@ def str_to_loc(loc: str) -> tuple:
    elif loc.startswith(('css:', 'css=')):
        loc_by = 'css selector'
        loc_str = loc[4:]
-
    elif loc.startswith(('c:', 'c=')):
        loc_by = 'css selector'
        loc_str = loc[2:]

    # 根据文本模糊查找
+    elif loc:
+        loc_str = f'//*/text()[contains(., {_make_search_str(loc)})]/..'
    else:
-        if loc:
-            loc_str = _make_xpath_str('*', f'@text():{loc}')
-        else:
-            loc_str = '//*[not(text())]'
+        loc_str = '//*[not(text())]'

    return loc_by, loc_str


 def _make_xpath_str(tag: str, text: str) -> str:
+    """生成xpath语句                                          \n
+    :param tag: 标签名
+    :param text: 待处理的字符串
+    :return: xpath字符串
+    """
    tag_name = '' if tag == '*' else f'name()="{tag}" and '
-    r = findall(r'@([^@]*)', text)
-    res_list = []
-    for i in r:
-        r = split(r'([:=])', i, maxsplit=1)
-        if len(r) == 3:
-            arg_str = 'text()' if r[0] in ('text()', 'tx()') else f'@{r[0]}'
-            if r[1] == '=':
-                loc_str = f'{arg_str}={_make_search_str(r[2])}'
+    r = split(r'([:=])', text, maxsplit=1)
+    if len(r) != 3:
+        return f'//*[{tag_name}{r[0]}]'
+    else:
+        if r[1] == '=':  # 精确查找
+            arg = 'text()' if r[0] in ('@text()', '@tx()') else r[0]
+            return f'//*[{tag_name}{arg}={_make_search_str(r[2])}]'
+        else:  # 模糊查找
+            if r[0] in ('@text()', '@tx()'):
+                tag_name = '' if tag == '*' else f'{tag}/'
+                return f'//{tag_name}text()[contains(., {_make_search_str(r[2])})]/..'
            else:
-                loc_str = f'contains({arg_str}, {_make_search_str(r[2])})'
-        else:
-            loc_str = f'@{i}'
-        res_list.append(loc_str)
-
-    s = ' and '.join(res_list)
-    return f"//*[{tag_name}{s}]"
-
-
-# def _make_xpath_str(tag: str, arg: str, val: str, mode: str = 'fuzzy') -> str:
-#     """生成xpath语句                                          \n
-#     :param tag: 标签名
-#     :param arg: 属性名
-#     :param val: 属性值
-#     :param mode: 'exact' 或 'fuzzy'，对应精确或模糊查找
-#     :return: xpath字符串
-#     """
-#     tag_name = '' if tag == '*' else f'name()="{tag}" and '
-#
-#     if mode == 'exact':
-#         return f'//*[{tag_name}{arg}={_make_search_str(val)}]'
-#
-#     elif mode == 'fuzzy':
-#         if arg == 'text()':
-#             tag_name = '' if tag == '*' else f'{tag}/'
-#             return f'//{tag_name}text()[contains(., {_make_search_str(val)})]/..'
-#         else:
-#             return f"//*[{tag_name}contains({arg},{_make_search_str(val)})]"
-#
-#     else:
-#         raise ValueError("mode参数只能是'exact'或'fuzzy'。")
+                return f"//*[{tag_name}contains({r[0]},{_make_search_str(r[2])})]"


 def translate_loc(loc: tuple) -> tuple: