From 5ab29a040d02f99d7f3cfc75e730bae214b006f0 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sun, 5 Dec 2021 23:41:05 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=9A=E4=BD=8D=E8=AF=AD=E6=B3=95=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E5=90=8C=E6=97=B6=E6=9F=A5=E6=89=BE=E5=A4=9A=E5=B1=9E?= =?UTF-8?q?=E6=80=A7=EF=BC=8C=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/common.py | 111 +++++++++++++++++++++++++---------------- 1 file changed, 69 insertions(+), 42 deletions(-) diff --git a/DrissionPage/common.py b/DrissionPage/common.py index beddbdf..e91ddaa 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -6,7 +6,7 @@ """ from html import unescape from pathlib import Path -from re import split as re_SPLIT, search, sub +from re import split, search, sub, findall from shutil import rmtree from typing import Union from zipfile import ZipFile @@ -107,32 +107,39 @@ def str_to_loc(loc: str) -> tuple: # 根据属性查找 if loc.startswith('@'): - r = re_SPLIT(r'([:=])', loc[1:], maxsplit=1) - if len(r) == 3: - mode = 'exact' if r[1] == '=' else 'fuzzy' - loc_str = _make_xpath_str('*', f'@{r[0]}', r[2], mode) - else: - loc_str = f'//*[@{loc[1:]}]' + loc_str = _make_xpath_str('*', loc) + # r = split(r'([:=])', loc[1:], maxsplit=1) + # if len(r) == 3: + # mode = 'exact' if r[1] == '=' else 'fuzzy' + # loc_str = _make_xpath_str('*', f'@{r[0]}', r[2], mode) + # else: + # loc_str = f'//*[@{loc[1:]}]' # 根据tag name查找 elif loc.startswith(('tag:', 'tag=')): - if '@' not in loc[4:]: + at_ind = loc.find('@') + if at_ind == -1: loc_str = f'//*[name()="{loc[4:]}"]' else: - at_lst = loc[4:].split('@', maxsplit=1) - r = re_SPLIT(r'([:=])', at_lst[1], maxsplit=1) - if len(r) == 3: - mode = 'exact' if r[1] == '=' else 'fuzzy' - arg_str = 'text()' if r[0] in ('text()', 'tx()') else f'@{r[0]}' - loc_str = _make_xpath_str(at_lst[0], arg_str, r[2], mode) - else: - loc_str = f'//*[name()="{at_lst[0]}" and @{r[0]}]' + loc_str = _make_xpath_str(loc[4:at_ind], loc[at_ind:]) + # if '@' not in loc[4:]: + # loc_str = f'//*[name()="{loc[4:]}"]' + # else: + # at_lst = loc[4:].split('@', maxsplit=1) + # r = split(r'([:=])', at_lst[1], maxsplit=1) + # if len(r) == 3: + # mode = 'exact' if r[1] == '=' else 'fuzzy' + # arg_str = 'text()' if r[0] in ('text()', 'tx()') else f'@{r[0]}' + # loc_str = _make_xpath_str(at_lst[0], arg_str, r[2], mode) + # else: + # loc_str = f'//*[name()="{at_lst[0]}" and @{r[0]}]' # 根据文本查找 elif loc.startswith(('text:', 'text=')): if len(loc) > 5: - mode = 'exact' if loc[4] == '=' else 'fuzzy' - loc_str = _make_xpath_str('*', 'text()', loc[5:], mode) + # mode = 'exact' if loc[4] == '=' else 'fuzzy' + # loc_str = _make_xpath_str('*', 'text()', loc[5:], mode) + loc_str = _make_xpath_str('*', f'@text(){loc[4:]}') else: loc_str = '//*[not(text())]' @@ -153,13 +160,34 @@ def str_to_loc(loc: str) -> tuple: # 根据文本模糊查找 else: if loc: - loc_str = _make_xpath_str('*', 'text()', loc, 'fuzzy') + # loc_str = _make_xpath_str('*', 'text()', loc, 'fuzzy') + loc_str = _make_xpath_str('*', f'@text():{loc}') else: loc_str = '//*[not(text())]' return loc_by, loc_str +def _make_xpath_str(tag: str, text: str): + tag_name = '' if tag == '*' else f'name()="{tag}" and ' + r = findall(r'@([^@]*)', text) + res_list = [] + for i in r: + r = split(r'([:=])', i, maxsplit=1) + if len(r) == 3: + arg_str = 'text()' if r[0] in ('text()', 'tx()') else f'@{r[0]}' + if r[1] == '=': + loc_str = f'{arg_str}={_make_search_str(r[2])}' + else: + loc_str = f'contains({arg_str}, {_make_search_str(r[2])})' + else: + loc_str = f'@{i}' + res_list.append(loc_str) + + s = ' and '.join(res_list) + return f"//*[{tag_name}{s}]" + + def translate_loc(loc: tuple) -> tuple: """把By类型的loc元组转换为css selector或xpath类型的 \n :param loc: By类型的loc元组 @@ -201,28 +229,28 @@ def translate_loc(loc: tuple) -> tuple: return loc_by, loc_str -def _make_xpath_str(tag: str, arg: str, val: str, mode: str = 'fuzzy') -> str: - """生成xpath语句 \n - :param tag: 标签名 - :param arg: 属性名 - :param val: 属性值 - :param mode: 'exact' 或 'fuzzy',对应精确或模糊查找 - :return: xpath字符串 - """ - tag_name = '' if tag == '*' else f'name()="{tag}" and ' - - if mode == 'exact': - return f'//*[{tag_name}{arg}={_make_search_str(val)}]' - - elif mode == 'fuzzy': - if arg == 'text()': - tag_name = '' if tag == '*' else f'{tag}/' - return f'//{tag_name}text()[contains(., {_make_search_str(val)})]/..' - else: - return f"//*[{tag_name}contains({arg},{_make_search_str(val)})]" - - else: - raise ValueError("mode参数只能是'exact'或'fuzzy'。") +# def _make_xpath_str(tag: str, arg: str, val: str, mode: str = 'fuzzy') -> str: +# """生成xpath语句 \n +# :param tag: 标签名 +# :param arg: 属性名 +# :param val: 属性值 +# :param mode: 'exact' 或 'fuzzy',对应精确或模糊查找 +# :return: xpath字符串 +# """ +# tag_name = '' if tag == '*' else f'name()="{tag}" and ' +# +# if mode == 'exact': +# return f'//*[{tag_name}{arg}={_make_search_str(val)}]' +# +# elif mode == 'fuzzy': +# if arg == 'text()': +# tag_name = '' if tag == '*' else f'{tag}/' +# return f'//{tag_name}text()[contains(., {_make_search_str(val)})]/..' +# else: +# return f"//*[{tag_name}contains({arg},{_make_search_str(val)})]" +# +# else: +# raise ValueError("mode参数只能是'exact'或'fuzzy'。") def _make_search_str(search_str: str) -> str: @@ -239,7 +267,6 @@ def _make_search_str(search_str: str) -> str: search_str += ',' + '\'"\',' if key < parts_num - 1 else '' search_str += ',"")' - return search_str