From ef9a48adf5d867d0c06250229df1f7675bc328cb Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 7 Dec 2021 23:11:03 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=AE=9A=E4=BD=8D=E8=AF=AD?= =?UTF-8?q?=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/common.py | 102 +++++++++++++---------------------------- 1 file changed, 33 insertions(+), 69 deletions(-) diff --git a/DrissionPage/common.py b/DrissionPage/common.py index 1280c1b..dcb0c55 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -6,7 +6,7 @@ """ from html import unescape from pathlib import Path -from re import split, search, sub, findall +from re import split, search, sub from shutil import rmtree from typing import Union from zipfile import ZipFile @@ -106,14 +106,11 @@ def str_to_loc(loc: str) -> tuple: loc = f'text{loc[2:]}' # 根据属性查找 - if loc.startswith('@'): + if loc.startswith('@@'): + loc_str = _make_xpath_str('*', loc) # TODO: 改成多属性 + + elif loc.startswith('@'): loc_str = _make_xpath_str('*', loc) - # r = split(r'([:=])', loc[1:], maxsplit=1) - # if len(r) == 3: - # mode = 'exact' if r[1] == '=' else 'fuzzy' - # loc_str = _make_xpath_str('*', f'@{r[0]}', r[2], mode) - # else: - # loc_str = f'//*[@{loc[1:]}]' # 根据tag name查找 elif loc.startswith(('tag:', 'tag=')): @@ -121,25 +118,16 @@ def str_to_loc(loc: str) -> tuple: if at_ind == -1: loc_str = f'//*[name()="{loc[4:]}"]' else: - loc_str = _make_xpath_str(loc[4:at_ind], loc[at_ind:]) - # if '@' not in loc[4:]: - # loc_str = f'//*[name()="{loc[4:]}"]' - # else: - # at_lst = loc[4:].split('@', maxsplit=1) - # r = split(r'([:=])', at_lst[1], maxsplit=1) - # if len(r) == 3: - # mode = 'exact' if r[1] == '=' else 'fuzzy' - # arg_str = 'text()' if r[0] in ('text()', 'tx()') else f'@{r[0]}' - # loc_str = _make_xpath_str(at_lst[0], arg_str, r[2], mode) - # else: - # loc_str = f'//*[name()="{at_lst[0]}" and @{r[0]}]' + if loc[4:].startswith('@@'): # TODO: 改成多属性 + loc_str = _make_xpath_str(loc[4:at_ind], loc[at_ind:]) + else: + loc_str = _make_xpath_str(loc[4:at_ind], loc[at_ind:]) # 根据文本查找 - elif loc.startswith(('text:', 'text=')): - if len(loc) > 5: - loc_str = _make_xpath_str('*', f'@text(){loc[4:]}') - else: - loc_str = '//*[not(text())]' + elif loc.startswith('text:'): + loc_str = f'//*[text()={_make_search_str(loc[5:])}]' + elif loc.startswith('text='): + loc_str = f'//*/text()[contains(., {_make_search_str(loc[5:])})]/..' # 用xpath查找 elif loc.startswith(('xpath:', 'xpath=')): @@ -151,63 +139,39 @@ def str_to_loc(loc: str) -> tuple: elif loc.startswith(('css:', 'css=')): loc_by = 'css selector' loc_str = loc[4:] - elif loc.startswith(('c:', 'c=')): loc_by = 'css selector' loc_str = loc[2:] # 根据文本模糊查找 + elif loc: + loc_str = f'//*/text()[contains(., {_make_search_str(loc)})]/..' else: - if loc: - loc_str = _make_xpath_str('*', f'@text():{loc}') - else: - loc_str = '//*[not(text())]' + loc_str = '//*[not(text())]' return loc_by, loc_str def _make_xpath_str(tag: str, text: str) -> str: + """生成xpath语句 \n + :param tag: 标签名 + :param text: 待处理的字符串 + :return: xpath字符串 + """ tag_name = '' if tag == '*' else f'name()="{tag}" and ' - r = findall(r'@([^@]*)', text) - res_list = [] - for i in r: - r = split(r'([:=])', i, maxsplit=1) - if len(r) == 3: - arg_str = 'text()' if r[0] in ('text()', 'tx()') else f'@{r[0]}' - if r[1] == '=': - loc_str = f'{arg_str}={_make_search_str(r[2])}' + r = split(r'([:=])', text, maxsplit=1) + if len(r) != 3: + return f'//*[{tag_name}{r[0]}]' + else: + if r[1] == '=': # 精确查找 + arg = 'text()' if r[0] in ('@text()', '@tx()') else r[0] + return f'//*[{tag_name}{arg}={_make_search_str(r[2])}]' + else: # 模糊查找 + if r[0] in ('@text()', '@tx()'): + tag_name = '' if tag == '*' else f'{tag}/' + return f'//{tag_name}text()[contains(., {_make_search_str(r[2])})]/..' else: - loc_str = f'contains({arg_str}, {_make_search_str(r[2])})' - else: - loc_str = f'@{i}' - res_list.append(loc_str) - - s = ' and '.join(res_list) - return f"//*[{tag_name}{s}]" - - -# def _make_xpath_str(tag: str, arg: str, val: str, mode: str = 'fuzzy') -> str: -# """生成xpath语句 \n -# :param tag: 标签名 -# :param arg: 属性名 -# :param val: 属性值 -# :param mode: 'exact' 或 'fuzzy',对应精确或模糊查找 -# :return: xpath字符串 -# """ -# tag_name = '' if tag == '*' else f'name()="{tag}" and ' -# -# if mode == 'exact': -# return f'//*[{tag_name}{arg}={_make_search_str(val)}]' -# -# elif mode == 'fuzzy': -# if arg == 'text()': -# tag_name = '' if tag == '*' else f'{tag}/' -# return f'//{tag_name}text()[contains(., {_make_search_str(val)})]/..' -# else: -# return f"//*[{tag_name}contains({arg},{_make_search_str(val)})]" -# -# else: -# raise ValueError("mode参数只能是'exact'或'fuzzy'。") + return f"//*[{tag_name}contains({r[0]},{_make_search_str(r[2])})]" def translate_loc(loc: tuple) -> tuple: