定位语法支持同时查找多属性,未完成

This commit is contained in:
g1879 2021-12-05 23:41:05 +08:00
parent bf34208bed
commit 5ab29a040d

View File

@ -6,7 +6,7 @@
""" """
from html import unescape from html import unescape
from pathlib import Path from pathlib import Path
from re import split as re_SPLIT, search, sub from re import split, search, sub, findall
from shutil import rmtree from shutil import rmtree
from typing import Union from typing import Union
from zipfile import ZipFile from zipfile import ZipFile
@ -107,32 +107,39 @@ def str_to_loc(loc: str) -> tuple:
# 根据属性查找 # 根据属性查找
if loc.startswith('@'): if loc.startswith('@'):
r = re_SPLIT(r'([:=])', loc[1:], maxsplit=1) loc_str = _make_xpath_str('*', loc)
if len(r) == 3: # r = split(r'([:=])', loc[1:], maxsplit=1)
mode = 'exact' if r[1] == '=' else 'fuzzy' # if len(r) == 3:
loc_str = _make_xpath_str('*', f'@{r[0]}', r[2], mode) # mode = 'exact' if r[1] == '=' else 'fuzzy'
else: # loc_str = _make_xpath_str('*', f'@{r[0]}', r[2], mode)
loc_str = f'//*[@{loc[1:]}]' # else:
# loc_str = f'//*[@{loc[1:]}]'
# 根据tag name查找 # 根据tag name查找
elif loc.startswith(('tag:', 'tag=')): elif loc.startswith(('tag:', 'tag=')):
if '@' not in loc[4:]: at_ind = loc.find('@')
if at_ind == -1:
loc_str = f'//*[name()="{loc[4:]}"]' loc_str = f'//*[name()="{loc[4:]}"]'
else: else:
at_lst = loc[4:].split('@', maxsplit=1) loc_str = _make_xpath_str(loc[4:at_ind], loc[at_ind:])
r = re_SPLIT(r'([:=])', at_lst[1], maxsplit=1) # if '@' not in loc[4:]:
if len(r) == 3: # loc_str = f'//*[name()="{loc[4:]}"]'
mode = 'exact' if r[1] == '=' else 'fuzzy' # else:
arg_str = 'text()' if r[0] in ('text()', 'tx()') else f'@{r[0]}' # at_lst = loc[4:].split('@', maxsplit=1)
loc_str = _make_xpath_str(at_lst[0], arg_str, r[2], mode) # r = split(r'([:=])', at_lst[1], maxsplit=1)
else: # if len(r) == 3:
loc_str = f'//*[name()="{at_lst[0]}" and @{r[0]}]' # mode = 'exact' if r[1] == '=' else 'fuzzy'
# arg_str = 'text()' if r[0] in ('text()', 'tx()') else f'@{r[0]}'
# loc_str = _make_xpath_str(at_lst[0], arg_str, r[2], mode)
# else:
# loc_str = f'//*[name()="{at_lst[0]}" and @{r[0]}]'
# 根据文本查找 # 根据文本查找
elif loc.startswith(('text:', 'text=')): elif loc.startswith(('text:', 'text=')):
if len(loc) > 5: if len(loc) > 5:
mode = 'exact' if loc[4] == '=' else 'fuzzy' # mode = 'exact' if loc[4] == '=' else 'fuzzy'
loc_str = _make_xpath_str('*', 'text()', loc[5:], mode) # loc_str = _make_xpath_str('*', 'text()', loc[5:], mode)
loc_str = _make_xpath_str('*', f'@text(){loc[4:]}')
else: else:
loc_str = '//*[not(text())]' loc_str = '//*[not(text())]'
@ -153,13 +160,34 @@ def str_to_loc(loc: str) -> tuple:
# 根据文本模糊查找 # 根据文本模糊查找
else: else:
if loc: if loc:
loc_str = _make_xpath_str('*', 'text()', loc, 'fuzzy') # loc_str = _make_xpath_str('*', 'text()', loc, 'fuzzy')
loc_str = _make_xpath_str('*', f'@text():{loc}')
else: else:
loc_str = '//*[not(text())]' loc_str = '//*[not(text())]'
return loc_by, loc_str return loc_by, loc_str
def _make_xpath_str(tag: str, text: str):
tag_name = '' if tag == '*' else f'name()="{tag}" and '
r = findall(r'@([^@]*)', text)
res_list = []
for i in r:
r = split(r'([:=])', i, maxsplit=1)
if len(r) == 3:
arg_str = 'text()' if r[0] in ('text()', 'tx()') else f'@{r[0]}'
if r[1] == '=':
loc_str = f'{arg_str}={_make_search_str(r[2])}'
else:
loc_str = f'contains({arg_str}, {_make_search_str(r[2])})'
else:
loc_str = f'@{i}'
res_list.append(loc_str)
s = ' and '.join(res_list)
return f"//*[{tag_name}{s}]"
def translate_loc(loc: tuple) -> tuple: def translate_loc(loc: tuple) -> tuple:
"""把By类型的loc元组转换为css selector或xpath类型的 \n """把By类型的loc元组转换为css selector或xpath类型的 \n
:param loc: By类型的loc元组 :param loc: By类型的loc元组
@ -201,28 +229,28 @@ def translate_loc(loc: tuple) -> tuple:
return loc_by, loc_str return loc_by, loc_str
def _make_xpath_str(tag: str, arg: str, val: str, mode: str = 'fuzzy') -> str: # def _make_xpath_str(tag: str, arg: str, val: str, mode: str = 'fuzzy') -> str:
"""生成xpath语句 \n # """生成xpath语句 \n
:param tag: 标签名 # :param tag: 标签名
:param arg: 属性名 # :param arg: 属性名
:param val: 属性值 # :param val: 属性值
:param mode: 'exact' 'fuzzy'对应精确或模糊查找 # :param mode: 'exact' 或 'fuzzy',对应精确或模糊查找
:return: xpath字符串 # :return: xpath字符串
""" # """
tag_name = '' if tag == '*' else f'name()="{tag}" and ' # tag_name = '' if tag == '*' else f'name()="{tag}" and '
#
if mode == 'exact': # if mode == 'exact':
return f'//*[{tag_name}{arg}={_make_search_str(val)}]' # return f'//*[{tag_name}{arg}={_make_search_str(val)}]'
#
elif mode == 'fuzzy': # elif mode == 'fuzzy':
if arg == 'text()': # if arg == 'text()':
tag_name = '' if tag == '*' else f'{tag}/' # tag_name = '' if tag == '*' else f'{tag}/'
return f'//{tag_name}text()[contains(., {_make_search_str(val)})]/..' # return f'//{tag_name}text()[contains(., {_make_search_str(val)})]/..'
else: # else:
return f"//*[{tag_name}contains({arg},{_make_search_str(val)})]" # return f"//*[{tag_name}contains({arg},{_make_search_str(val)})]"
#
else: # else:
raise ValueError("mode参数只能是'exact''fuzzy'") # raise ValueError("mode参数只能是'exact'或'fuzzy'。")
def _make_search_str(search_str: str) -> str: def _make_search_str(search_str: str) -> str:
@ -239,7 +267,6 @@ def _make_search_str(search_str: str) -> str:
search_str += ',' + '\'"\',' if key < parts_num - 1 else '' search_str += ',' + '\'"\',' if key < parts_num - 1 else ''
search_str += ',"")' search_str += ',"")'
return search_str return search_str