2023-01-18 10:35:34 +08:00

244 lines
7.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding:utf-8 -*-
"""
@Author : g1879
@Contact : g1879@qq.com
"""
from re import split
def get_loc(loc, translate_css=False):
"""接收selenium定位元组或本库定位语法转换为标准定位元组可翻译css selector为xpath \n
:param loc: selenium定位元组或本库定位语法
:param translate_css: 是否翻译css selector为xpath
:return: DrissionPage定位元组
"""
if isinstance(loc, tuple):
loc = translate_loc(loc)
elif isinstance(loc, str):
loc = str_to_loc(loc)
else:
raise TypeError('loc参数只能是tuple或str。')
if loc[0] == 'css selector' and translate_css:
from lxml.cssselect import CSSSelector, ExpressionError
try:
path = str(CSSSelector(loc[1], translator='html').path)
path = path[20:] if path.startswith('descendant-or-self::') else path
loc = 'xpath', path
except ExpressionError:
pass
return loc
def str_to_loc(loc):
"""处理元素查找语句 \n
查找方式属性、tag name及属性、文本、xpath、css selector、id、class \n
@表示属性,.表示class#表示id=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
"""
loc_by = 'xpath'
if loc.startswith('.'):
if loc.startswith(('.=', '.:',)):
loc = loc.replace('.', '@class', 1)
else:
loc = loc.replace('.', '@class=', 1)
elif loc.startswith('#'):
if loc.startswith(('#=', '#:',)):
loc = loc.replace('#', '@id', 1)
else:
loc = loc.replace('#', '@id=', 1)
elif loc.startswith(('t:', 't=')):
loc = f'tag:{loc[2:]}'
elif loc.startswith(('tx:', 'tx=')):
loc = f'text{loc[2:]}'
# ------------------------------------------------------------------
# 多属性查找
if loc.startswith('@@') and loc != '@@':
loc_str = _make_multi_xpath_str('*', loc)
# 单属性查找
elif loc.startswith('@') and loc != '@':
loc_str = _make_single_xpath_str('*', loc)
# 根据tag name查找
elif loc.startswith(('tag:', 'tag=')) and loc not in ('tag:', 'tag='):
at_ind = loc.find('@')
if at_ind == -1:
loc_str = f'//*[name()="{loc[4:]}"]'
else:
if loc[at_ind:].startswith('@@'):
loc_str = _make_multi_xpath_str(loc[4:at_ind], loc[at_ind:])
else:
loc_str = _make_single_xpath_str(loc[4:at_ind], loc[at_ind:])
# 根据文本查找
elif loc.startswith('text='):
loc_str = f'//*[text()={_make_search_str(loc[5:])}]'
elif loc.startswith('text:') and loc != 'text:':
loc_str = f'//*/text()[contains(., {_make_search_str(loc[5:])})]/..'
# 用xpath查找
elif loc.startswith(('xpath:', 'xpath=')) and loc not in ('xpath:', 'xpath='):
loc_str = loc[6:]
elif loc.startswith(('x:', 'x=')) and loc not in ('x:', 'x='):
loc_str = loc[2:]
# 用css selector查找
elif loc.startswith(('css:', 'css=')) and loc not in ('css:', 'css='):
loc_by = 'css selector'
loc_str = loc[4:]
elif loc.startswith(('c:', 'c=')) and loc not in ('c:', 'c='):
loc_by = 'css selector'
loc_str = loc[2:]
# 根据文本模糊查找
elif loc:
loc_str = f'//*/text()[contains(., {_make_search_str(loc)})]/..'
else:
loc_str = '//*'
return loc_by, loc_str
def _make_single_xpath_str(tag: str, text: str) -> str:
"""生成xpath语句 \n
:param tag: 标签名
:param text: 待处理的字符串
:return: xpath字符串
"""
arg_list = [] if tag == '*' else [f'name()="{tag}"']
arg_str = txt_str = ''
if text == '@':
arg_str = 'not(@*)'
else:
r = split(r'([:=])', text, maxsplit=1)
len_r = len(r)
len_r0 = len(r[0])
if len_r != 3 and len_r0 > 1:
arg_str = 'normalize-space(text())' if r[0] in ('@text()', '@tx()') else f'{r[0]}'
elif len_r == 3 and len_r0 > 1:
if r[1] == '=': # 精确查找
arg = '.' if r[0] in ('@text()', '@tx()') else r[0]
arg_str = f'{arg}={_make_search_str(r[2])}'
else: # 模糊查找
if r[0] in ('@text()', '@tx()'):
txt_str = f'/text()[contains(., {_make_search_str(r[2])})]/..'
arg_str = ''
else:
arg_str = f"contains({r[0]},{_make_search_str(r[2])})"
if arg_str:
arg_list.append(arg_str)
arg_str = ' and '.join(arg_list)
return f'//*[{arg_str}]{txt_str}' if arg_str else f'//*{txt_str}'
def _make_multi_xpath_str(tag: str, text: str) -> str:
"""生成多属性查找的xpath语句 \n
:param tag: 标签名
:param text: 待处理的字符串
:return: xpath字符串
"""
arg_list = [] if tag == '*' else [f'name()="{tag}"']
args = text.split('@@')
for arg in args[1:]:
r = split(r'([:=])', arg, maxsplit=1)
arg_str = ''
len_r = len(r)
if not r[0]: # 不查询任何属性
arg_str = 'not(@*)'
else:
r[0], ignore = (r[0][1:], True) if r[0][0] == '-' else (r[0], None) # 是否去除某个属性
if len_r != 3: # 只有属性名没有属性内容,查询是否存在该属性
arg_str = 'normalize-space(text())' if r[0] in ('text()', 'tx()') else f'@{r[0]}'
elif len_r == 3: # 属性名和内容都有
arg = '.' if r[0] in ('text()', 'tx()') else f'@{r[0]}'
if r[1] == '=':
arg_str = f'{arg}={_make_search_str(r[2])}'
else:
arg_str = f'contains({arg},{_make_search_str(r[2])})'
if arg_str and ignore:
arg_str = f'not({arg_str})'
if arg_str:
arg_list.append(arg_str)
arg_str = ' and '.join(arg_list)
return f'//*[{arg_str}]' if arg_str else f'//*'
def _make_search_str(search_str: str) -> str:
""""转义,不知何故不能直接用 \ 来转义 \n
:param search_str: 查询字符串
:return: 把"转义后的字符串
"""
parts = search_str.split('"')
parts_num = len(parts)
search_str = 'concat('
for key, i in enumerate(parts):
search_str += f'"{i}"'
search_str += ',' + '\'"\',' if key < parts_num - 1 else ''
search_str += ',"")'
return search_str
def translate_loc(loc):
"""把By类型的loc元组转换为css selector或xpath类型的 \n
:param loc: By类型的loc元组
:return: css selector或xpath类型的loc元组
"""
if len(loc) != 2:
raise ValueError('定位符长度必须为2。')
loc_by = 'xpath'
loc_0 = loc[0].lower()
if loc_0 == 'xpath':
loc_str = loc[1]
elif loc_0 == 'css selector':
loc_by = loc_0
loc_str = loc[1]
elif loc_0 == 'id':
loc_str = f'//*[@id="{loc[1]}"]'
elif loc_0 == 'class name':
loc_str = f'//*[@class="{loc[1]}"]'
elif loc_0 == 'link text':
loc_str = f'//a[text()="{loc[1]}"]'
elif loc_0 == 'name':
loc_str = f'//*[@name="{loc[1]}"]'
elif loc_0 == 'tag name':
loc_str = f'//{loc[1]}'
elif loc_0 == 'partial link text':
loc_str = f'//a[contains(text(),"{loc[1]}")]'
else:
raise ValueError('无法识别的定位符。')
return loc_by, loc_str