This commit is contained in:
g1879 2024-09-17 23:52:29 +08:00
parent 4f88a04810
commit 758fcf8b73
8 changed files with 38 additions and 23 deletions

View File

@ -17,7 +17,7 @@ from requests import Session
from .._configs.session_options import SessionOptions
from .._elements.none_element import NoneElement
from .._functions.elements import get_frame, get_eles
from .._functions.locator import get_loc
from .._functions.locator import get_loc, is_selenium_loc
from .._functions.settings import Settings
from .._functions.web import format_html
from ..errors import ElementNotFoundError
@ -40,7 +40,17 @@ class BaseParser(object):
timeout = 0
if timeout is None:
timeout = self.timeout
return get_eles(locators, self, any_one, first_ele, timeout)
if isinstance(locators, tuple) and not is_selenium_loc(locators):
raise ValueError(f"locators参数为tuple时必须是单独的定位符即长度为2且第一位是'id', 'xpath', 'link text', "
f"'partial link text','name', 'tag name', 'class name', 'css selector' 之一。\n"
f"现在是:{locators}")
r = get_eles(locators, self, any_one, first_ele, timeout)
if any_one:
for ele in r:
if r[ele]:
return ele, r[ele]
return None, None
return r
# ----------------以下属性或方法待后代实现----------------
@property

View File

@ -41,13 +41,15 @@ class BaseParser(object):
any_one: bool = True,
first_ele: bool = True,
timeout: float = None) -> Union[Dict[str, ChromiumElement], Dict[str, SessionElement],
Dict[str, List[ChromiumElement]], Dict[str, List[SessionElement]]]:
Dict[str, List[ChromiumElement]], Dict[str, List[SessionElement]], Tuple[str, SessionElement],
Tuple[str, ChromiumElement]]:
"""传入多个定位符获取多个ele
:param locators: 定位符组成的列表
:param any_one: 是否任何一个定位符找到结果即返回
:param first_ele: 每个定位符是否只获取第一个元素
:param timeout: 超时时间
:return: 多个定位符组成的dictfirst_only为False返回列表否则为元素无结果的返回False
:return: any_one为True时返回一个找到的元素定位符和对象组成的元组格式(loc, ele)全都没找到返回(None, None)
any_one为False时返回dict格式key为定位符value为找到的元素或列表
"""
...

View File

@ -7,7 +7,7 @@
"""
from time import perf_counter, sleep
from .locator import is_loc
from .locator import is_str_loc
from .._elements.none_element import NoneElement
@ -263,7 +263,7 @@ class Getter(object):
def get_eles(locators, owner, any_one=False, first_ele=True, timeout=10):
if isinstance(locators, str):
if isinstance(locators, (tuple, str)):
locators = (locators,)
res = {loc: None for loc in locators}
@ -293,7 +293,7 @@ def get_eles(locators, owner, any_one=False, first_ele=True, timeout=10):
def get_frame(owner, loc_ind_ele, timeout=None):
if isinstance(loc_ind_ele, str):
if not is_loc(loc_ind_ele):
if not is_str_loc(loc_ind_ele):
xpath = f'xpath://*[(name()="iframe" or name()="frame") and ' \
f'(@name="{loc_ind_ele}" or @id="{loc_ind_ele}")]'
else:
@ -309,15 +309,7 @@ def get_frame(owner, loc_ind_ele, timeout=None):
raise TypeError('该定位符不是指向frame元素。')
r = ele
elif isinstance(loc_ind_ele, int):
if loc_ind_ele == 0:
loc_ind_ele = 1
elif loc_ind_ele < 0:
loc_ind_ele = f'last()+{loc_ind_ele}+1'
xpath = f'xpath:(//*[name()="frame" or name()="iframe"])[{loc_ind_ele}]'
r = owner._ele(xpath, timeout=timeout)
elif loc_ind_ele._type == 'ChromiumFrame':
elif getattr(loc_ind_ele, '_type', None) == 'ChromiumFrame':
r = loc_ind_ele
else:

View File

@ -559,14 +559,14 @@ class Getter(object):
...
def get_eles(locators: Union[str, List[str], tuple],
def get_eles(locators: Union[str, tuple, List[Union[str, tuple]]],
owner: BaseParser,
any_one: bool = False,
first_ele: bool = True,
timeout: float = 10) -> Union[Dict[str, ChromiumElement], Dict[str, SessionElement],
Dict[str, List[ChromiumElement]], Dict[str, List[SessionElement]]]:
"""传入多个定位符获取多个ele
:param locators: 定位符组成的列表
:param locators: 定位符或它们组成的列表
:param owner: 页面或元素对象
:param any_one: 是否找到任何一个即返回
:param first_ele: 每个定位符是否只获取第一个元素

View File

@ -79,11 +79,17 @@ def _get_arg(text) -> list:
return [name, None, None] if len(r) != 3 else [name, r[1], r[2]]
def is_loc(text):
def is_str_loc(text):
    """Return whether a string begins with one of the known locator prefixes.

    :param text: candidate locator string
    :return: True if ``text`` starts with any recognised prefix, otherwise False
    """
    prefixes = ('.', '#', '@', 't:', 't=', 'tag:', 'tag=', 'tx:', 'tx=', 'tx^', 'tx$', 'text:', 'text=',
                'text^', 'text$', 'xpath:', 'xpath=', 'x:', 'x=', 'css:', 'css=', 'c:', 'c=')
    return text.startswith(prefixes)
def is_selenium_loc(loc):
    """Return whether ``loc`` has the shape of a selenium-style ``(by, value)`` locator.

    :param loc: object to check
    :return: True only for a 2-item tuple whose second item is a str and whose
             first item is one of the accepted strategy names, otherwise False
    """
    # Anything that is not a pair packed in a tuple is rejected up front.
    if not isinstance(loc, tuple) or len(loc) != 2:
        return False

    by, value = loc
    if not isinstance(value, str):
        return False

    strategies = ('id', 'xpath', 'link text', 'partial link text', 'name', 'tag name', 'class name',
                  'css selector')
    return by in strategies
def get_loc(loc, translate_css=False, css_mode=False):
if isinstance(loc, tuple):
loc = translate_css_loc(loc) if css_mode else translate_loc(loc)
@ -457,7 +463,7 @@ def translate_loc(loc):
loc_str = f'//a[contains(text(),"{loc[1]}")]'
else:
raise ValueError('无法识别的定位符。')
raise ValueError(f'无法识别的定位符:{loc}')
return loc_by, loc_str

View File

@ -16,11 +16,16 @@ def locator_to_tuple(loc: str) -> dict:
...
def is_loc(text: str) -> bool:
def is_str_loc(text: str) -> bool:
    """Return whether text is a locator."""
    ...
def is_selenium_loc(loc: tuple) -> bool:
    """Return whether the tuple is a selenium locator."""
    ...
def get_loc(loc: Union[tuple, str],
translate_css: bool = False,
css_mode: bool = False) -> tuple:

View File

@ -1,7 +1,7 @@
requests
lxml
cssselect
DownloadKit>=2.0.2
DownloadKit>=2.0.4
websocket-client
click
tldextract

View File

@ -23,7 +23,7 @@ setup(
'lxml',
'requests',
'cssselect',
'DownloadKit>=2.0.2',
'DownloadKit>=2.0.4',
'websocket-client',
'click',
'tldextract',