This commit is contained in:
g1879 2024-09-17 23:52:29 +08:00
parent 4f88a04810
commit 758fcf8b73
8 changed files with 38 additions and 23 deletions

View File

@ -17,7 +17,7 @@ from requests import Session
from .._configs.session_options import SessionOptions from .._configs.session_options import SessionOptions
from .._elements.none_element import NoneElement from .._elements.none_element import NoneElement
from .._functions.elements import get_frame, get_eles from .._functions.elements import get_frame, get_eles
from .._functions.locator import get_loc from .._functions.locator import get_loc, is_selenium_loc
from .._functions.settings import Settings from .._functions.settings import Settings
from .._functions.web import format_html from .._functions.web import format_html
from ..errors import ElementNotFoundError from ..errors import ElementNotFoundError
@ -40,7 +40,17 @@ class BaseParser(object):
timeout = 0 timeout = 0
if timeout is None: if timeout is None:
timeout = self.timeout timeout = self.timeout
return get_eles(locators, self, any_one, first_ele, timeout) if isinstance(locators, tuple) and not is_selenium_loc(locators):
raise ValueError(f"locators参数为tuple时必须是单独的定位符即长度为2且第一位是'id', 'xpath', 'link text', "
f"'partial link text','name', 'tag name', 'class name', 'css selector' 之一。\n"
f"现在是:{locators}")
r = get_eles(locators, self, any_one, first_ele, timeout)
if any_one:
for ele in r:
if r[ele]:
return ele, r[ele]
return None, None
return r
# ----------------以下属性或方法待后代实现---------------- # ----------------以下属性或方法待后代实现----------------
@property @property

View File

@ -41,13 +41,15 @@ class BaseParser(object):
any_one: bool = True, any_one: bool = True,
first_ele: bool = True, first_ele: bool = True,
timeout: float = None) -> Union[Dict[str, ChromiumElement], Dict[str, SessionElement], timeout: float = None) -> Union[Dict[str, ChromiumElement], Dict[str, SessionElement],
Dict[str, List[ChromiumElement]], Dict[str, List[SessionElement]]]: Dict[str, List[ChromiumElement]], Dict[str, List[SessionElement]], Tuple[str, SessionElement],
Tuple[str, ChromiumElement]]:
"""传入多个定位符获取多个ele """传入多个定位符获取多个ele
:param locators: 定位符组成的列表 :param locators: 定位符组成的列表
:param any_one: 是否任何一个定位符找到结果即返回 :param any_one: 是否任何一个定位符找到结果即返回
:param first_ele: 每个定位符是否只获取第一个元素 :param first_ele: 每个定位符是否只获取第一个元素
:param timeout: 超时时间 :param timeout: 超时时间
:return: 多个定位符组成的dictfirst_only为False返回列表否则为元素无结果的返回False :return: any_one为True时返回一个找到的元素定位符和对象组成的元组格式(loc, ele)全都没找到返回(None, None)
any_one为False时返回dict格式key为定位符value为找到的元素或列表
""" """
... ...

View File

@ -7,7 +7,7 @@
""" """
from time import perf_counter, sleep from time import perf_counter, sleep
from .locator import is_loc from .locator import is_str_loc
from .._elements.none_element import NoneElement from .._elements.none_element import NoneElement
@ -263,7 +263,7 @@ class Getter(object):
def get_eles(locators, owner, any_one=False, first_ele=True, timeout=10): def get_eles(locators, owner, any_one=False, first_ele=True, timeout=10):
if isinstance(locators, str): if isinstance(locators, (tuple, str)):
locators = (locators,) locators = (locators,)
res = {loc: None for loc in locators} res = {loc: None for loc in locators}
@ -293,7 +293,7 @@ def get_eles(locators, owner, any_one=False, first_ele=True, timeout=10):
def get_frame(owner, loc_ind_ele, timeout=None): def get_frame(owner, loc_ind_ele, timeout=None):
if isinstance(loc_ind_ele, str): if isinstance(loc_ind_ele, str):
if not is_loc(loc_ind_ele): if not is_str_loc(loc_ind_ele):
xpath = f'xpath://*[(name()="iframe" or name()="frame") and ' \ xpath = f'xpath://*[(name()="iframe" or name()="frame") and ' \
f'(@name="{loc_ind_ele}" or @id="{loc_ind_ele}")]' f'(@name="{loc_ind_ele}" or @id="{loc_ind_ele}")]'
else: else:
@ -309,15 +309,7 @@ def get_frame(owner, loc_ind_ele, timeout=None):
raise TypeError('该定位符不是指向frame元素。') raise TypeError('该定位符不是指向frame元素。')
r = ele r = ele
elif isinstance(loc_ind_ele, int): elif getattr(loc_ind_ele, '_type', None) == 'ChromiumFrame':
if loc_ind_ele == 0:
loc_ind_ele = 1
elif loc_ind_ele < 0:
loc_ind_ele = f'last()+{loc_ind_ele}+1'
xpath = f'xpath:(//*[name()="frame" or name()="iframe"])[{loc_ind_ele}]'
r = owner._ele(xpath, timeout=timeout)
elif loc_ind_ele._type == 'ChromiumFrame':
r = loc_ind_ele r = loc_ind_ele
else: else:

View File

@ -559,14 +559,14 @@ class Getter(object):
... ...
def get_eles(locators: Union[str, List[str], tuple], def get_eles(locators: Union[str, tuple, List[Union[str, tuple]]],
owner: BaseParser, owner: BaseParser,
any_one: bool = False, any_one: bool = False,
first_ele: bool = True, first_ele: bool = True,
timeout: float = 10) -> Union[Dict[str, ChromiumElement], Dict[str, SessionElement], timeout: float = 10) -> Union[Dict[str, ChromiumElement], Dict[str, SessionElement],
Dict[str, List[ChromiumElement]], Dict[str, List[SessionElement]]]: Dict[str, List[ChromiumElement]], Dict[str, List[SessionElement]]]:
"""传入多个定位符获取多个ele """传入多个定位符获取多个ele
:param locators: 定位符组成的列表 :param locators: 定位符或它们组成的列表
:param owner: 页面或元素对象 :param owner: 页面或元素对象
:param any_one: 是否找到任何一个即返回 :param any_one: 是否找到任何一个即返回
:param first_ele: 每个定位符是否只获取第一个元素 :param first_ele: 每个定位符是否只获取第一个元素

View File

@ -79,11 +79,17 @@ def _get_arg(text) -> list:
return [name, None, None] if len(r) != 3 else [name, r[1], r[2]] return [name, None, None] if len(r) != 3 else [name, r[1], r[2]]
def is_loc(text): def is_str_loc(text):
return text.startswith(('.', '#', '@', 't:', 't=', 'tag:', 'tag=', 'tx:', 'tx=', 'tx^', 'tx$', 'text:', 'text=', return text.startswith(('.', '#', '@', 't:', 't=', 'tag:', 'tag=', 'tx:', 'tx=', 'tx^', 'tx$', 'text:', 'text=',
'text^', 'text$', 'xpath:', 'xpath=', 'x:', 'x=', 'css:', 'css=', 'c:', 'c=')) 'text^', 'text$', 'xpath:', 'xpath=', 'x:', 'x=', 'css:', 'css=', 'c:', 'c='))
def is_selenium_loc(loc):
    """Tell whether ``loc`` is a selenium-style ``(by, value)`` locator tuple.

    :param loc: object to inspect
    :return: True only when ``loc`` is a tuple of exactly two items, the second
             item is a str, and the first item is one of the recognised
             strategy names; False otherwise
    """
    # Anything that is not a 2-item tuple cannot be a (by, value) pair.
    if not isinstance(loc, tuple) or len(loc) != 2:
        return False

    strategy, value = loc
    if not isinstance(value, str):
        return False

    # A tuple (not a set) is used for membership so an unhashable first item
    # simply compares unequal instead of raising.
    known_strategies = ('id', 'xpath', 'link text', 'partial link text', 'name', 'tag name', 'class name',
                        'css selector')
    return strategy in known_strategies
def get_loc(loc, translate_css=False, css_mode=False): def get_loc(loc, translate_css=False, css_mode=False):
if isinstance(loc, tuple): if isinstance(loc, tuple):
loc = translate_css_loc(loc) if css_mode else translate_loc(loc) loc = translate_css_loc(loc) if css_mode else translate_loc(loc)
@ -457,7 +463,7 @@ def translate_loc(loc):
loc_str = f'//a[contains(text(),"{loc[1]}")]' loc_str = f'//a[contains(text(),"{loc[1]}")]'
else: else:
raise ValueError('无法识别的定位符。') raise ValueError(f'无法识别的定位符:{loc}')
return loc_by, loc_str return loc_by, loc_str

View File

@ -16,11 +16,16 @@ def locator_to_tuple(loc: str) -> dict:
... ...
def is_loc(text: str) -> bool: def is_str_loc(text: str) -> bool:
"""返回text是否定位符""" """返回text是否定位符"""
... ...
def is_selenium_loc(loc: tuple) -> bool:
    """Return whether the given tuple is a selenium locator."""
    ...
def get_loc(loc: Union[tuple, str], def get_loc(loc: Union[tuple, str],
translate_css: bool = False, translate_css: bool = False,
css_mode: bool = False) -> tuple: css_mode: bool = False) -> tuple:

View File

@ -1,7 +1,7 @@
requests requests
lxml lxml
cssselect cssselect
DownloadKit>=2.0.2 DownloadKit>=2.0.4
websocket-client websocket-client
click click
tldextract tldextract

View File

@ -23,7 +23,7 @@ setup(
'lxml', 'lxml',
'requests', 'requests',
'cssselect', 'cssselect',
'DownloadKit>=2.0.2', 'DownloadKit>=2.0.4',
'websocket-client', 'websocket-client',
'click', 'click',
'tldextract', 'tldextract',