改进css_path;修复sr中可能获取错元素问题,未完成

This commit is contained in:
g1879 2023-11-23 18:05:57 +08:00
parent 8699bc82d3
commit 75f05062fb
8 changed files with 233 additions and 56 deletions

View File

@ -6,9 +6,11 @@
from time import sleep, perf_counter from time import sleep, perf_counter
from .chromium_driver import BrowserDriver, ChromiumDriver from .chromium_driver import BrowserDriver, ChromiumDriver
from .._commons.tools import stop_process_on_port from .._commons.tools import stop_process_on_port, raise_error
from .._units.download_manager import DownloadManager from .._units.download_manager import DownloadManager
__ERROR__ = 'error'
class Browser(object): class Browser(object):
BROWSERS = {} BROWSERS = {}
@ -88,7 +90,8 @@ class Browser(object):
:param cmd_args: 参数 :param cmd_args: 参数
:return: 执行的结果 :return: 执行的结果
""" """
return self._driver.run(cmd, **cmd_args) r = self._driver.run(cmd, **cmd_args)
return r if __ERROR__ not in r else raise_error(r)
@property @property
def driver(self): def driver(self):

View File

@ -13,17 +13,18 @@ def is_loc(text):
'text^', 'text$', 'xpath:', 'xpath=', 'x:', 'x=', 'css:', 'css=', 'c:', 'c=')) 'text^', 'text$', 'xpath:', 'xpath=', 'x:', 'x=', 'css:', 'css=', 'c:', 'c='))
def get_loc(loc, translate_css=False): def get_loc(loc, translate_css=False, css_mode=False):
"""接收本库定位语法或selenium定位元组转换为标准定位元组可翻译css selector为xpath """接收本库定位语法或selenium定位元组转换为标准定位元组可翻译css selector为xpath
:param loc: 本库定位语法或selenium定位元组 :param loc: 本库定位语法或selenium定位元组
:param translate_css: 是否翻译css selector为xpath :param translate_css: 是否翻译css selector为xpath
:param css_mode: 是否尽量用css selector方式
:return: DrissionPage定位元组 :return: DrissionPage定位元组
""" """
if isinstance(loc, tuple): if isinstance(loc, tuple):
loc = translate_loc(loc) loc = translate_css_loc(loc) if css_mode else translate_loc(loc)
elif isinstance(loc, str): elif isinstance(loc, str):
loc = str_to_loc(loc) loc = str_to_css_loc(loc) if css_mode else str_to_loc(loc)
else: else:
raise TypeError('loc参数只能是tuple或str。') raise TypeError('loc参数只能是tuple或str。')
@ -127,6 +128,100 @@ def str_to_loc(loc):
return loc_by, loc_str return loc_by, loc_str
def str_to_css_loc(loc):
    """Parse an element-locating statement (css-oriented variant of str_to_loc).

    NOTE: this function is unfinished. Its first statement returns
    ``str_to_loc(loc)`` unconditionally, so every statement after it is
    currently unreachable and the result is exactly what ``str_to_loc`` gives.

    :param loc: locator syntax string
    :return: locator tuple
    """
    return str_to_loc(loc)
    # ---- everything below is unreachable until the early return is removed ----
    loc_by = 'css selector'

    # Expand the shorthand prefixes ('.', '#', 't:', 'tx:') into their long forms.
    if loc.startswith('.'):
        if loc.startswith(('.=', '.:', '.^', '.$')):
            loc = loc.replace('.', '@class', 1)
        else:
            loc = loc.replace('.', '@class=', 1)

    elif loc.startswith('#'):
        if loc.startswith(('#=', '#:', '#^', '#$')):
            loc = loc.replace('#', '@id', 1)
        else:
            loc = loc.replace('#', '@id=', 1)

    elif loc.startswith(('t:', 't=')):
        loc = f'tag:{loc[2:]}'

    elif loc.startswith(('tx:', 'tx=', 'tx^', 'tx$')):
        loc = f'text{loc[2:]}'

    # ------------------------------------------------------------------
    # Multi-attribute lookup
    # NOTE(review): the attribute branches call helpers named *_xpath_str while
    # loc_by is still 'css selector' -- confirm what those helpers produce
    # before enabling this code.
    if loc.startswith('@@') and loc != '@@':
        loc_str = _make_multi_xpath_str('*', loc)

    elif loc.startswith('@|') and loc != '@|':
        loc_str = _make_multi_xpath_str('*', loc, False)

    # Single-attribute lookup
    elif loc.startswith('@') and loc != '@':
        loc_str = _make_single_xpath_str('*', loc)

    # Lookup by tag name, optionally followed by attribute conditions
    elif loc.startswith(('tag:', 'tag=')) and loc not in ('tag:', 'tag='):
        at_ind = loc.find('@')
        if at_ind == -1:
            # No attribute part: the tag name itself is used as the locator string.
            loc_str = loc[4:]
        else:
            if loc[at_ind:].startswith('@@'):
                loc_str = _make_multi_xpath_str(loc[4:at_ind], loc[at_ind:])
            elif loc[at_ind:].startswith('@|'):
                loc_str = _make_multi_xpath_str(loc[4:at_ind], loc[at_ind:], False)
            else:
                loc_str = _make_single_xpath_str(loc[4:at_ind], loc[at_ind:])

    # Lookup by text; these branches switch loc_by to xpath
    elif loc.startswith('text='):
        loc_by = 'xpath'
        loc_str = f'//*[text()={_make_search_str(loc[5:])}]'

    elif loc.startswith('text:') and loc != 'text:':
        loc_by = 'xpath'
        loc_str = f'//*/text()[contains(., {_make_search_str(loc[5:])})]/..'

    elif loc.startswith('text^') and loc != 'text^':
        loc_by = 'xpath'
        loc_str = f'//*/text()[starts-with(., {_make_search_str(loc[5:])})]/..'

    elif loc.startswith('text$') and loc != 'text$':
        loc_by = 'xpath'
        # Compares the trailing substring of the text node with the search string.
        loc_str = f'//*/text()[substring(., string-length(.) - string-length({_make_search_str(loc[5:])}) +1) = ' \
                  f'{_make_search_str(loc[5:])}]/..'

    # Explicit xpath
    elif loc.startswith(('xpath:', 'xpath=')) and loc not in ('xpath:', 'xpath='):
        loc_by = 'xpath'
        loc_str = loc[6:]
    elif loc.startswith(('x:', 'x=')) and loc not in ('x:', 'x='):
        loc_by = 'xpath'
        loc_str = loc[2:]

    # Explicit css selector
    elif loc.startswith(('css:', 'css=')) and loc not in ('css:', 'css='):
        loc_str = loc[4:]
    elif loc.startswith(('c:', 'c=')) and loc not in ('c:', 'c='):
        loc_str = loc[2:]

    # Any other non-empty string: fuzzy (contains) text match via xpath
    elif loc:
        loc_by = 'xpath'
        loc_str = f'//*/text()[contains(., {_make_search_str(loc)})]/..'

    # Empty string: match any element
    else:
        loc_str = '*'

    return loc_by, loc_str
def _make_single_xpath_str(tag: str, text: str) -> str: def _make_single_xpath_str(tag: str, text: str) -> str:
"""生成xpath语句 """生成xpath语句
:param tag: 标签名 :param tag: 标签名
@ -298,3 +393,56 @@ def translate_loc(loc):
raise ValueError('无法识别的定位符。') raise ValueError('无法识别的定位符。')
return loc_by, loc_str return loc_by, loc_str
def translate_css_loc(loc):
    """Convert a By-style locator tuple into a css selector or xpath locator tuple.

    A css selector is produced wherever the strategy can be expressed in css;
    xpath is used when the caller supplied xpath or when matching by link text.

    :param loc: By-style locator tuple ``(strategy, value)``
    :return: ``(loc_by, loc_str)`` tuple, loc_by being css selector or xpath
    :raises ValueError: if the tuple does not have exactly two items or the
        strategy is not recognised
    """
    if len(loc) != 2:
        raise ValueError('定位符长度必须为2。')

    loc_by = By.CSS_SELECTOR
    loc_0 = loc[0].lower()

    if loc_0 == By.XPATH:
        loc_by = By.XPATH
        loc_str = loc[1]

    elif loc_0 == By.CSS_SELECTOR:
        loc_str = loc[1]

    elif loc_0 == By.ID:
        loc_str = f'#{css_trans(loc[1])}'

    elif loc_0 == By.CLASS_NAME:
        loc_str = f'.{css_trans(loc[1])}'

    elif loc_0 == By.LINK_TEXT:
        # Exact link text. The value goes into an XPath string literal, which has
        # no backslash escapes, so css escaping must not be applied here.
        loc_by = By.XPATH
        loc_str = f'//a[text()="{loc[1]}"]'

    elif loc_0 == By.NAME:
        # css attribute selectors take the bare attribute name (no xpath-style '@').
        # Quoting the value keeps names that start with a digit valid.
        loc_str = f'*[name="{css_trans(loc[1])}"]'

    elif loc_0 == By.TAG_NAME:
        loc_str = loc[1]

    elif loc_0 == By.PARTIAL_LINK_TEXT:
        loc_by = By.XPATH
        loc_str = f'//a[contains(text(),"{loc[1]}")]'

    else:
        raise ValueError('无法识别的定位符。')

    return loc_by, loc_str
def css_trans(txt):
    """Backslash-escape the punctuation and space characters in a piece of text.

    Each character from the fixed set below is prefixed with a backslash; all
    other characters are copied unchanged.

    :param txt: text to escape
    :return: escaped text
    """
    special = frozenset('!"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~ ')
    pieces = []
    for ch in txt:
        pieces.append('\\' + ch if ch in special else ch)
    return ''.join(pieces)

View File

@ -9,10 +9,16 @@ from typing import Union
def is_loc(text: str) -> bool: ... def is_loc(text: str) -> bool: ...
def get_loc(loc: Union[tuple, str], translate_css: bool = False) -> tuple: ... def get_loc(loc: Union[tuple, str], translate_css: bool = False, css_mode: bool = False) -> tuple: ...
def str_to_loc(loc: str) -> tuple: ... def str_to_loc(loc: str) -> tuple: ...
def translate_loc(loc: tuple) -> tuple: ... def translate_loc(loc: tuple) -> tuple: ...
def translate_css_loc(loc: tuple) -> tuple: ...
def css_trans(txt: str) -> str: ...

View File

@ -3,8 +3,8 @@
@Author : g1879 @Author : g1879
@Contact : g1879@qq.com @Contact : g1879@qq.com
""" """
from platform import system
from pathlib import Path from pathlib import Path
from platform import system
from re import search, sub from re import search, sub
from shutil import rmtree from shutil import rmtree
from time import perf_counter, sleep from time import perf_counter, sleep
@ -12,6 +12,7 @@ from time import perf_counter, sleep
from psutil import process_iter, AccessDenied, NoSuchProcess, ZombieProcess from psutil import process_iter, AccessDenied, NoSuchProcess, ZombieProcess
from .._configs.options_manage import OptionsManager from .._configs.options_manage import OptionsManager
from ..errors import ContextLostError, ElementLostError, CDPError, PageClosedError, NoRectError, AlertExistsError
def get_usable_path(path, is_file=True, parents=True): def get_usable_path(path, is_file=True, parents=True):
@ -250,3 +251,29 @@ def configs_to_here(save_name=None):
om = OptionsManager('default') om = OptionsManager('default')
save_name = f'{save_name}.ini' if save_name is not None else 'dp_configs.ini' save_name = f'{save_name}.ini' if save_name is not None else 'dp_configs.ini'
om.save(save_name) om.save(save_name)
def raise_error(r):
    """Raise the exception that corresponds to an error result dict.

    This function never returns normally; it always raises.

    :param r: dict containing an 'error' entry (and optionally 'type',
        'method' and 'args')
    :return: None
    :raises ContextLostError: the execution context or inspected target is gone
    :raises ElementLostError: the referenced node/object can no longer be found
    :raises PageClosedError: the tab has been closed
    :raises TimeoutError: the call timed out
    :raises AlertExistsError: an alert is present
    :raises NoRectError: the node has no layout / box model
    :raises CDPError: the result is typed as 'call_method_error'
    :raises RuntimeError: any other error; the whole dict is attached
    """
    error = r['error']
    if error in ('Cannot find context with specified id', 'Inspected target navigated or closed'):
        raise ContextLostError
    elif error in ('Could not find node with given id', 'Could not find object with given id',
                   'No node with given id found', 'Node with given id does not belong to the document',
                   'No node found for given backend id'):
        raise ElementLostError
    elif error == 'tab closed':
        raise PageClosedError
    elif error == 'timeout':
        raise TimeoutError
    elif error == 'alert exists.':
        raise AlertExistsError
    elif error in ('Node does not have a layout object', 'Could not compute box model.'):
        raise NoRectError
    # .get() so that a result without a 'type' key reaches the RuntimeError
    # fallback instead of failing with an unrelated KeyError.
    elif r.get('type') == 'call_method_error':
        raise CDPError(f'\n错误:{error}\nmethod{r.get("method")}\nargs{r.get("args")}')
    else:
        raise RuntimeError(r)

View File

@ -42,3 +42,6 @@ def stop_process_on_port(port: Union[int, str]) -> None: ...
def configs_to_here(file_name: Union[Path, str] = None) -> None: ... def configs_to_here(file_name: Union[Path, str] = None) -> None: ...
def raise_error(r: dict) -> None: ...

View File

@ -10,9 +10,9 @@ from time import perf_counter, sleep
from .none_element import NoneElement from .none_element import NoneElement
from .session_element import make_session_ele from .session_element import make_session_ele
from .._base.base import DrissionElement, BaseElement from .._base.base import DrissionElement, BaseElement
from .._commons.settings import Settings
from .._commons.keys import keys_to_typing, keyDescriptionForString, keyDefinitions from .._commons.keys import keys_to_typing, keyDescriptionForString, keyDefinitions
from .._commons.locator import get_loc from .._commons.locator import get_loc
from .._commons.settings import Settings
from .._commons.tools import get_usable_path from .._commons.tools import get_usable_path
from .._commons.web import make_absolute_link, get_ele_txt, format_html, is_js_func, offset_scroll from .._commons.web import make_absolute_link, get_ele_txt, format_html, is_js_func, offset_scroll
from .._units.clicker import Clicker from .._units.clicker import Clicker
@ -711,7 +711,7 @@ class ChromiumElement(DrissionElement):
elif mode == 'css': elif mode == 'css':
txt1 = '' txt1 = ''
txt3 = '' txt3 = ''
txt4 = '''path = '>' + ":nth-child(" + nth + ")" + path;''' txt4 = '''path = '>' + el.tagName + ":nth-child(" + nth + ")" + path;'''
txt5 = '''return path.substr(1);''' txt5 = '''return path.substr(1);'''
else: else:
@ -736,7 +736,7 @@ class ChromiumElement(DrissionElement):
return e(this);} return e(this);}
''' '''
t = self.run_js(js) t = self.run_js(js)
return f':root{t}' if mode == 'css' else t return f'{t}' if mode == 'css' else t
def _set_file_input(self, files): def _set_file_input(self, files):
"""对上传控件写入路径 """对上传控件写入路径
@ -1022,31 +1022,42 @@ class ChromiumShadowRoot(BaseElement):
:param raise_err: 找不到元素是是否抛出异常为None时根据全局设置 :param raise_err: 找不到元素是是否抛出异常为None时根据全局设置
:return: ChromiumElement对象或其组成的列表 :return: ChromiumElement对象或其组成的列表
""" """
loc = get_loc(loc_or_str) loc = get_loc(loc_or_str, css_mode=False)
if loc[0] == 'css selector' and str(loc[1]).startswith(':root'): if loc[0] == 'css selector' and str(loc[1]).startswith(':root'):
loc = loc[0], loc[1][5:] loc = loc[0], loc[1][5:]
result = None
timeout = timeout if timeout is not None else self.page.timeout timeout = timeout if timeout is not None else self.page.timeout
end_time = perf_counter() + timeout end_time = perf_counter() + timeout
eles = make_session_ele(self.html).eles(loc) while not result and perf_counter() <= end_time:
while not eles and perf_counter() <= end_time: if loc[0] == 'css selector':
eles = make_session_ele(self.html).eles(loc)
if not eles:
return NoneElement() if single else eles
css_paths = [i.css_path[47:] for i in eles]
if single: if single:
node_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=css_paths[0])['nodeId'] nod_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=loc[1])['nodeId']
return make_chromium_ele(self.page, node_id=node_id) if node_id else NoneElement() result = make_chromium_ele(self.page, node_id=nod_id) if nod_id else NoneElement()
else: else:
results = [] nod_ids = self.page.run_cdp('DOM.querySelectorAll', nodeId=self._node_id, selector=loc[1])['nodeId']
for i in css_paths: result = [make_chromium_ele(self.page, node_id=n) for n in nod_ids]
else:
eles = make_session_ele(self.html).eles(loc)
if not eles:
result = NoneElement() if single else eles
continue
css = [i.css_path[61:] for i in eles]
if single:
node_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=css[0])['nodeId']
result = make_chromium_ele(self.page, node_id=node_id) if node_id else NoneElement()
else:
result = []
for i in css:
node_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=i)['nodeId'] node_id = self.page.run_cdp('DOM.querySelector', nodeId=self._node_id, selector=i)['nodeId']
if node_id: if node_id:
results.append(make_chromium_ele(self.page, node_id=node_id)) result.append(make_chromium_ele(self.page, node_id=node_id))
return results
return result
def _get_node_id(self, obj_id): def _get_node_id(self, obj_id):
"""返回元素node id""" """返回元素node id"""

View File

@ -267,14 +267,14 @@ class SessionElement(DrissionElement):
while ele: while ele:
if mode == 'css': if mode == 'css':
brothers = len(ele.eles(f'xpath:./preceding-sibling::*')) brothers = len(ele.eles(f'xpath:./preceding-sibling::*'))
path_str = f'>:nth-child({brothers + 1}){path_str}' path_str = f'>{ele.tag}:nth-child({brothers + 1}){path_str}'
else: else:
brothers = len(ele.eles(f'xpath:./preceding-sibling::{ele.tag}')) brothers = len(ele.eles(f'xpath:./preceding-sibling::{ele.tag}'))
path_str = f'/{ele.tag}[{brothers + 1}]{path_str}' if brothers > 0 else f'/{ele.tag}{path_str}' path_str = f'/{ele.tag}[{brothers + 1}]{path_str}' if brothers > 0 else f'/{ele.tag}{path_str}'
ele = ele.parent() ele = ele.parent()
return f':root{path_str[1:]}' if mode == 'css' else path_str return f'{path_str[1:]}' if mode == 'css' else path_str
def make_session_ele(html_or_ele, loc=None, single=True): def make_session_ele(html_or_ele, loc=None, single=True):

View File

@ -10,9 +10,9 @@ from threading import Thread
from time import perf_counter, sleep from time import perf_counter, sleep
from .._base.base import BasePage from .._base.base import BasePage
from .._commons.settings import Settings
from .._commons.locator import get_loc, is_loc from .._commons.locator import get_loc, is_loc
from .._commons.tools import get_usable_path from .._commons.settings import Settings
from .._commons.tools import get_usable_path, raise_error
from .._commons.web import location_in_viewport from .._commons.web import location_in_viewport
from .._elements.chromium_element import ChromiumElement, run_js, make_chromium_ele from .._elements.chromium_element import ChromiumElement, run_js, make_chromium_ele
from .._elements.none_element import NoneElement from .._elements.none_element import NoneElement
@ -25,8 +25,8 @@ from .._units.scroller import PageScroller
from .._units.setter import ChromiumBaseSetter from .._units.setter import ChromiumBaseSetter
from .._units.states import PageStates from .._units.states import PageStates
from .._units.waiter import BaseWaiter from .._units.waiter import BaseWaiter
from ..errors import (ContextLostError, ElementLostError, CDPError, PageClosedError, NoRectError, AlertExistsError, from ..errors import (ContextLostError, ElementLostError, CDPError, PageClosedError, GetDocumentError,
GetDocumentError, ElementNotFoundError) ElementNotFoundError)
__ERROR__ = 'error' __ERROR__ = 'error'
@ -441,28 +441,7 @@ class ChromiumBase(BasePage):
:return: 执行的结果 :return: 执行的结果
""" """
r = self.driver.run(cmd, **cmd_args) r = self.driver.run(cmd, **cmd_args)
if __ERROR__ not in r: return r if __ERROR__ not in r else raise_error(r)
return r
error = r[__ERROR__]
if error in ('Cannot find context with specified id', 'Inspected target navigated or closed'):
raise ContextLostError
elif error in ('Could not find node with given id', 'Could not find object with given id',
'No node with given id found', 'Node with given id does not belong to the document',
'No node found for given backend id'):
raise ElementLostError
elif error == 'tab closed':
raise PageClosedError
elif error == 'timeout':
raise TimeoutError
elif error == 'alert exists.':
raise AlertExistsError
elif error in ('Node does not have a layout object', 'Could not compute box model.'):
raise NoRectError
elif r['type'] == 'call_method_error':
raise CDPError(f'\n错误:{r["error"]}\nmethod{r["method"]}\nargs{r["args"]}')
else:
raise RuntimeError(r)
def run_cdp_loaded(self, cmd, **cmd_args): def run_cdp_loaded(self, cmd, **cmd_args):
"""执行Chrome DevTools Protocol语句执行前等待页面加载完毕 """执行Chrome DevTools Protocol语句执行前等待页面加载完毕