继续开发新版,未完成

This commit is contained in:
g1879 2022-10-28 19:09:34 +08:00
parent 35f19aa174
commit c447448e8f
8 changed files with 992 additions and 618 deletions

View File

@ -46,14 +46,9 @@ class BaseParser(object):
class BaseElement(BaseParser): class BaseElement(BaseParser):
"""各元素类的基类""" """各元素类的基类"""
def __init__(self, ele: Union[WebElement, HtmlElement], page=None): def __init__(self, page=None):
self._inner_ele = ele
self.page = page self.page = page
@property
def inner_ele(self) -> Union[WebElement, HtmlElement]:
return self._inner_ele
# ----------------以下属性或方法由后代实现---------------- # ----------------以下属性或方法由后代实现----------------
@property @property
def tag(self): def tag(self):

View File

@ -1,13 +1,17 @@
# -*- coding:utf-8 -*- # -*- coding:utf-8 -*-
# 问题跨iframe查找元素可能出现同名元素如何解决 # 问题跨iframe查找元素可能出现同名元素如何解决
# 须用DOM.documentUpdated检测元素有效性 # 须用DOM.documentUpdated检测元素有效性
from typing import Union, Tuple, List
from .base import DrissionElement
from .common import make_absolute_link, get_loc
class ChromeElement(object): class ChromeElement(DrissionElement):
def __init__(self, page, node_id: str = None, obj_id: str = None): def __init__(self, page, node_id: str = None, obj_id: str = None):
self.page = page super().__init__(page)
if not node_id and not obj_id: if not node_id and not obj_id:
raise TypeError('node_id或obj_id必须传入一个') raise TypeError('node_id或obj_id必须传入一个')
if node_id: if node_id:
self._node_id = node_id self._node_id = node_id
@ -17,18 +21,86 @@ class ChromeElement(object):
self._obj_id = obj_id self._obj_id = obj_id
@property @property
def html(self): def html(self) -> str:
"""返回元素outerHTML文本"""
return self.page.driver.DOM.getOuterHTML(nodeId=self._node_id)['outerHTML'] return self.page.driver.DOM.getOuterHTML(nodeId=self._node_id)['outerHTML']
def ele(self, xpath: str): @property
# todo: 引号记得转码 def inner_html(self) -> str:
js = f'''function(){{ """返回元素innerHTML文本"""
frame=this.contentDocument; return self.page.driver.Runtime.callFunctionOn('function(){this.innerHTML;}')
return document.evaluate("{xpath}", frame, null, 9, null).singleNodeValue;
}}''' @property
r = self.page.driver.Runtime.callFunctionOn(functionDeclaration=js, def attrs(self) -> dict:
objectId=self._obj_id)['result'].get('objectId', None) attrs = self.page.driver.DOM.getAttributes(nodeId=self._node_id)['attributes']
return r if not r else _ele(self.page, obj_id=r) attrs_len = len(attrs)
return {attrs[i]: attrs[i + 1] for i in range(0, attrs_len, 2)}
def ele(self,
        loc_or_str: Union[Tuple[str, str], str],
        timeout: float = None) -> Union['ChromeElement', str, None]:
    """Look up the first match below this element.

    :param loc_or_str: locator tuple or query string
    :param timeout: lookup timeout; forwarded unchanged to ``_ele``
    :return: whatever ``_ele`` yields for a single-match lookup
    """
    first_match = self._ele(loc_or_str, timeout)
    return first_match
def eles(self,
         loc_or_str: Union[Tuple[str, str], str],
         timeout: float = None) -> List[Union['ChromeElement', str]]:
    """Look up every match below this element.

    :param loc_or_str: locator tuple or query string
    :param timeout: lookup timeout; forwarded unchanged to ``_ele``
    :return: whatever ``_ele`` yields when called with ``single=False``
    """
    lookup = self._ele
    return lookup(loc_or_str, timeout=timeout, single=False)
def _ele(self,
         loc_or_str: Union[Tuple[str, str], str],
         timeout: float = None,
         single: bool = True) -> Union['ChromeElement', str, None, List[Union['ChromeElement', str]]]:
    """Shared lookup used by ``ele`` and ``eles``; delegates to ``make_chrome_ele``.

    :param loc_or_str: locator tuple or query string
    :param timeout: lookup timeout
    :param single: True for the first match only, False for all matches
    :return: the result of ``make_chrome_ele`` for this element
    """
    return make_chrome_ele(ele=self, loc=loc_or_str, single=single, timeout=timeout)
def attr(self, attr: str) -> Union[str, None]:
    """Return the value of an attribute, or None when the element lacks it.

    A few names are served by the matching property instead of the DOM
    attribute map: ``text``, ``innerText`` (``raw_text``), ``html`` /
    ``outerHTML`` and ``innerHTML``. ``href`` and ``src`` are converted to
    absolute URLs with ``make_absolute_link``; ``javascript:`` and
    ``mailto:`` hrefs are returned untouched.

    :param attr: attribute name
    :return: attribute value, or None if the attribute is not present
    """
    # Pseudo-attributes do not need the attribute map at all.
    if attr == 'text':
        return self.text
    if attr == 'innerText':
        return self.raw_text
    if attr in ('html', 'outerHTML'):
        return self.html
    if attr == 'innerHTML':
        return self.inner_html

    # .get() instead of indexing: a missing attribute yields None, as
    # documented, rather than raising KeyError.
    value = self.attrs.get(attr)

    if attr == 'href':
        if not value or value.lower().startswith(('javascript:', 'mailto:')):
            return value
        return make_absolute_link(value, self.page)

    if attr == 'src':
        return make_absolute_link(value, self.page)

    return value
def click(self, by_js: bool = True): def click(self, by_js: bool = True):
if by_js: if by_js:
@ -41,6 +113,237 @@ class ChromeElement(object):
def _get_node_id(self, obj_id): def _get_node_id(self, obj_id):
return self.page.driver.DOM.requestNode(objectId=obj_id)['nodeId'] return self.page.driver.DOM.requestNode(objectId=obj_id)['nodeId']
@property
def tag(self) -> str:
    """Return the node's ``localName`` as reported by ``DOM.describeNode``."""
    description = self.page.driver.DOM.describeNode(nodeId=self._node_id)
    return description['node']['localName']
def _ele(page, node_id=None, obj_id=None) -> ChromeElement: @property
return ChromeElement(page=page, node_id=node_id, obj_id=obj_id) def is_valid(self):
return True
# Placeholder property: no text extraction is implemented here yet, so it
# evaluates to None.
@property
def text(self):
return
# Placeholder property: nothing is computed yet, so it evaluates to None.
# ChromeElement.attr('innerText') returns this value.
@property
def raw_text(self):
return
# Placeholder: always returns an empty string and ignores ``mode``.
def _get_ele_path(self, mode):
return ''
def make_chrome_ele(ele: ChromeElement,
                    loc: Union[str, Tuple[str, str]],
                    single: bool = True,
                    timeout: float = None) -> Union[ChromeElement, str, None, List[Union[ChromeElement, str]]]:
    """Search inside a ChromeElement.

    The locator is normalised with ``get_loc``. XPath lookups are executed in
    the browser through ``Runtime.callFunctionOn`` using the script built by
    ``_make_js``. For ``iframe``/``frame`` elements the search root is
    ``this.contentDocument``.

    NOTE: ``timeout`` is accepted for interface compatibility but no waiting
    is performed yet. Only xpath locators are executed; any other locator
    type yields the "nothing found" value.

    :param ele: ChromeElement to search in
    :param loc: locator tuple or query string
    :param single: True to return the first match, False to return all
    :param timeout: lookup timeout (currently unused)
    :return: a ChromeElement, a string (text/attribute/comment node), a
             number (xpath expressions that are not node sets), None, or a
             list of elements/strings when ``single`` is False
    :raises ValueError: if ``loc`` is neither str nor tuple
    :raises RuntimeError: if the in-page script raised an error other than
                          the handled "not a node set" case
    """
    # --------------- normalise the locator ---------------
    if not isinstance(loc, (str, tuple)):
        raise ValueError("定位符必须为str或长度为2的tuple对象。")
    loc = get_loc(loc)

    loc_str = loc[1]
    if loc[0] == 'xpath' and loc[1].lstrip().startswith('/'):
        # Anchor absolute-looking paths at the current element.
        loc_str = f'.{loc_str}'
    elif loc[0] == 'css selector' and loc[1].lstrip().startswith('>'):
        loc_str = f'{ele.css_path}{loc[1]}'
    loc = loc[0], loc_str

    not_found = None if single else []

    # --------------- run the lookup ---------------
    if loc[0] != 'xpath':
        # TODO: css selector lookup is not implemented yet.
        return not_found

    type_txt = '9' if single else '7'
    node_txt = 'this.contentDocument' if ele.tag in ('iframe', 'frame') else 'this'
    runtime = ele.page.driver.Runtime

    js = _make_js(loc[1], type_txt, node_txt)
    res = runtime.callFunctionOn(functionDeclaration=js, objectId=ele._obj_id)['result']

    # Text, attribute and comment nodes come back as plain strings.
    if res.get('type') == 'string':
        return res['value']

    if res.get('subtype') == 'null':
        return not_found

    if res.get('className') == 'TypeError':
        description = res.get('description', '')
        if 'The result is not a node set' in description:
            # The expression yields a scalar (e.g. count()); re-run it as a number.
            js = _make_js(loc[1], '1', node_txt)
            r = runtime.callFunctionOn(functionDeclaration=js, objectId=ele._obj_id)
            return r['result'].get('value')
        raise RuntimeError(description)

    # Any other script error is not an element; surface it instead of wrapping it.
    if res.get('subtype') == 'error':
        raise RuntimeError(res.get('description'))

    if 'objectId' not in res:
        return not_found

    if single:
        return ChromeElement(ele.page, obj_id=res['objectId'])

    # The script returned an array: walk its indexed entries. The first
    # non-enumerable property (``length``) marks the end of the items.
    props = runtime.getProperties(objectId=res['objectId'])['result']
    found = []
    for prop in props:
        if not prop['enumerable']:
            break
        value = prop['value']
        if 'objectId' in value:
            found.append(ChromeElement(ele.page, obj_id=value['objectId']))
        else:
            # Strings pushed by the script for text/attribute/comment nodes.
            found.append(value.get('value'))
    return found
# try:
# # 使用xpath查找
# if loc[0] == 'xpath':
# js = _make_js()
# r = ele.page.driver.Runtime.callFunctionOn(functionDeclaration=js,
# objectId=self._obj_id)['result'].get('objectId', None)
# return r if not r else _ele(self.page, obj_id=r)
#
# return wait.until(ElementsByXpath(page, loc[1], single, timeout))
#
# # 使用css selector查找
# else:
# if single:
# return DriverElement(wait.until(ec.presence_of_element_located(loc)), page)
# else:
# eles = wait.until(ec.presence_of_all_elements_located(loc))
# return [DriverElement(ele, page) for ele in eles]
#
# except TimeoutException:
# return [] if not single else None
#
# except InvalidElementStateException:
# raise ValueError(f'无效的查找语句:{loc}')
def _make_js(xpath: str, type_txt: str, node_txt: str):
for_txt = ''
# 获取第一个元素、节点或属性
if type_txt == '9':
return_txt = '''
if(e.singleNodeValue==null){return null;}
else if(e.singleNodeValue.constructor.name=="Text"){return e.singleNodeValue.data;}
else if(e.singleNodeValue.constructor.name=="Attr"){return e.singleNodeValue.nodeValue;}
else if(e.singleNodeValue.constructor.name=="Comment"){return e.singleNodeValue.nodeValue;}
else{return e.singleNodeValue;}'''
# 按顺序获取所有元素、节点或属性
elif type_txt == '7':
for_txt = """
var a=new Array();
for(var i = 0; i <e.snapshotLength ; i++){
if(e.snapshotItem(i).constructor.name=="Text"){a.push(e.snapshotItem(i).data);}
else if(e.snapshotItem(i).constructor.name=="Attr"){a.push(e.snapshotItem(i).nodeValue);}
else if(e.snapshotItem(i).constructor.name=="Comment"){a.push(e.snapshotItem(i).nodeValue);}
else{a.push(e.snapshotItem(i));}}"""
return_txt = 'return a;'
elif type_txt == '2':
return_txt = 'return e.stringValue;'
elif type_txt == '1':
return_txt = 'return e.numberValue;'
else:
return_txt = 'return e.singleNodeValue;'
js = f'function(){{var e=document.evaluate(\'{xpath}\',{node_txt},null,{type_txt},null);\n{for_txt}\n{return_txt}}}'
return js
# class ElementsByXpath(object):
# """用js通过xpath获取元素、节点或属性与WebDriverWait配合使用"""
#
# def __init__(self, page, xpath: str = None, single: bool = False, timeout: float = 10):
# """
# :param page: DrissionPage对象
# :param xpath: xpath文本
# :param single: True则返回第一个False则返回全部
# :param timeout: 超时时间
# """
# self.page = page
# self.xpath = xpath
# self.single = single
# self.timeout = timeout
#
# def __call__(self, ele_or_driver: Union[RemoteWebDriver, WebElement]) \
# -> Union[str, DriverElement, None, List[str or DriverElement]]:
#
# def get_nodes(node=None, xpath_txt=None, type_txt='7'):
# """用js通过xpath获取元素、节点或属性
# :param node: 'document' 或 元素对象
# :param xpath_txt: xpath语句
# :param type_txt: resultType,参考 https://developer.mozilla.org/zh-CN/docs/Web/API/Document/evaluate
# :return: 元素对象或属性、文本字符串
# """
# node_txt = 'document' if not node or node == 'document' else 'arguments[0]'
# for_txt = ''
#
# # 获取第一个元素、节点或属性
# if type_txt == '9':
# return_txt = '''
# if(e.singleNodeValue.constructor.name=="Text"){return e.singleNodeValue.data;}
# else if(e.singleNodeValue.constructor.name=="Attr"){return e.singleNodeValue.nodeValue;}
# else if(e.singleNodeValue.constructor.name=="Comment"){return e.singleNodeValue.nodeValue;}
# else{return e.singleNodeValue;}
# '''
#
# # 按顺序获取所有元素、节点或属性
# elif type_txt == '7':
# for_txt = """
# var a=new Array();
# for(var i = 0; i <e.snapshotLength ; i++){
# if(e.snapshotItem(i).constructor.name=="Text"){a.push(e.snapshotItem(i).data);}
# else if(e.snapshotItem(i).constructor.name=="Attr"){a.push(e.snapshotItem(i).nodeValue);}
# else if(e.snapshotItem(i).constructor.name=="Comment"){a.push(e.snapshotItem(i).nodeValue);}
# else{a.push(e.snapshotItem(i));}
# }
# """
# return_txt = 'return a;'
#
# elif type_txt == '2':
# return_txt = 'return e.stringValue;'
# elif type_txt == '1':
# return_txt = 'return e.numberValue;'
# else:
# return_txt = 'return e.singleNodeValue;'
#
# js = """
# var e=document.evaluate(arguments[1], """ + node_txt + """, null, """ + type_txt + """,null);
# """ + for_txt + """
# """ + return_txt + """
# """
# return driver.execute_script(js, node, xpath_txt)
#
# if isinstance(ele_or_driver, RemoteWebDriver):
# driver, the_node = ele_or_driver, 'document'
# else:
# driver, the_node = ele_or_driver.parent, ele_or_driver
#
# # 把lxml元素对象包装成DriverElement对象并按需要返回第一个或全部
# if self.single:
# try:
# e = get_nodes(the_node, xpath_txt=self.xpath, type_txt='9')
#
# if isinstance(e, WebElement):
# return DriverElement(e, self.page)
# elif isinstance(e, str):
# return format_html(e)
# else:
# return e
#
# # 找不到目标时
# except JavascriptException as err:
# if 'The result is not a node set' in err.msg:
# try:
# return get_nodes(the_node, xpath_txt=self.xpath, type_txt='1')
# except JavascriptException:
# return None
# else:
# return None
#
# else: # 返回全部
# return ([DriverElement(x, self.page) if isinstance(x, WebElement)
# else format_html(x)
# for x in get_nodes(the_node, xpath_txt=self.xpath)
# if x != '\n'])

View File

@ -70,6 +70,10 @@ class ChromePage(BasePage):
"""返回当前页面加载状态,""" """返回当前页面加载状态,"""
return self.driver.Runtime.evaluate(expression='document.readyState;')['result']['value'] return self.driver.Runtime.evaluate(expression='document.readyState;')['result']['value']
# Stub: not implemented yet, so the property evaluates to None.
@property
def active_ele(self):
pass
def get(self, def get(self,
url: str, url: str,
show_errmsg: bool = False, show_errmsg: bool = False,
@ -90,6 +94,7 @@ class ChromePage(BasePage):
interval=interval, interval=interval,
show_errmsg=show_errmsg, show_errmsg=show_errmsg,
timeout=timeout) timeout=timeout)
self.driver.DOM.getDocument()
return self._url_available return self._url_available
def get_cookies(self, as_dict: bool = False): def get_cookies(self, as_dict: bool = False):
@ -101,6 +106,12 @@ class ChromePage(BasePage):
def eles(self, loc_or_ele: Union[Tuple[str, str], str, ChromeElement], timeout: float = None): def eles(self, loc_or_ele: Union[Tuple[str, str], str, ChromeElement], timeout: float = None):
return self._ele(loc_or_ele, timeout=timeout, single=False) return self._ele(loc_or_ele, timeout=timeout, single=False)
# Stub: not implemented yet; takes no arguments and returns None.
def s_ele(self):
pass
# Stub: not implemented yet; takes no arguments and returns None.
def s_eles(self):
pass
def _ele(self, def _ele(self,
loc_or_ele: Union[Tuple[str, str], str, ChromeElement], loc_or_ele: Union[Tuple[str, str], str, ChromeElement],
timeout: float = None, timeout: float = None,
@ -164,6 +175,47 @@ class ChromePage(BasePage):
""" """
return self.driver.call_method(cmd, **cmd_args) return self.driver.call_method(cmd, **cmd_args)
def set_user_agent(self, ua: str) -> None:
    """Override the user agent through ``Network.setUserAgentOverride``.

    :param ua: user agent string
    :return: None
    """
    network = self.driver.Network
    network.setUserAgentOverride(userAgent=ua)
def get_session_storage(self, item: str = None) -> Union[str, dict, None]:
    """Read sessionStorage, either one item or the whole object.

    The expression is passed to ``Runtime.evaluate`` by keyword, like the
    other ``evaluate`` calls on this page class, and the key is emitted as an
    escaped JS string literal so quotes in ``item`` cannot break the script.

    :param item: key to read; a falsy value reads the whole ``sessionStorage``
    :return: the value returned by ``Runtime.evaluate`` for the expression
    """
    import json

    js = f'sessionStorage.getItem({json.dumps(item)});' if item else 'sessionStorage;'
    return self.driver.Runtime.evaluate(expression=js)
def get_local_storage(self, item: str = None) -> Union[str, dict, None]:
    """Read localStorage, either one item or the whole object.

    The expression is passed to ``Runtime.evaluate`` by keyword, like the
    other ``evaluate`` calls on this page class, and the key is emitted as an
    escaped JS string literal so quotes in ``item`` cannot break the script.

    :param item: key to read; a falsy value reads the whole ``localStorage``
    :return: the value returned by ``Runtime.evaluate`` for the expression
    """
    import json

    js = f'localStorage.getItem({json.dumps(item)});' if item else 'localStorage;'
    return self.driver.Runtime.evaluate(expression=js)
def set_session_storage(self, item: str, value: Union[str, bool]) -> None:
    """Set or delete one sessionStorage entry.

    Deletion is selected by ``value is False`` (the previous check looked at
    ``item``, so the delete branch could never run). Any other value is
    stored as its string form. Key and value are emitted as escaped JS string
    literals, and the expression is passed to ``Runtime.evaluate`` by keyword.

    :param item: key to set or delete
    :param value: value to store; ``False`` deletes the entry
    :return: the value returned by ``Runtime.evaluate`` (kept for compatibility)
    """
    import json

    key = json.dumps(item)
    if value is False:
        js = f'sessionStorage.removeItem({key});'
    else:
        js = f'sessionStorage.setItem({key},{json.dumps(str(value))});'
    return self.driver.Runtime.evaluate(expression=js)
def set_local_storage(self, item: str, value: Union[str, bool]) -> None:
    """Set or delete one localStorage entry.

    Deletion is selected by ``value is False`` (the previous check looked at
    ``item``, so the delete branch could never run). Any other value is
    stored as its string form. Key and value are emitted as escaped JS string
    literals, and the expression is passed to ``Runtime.evaluate`` by keyword.

    :param item: key to set or delete
    :param value: value to store; ``False`` deletes the entry
    :return: the value returned by ``Runtime.evaluate`` (kept for compatibility)
    """
    import json

    key = json.dumps(item)
    if value is False:
        js = f'localStorage.removeItem({key});'
    else:
        js = f'localStorage.setItem({key},{json.dumps(str(value))});'
    return self.driver.Runtime.evaluate(expression=js)
def create_tab(self, url: str = None) -> None: def create_tab(self, url: str = None) -> None:
"""新建并定位到一个标签页,该标签页在最后面 \n """新建并定位到一个标签页,该标签页在最后面 \n
:param url: 新标签页跳转到的网址 :param url: 新标签页跳转到的网址
@ -194,6 +246,10 @@ class ChromePage(BasePage):
if activate: if activate:
requests_get(f'http://{self.debugger_address}/json/activate/{tab}') requests_get(f'http://{self.debugger_address}/json/activate/{tab}')
def to_front(self) -> None:
    """Activate the current tab by requesting the debugger's ``/json/activate`` endpoint."""
    activate_url = f'http://{self.debugger_address}/json/activate/{self.current_tab_handle}'
    requests_get(activate_url)
def close_tabs(self, num_or_handles: Union[int, str, list, tuple, set] = None, others: bool = False) -> None: def close_tabs(self, num_or_handles: Union[int, str, list, tuple, set] = None, others: bool = False) -> None:
"""关闭传入的标签页,默认关闭当前页。可传入多个 \n """关闭传入的标签页,默认关闭当前页。可传入多个 \n
注意当程序使用的是接管的浏览器获取到的 handle 顺序和视觉效果不一致不能按序号关闭 \n 注意当程序使用的是接管的浏览器获取到的 handle 顺序和视觉效果不一致不能按序号关闭 \n

View File

@ -10,6 +10,7 @@ from re import split, search, sub
from shutil import rmtree from shutil import rmtree
from typing import Union from typing import Union
from zipfile import ZipFile from zipfile import ZipFile
from urllib.parse import urlparse, urljoin, urlunparse
def get_ele_txt(e) -> str: def get_ele_txt(e) -> str:
@ -451,3 +452,28 @@ def get_long(txt) -> int:
""" """
txt_len = len(txt) txt_len = len(txt)
return int((len(txt.encode('utf-8')) - txt_len) / 2 + txt_len) return int((len(txt.encode('utf-8')) - txt_len) / 2 + txt_len)
def make_absolute_link(link, page=None) -> str:
    """Turn a link into an absolute URL where possible.

    :param link: hyperlink text; falsy values are returned unchanged
    :param page: page object exposing ``url``; may be None
    :return: absolute link, or the original link when it cannot be resolved
    """
    if not link:
        return link

    parts = urlparse(link)

    # Relative path: join with the page url when a page is available.
    if not parts.netloc:
        return urljoin(page.url, link) if page else link

    # Has a host but no scheme: borrow the scheme from the page url.
    if page and not parts.scheme:
        page_scheme = urlparse(page.url).scheme
        return urlunparse(parts._replace(scheme=page_scheme))

    # Already absolute (or nothing to borrow a scheme from).
    return link

File diff suppressed because it is too large Load Diff

View File

@ -30,9 +30,10 @@ class DriverElement(DrissionElement):
:param ele: 被包装的WebElement元素 :param ele: 被包装的WebElement元素
:param page: 元素所在页面 :param page: 元素所在页面
""" """
super().__init__(ele, page) super().__init__(page)
self._select = None self._select = None
self._scroll = None self._scroll = None
self._inner_ele = ele
def __repr__(self) -> str: def __repr__(self) -> str:
attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs] attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs]
@ -50,6 +51,10 @@ class DriverElement(DrissionElement):
return self.ele(loc_or_str, timeout) return self.ele(loc_or_str, timeout)
# -----------------共有属性和方法------------------- # -----------------共有属性和方法-------------------
@property
def inner_ele(self) -> WebElement:
return self._inner_ele
@property @property
def tag(self) -> str: def tag(self) -> str:
"""返回元素类型""" """返回元素类型"""

View File

@ -12,7 +12,7 @@ from lxml.etree import tostring
from lxml.html import HtmlElement, fromstring from lxml.html import HtmlElement, fromstring
from .base import DrissionElement, BasePage, BaseElement from .base import DrissionElement, BasePage, BaseElement
from .common import get_ele_txt, get_loc from .common import get_ele_txt, get_loc, make_absolute_link
class SessionElement(DrissionElement): class SessionElement(DrissionElement):
@ -23,7 +23,12 @@ class SessionElement(DrissionElement):
:param ele: 被包装的HtmlElement元素 :param ele: 被包装的HtmlElement元素
:param page: 元素所在页面对象如果是从 html 文本生成的元素则为 None :param page: 元素所在页面对象如果是从 html 文本生成的元素则为 None
""" """
super().__init__(ele, page) super().__init__(page)
self._inner_ele = ele
@property
def inner_ele(self) -> HtmlElement:
return self._inner_ele
def __repr__(self) -> str: def __repr__(self) -> str:
attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs] attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs]
@ -180,10 +185,10 @@ class SessionElement(DrissionElement):
return link return link
else: # 其它情况直接返回绝对url else: # 其它情况直接返回绝对url
return self._make_absolute(link) return make_absolute_link(link, self.page)
elif attr == 'src': elif attr == 'src':
return self._make_absolute(self.inner_ele.get('src')) return make_absolute_link(self.inner_ele.get('src'), self.page)
elif attr == 'text': elif attr == 'text':
return self.text return self.text
@ -268,30 +273,6 @@ class SessionElement(DrissionElement):
return f':root{path_str[1:]}' if mode == 'css' else path_str return f':root{path_str[1:]}' if mode == 'css' else path_str
# ----------------session独有方法-----------------------
def _make_absolute(self, link) -> str:
"""获取绝对url
:param link: 超链接
:return: 绝对链接
"""
if not link:
return link
parsed = urlparse(link)._asdict()
# 是相对路径与页面url拼接并返回
if not parsed['netloc']:
return urljoin(self.page.url, link) if self.page else link
# 是绝对路径但缺少协议从页面url获取协议并修复
if not parsed['scheme'] and self.page:
parsed['scheme'] = urlparse(self.page.url).scheme
parsed = tuple(v for v in parsed.values())
return urlunparse(parsed)
# 绝对路径且不缺协议,直接返回
return link
def make_session_ele(html_or_ele: Union[str, BaseElement, BasePage], def make_session_ele(html_or_ele: Union[str, BaseElement, BasePage],
loc: Union[str, Tuple[str, str]] = None, loc: Union[str, Tuple[str, str]] = None,

View File

@ -19,8 +19,13 @@ class ShadowRootElement(BaseElement):
"""ShadowRootElement是用于处理ShadowRoot的类使用方法和DriverElement基本一致""" """ShadowRootElement是用于处理ShadowRoot的类使用方法和DriverElement基本一致"""
def __init__(self, inner_ele: WebElement, parent_ele: DriverElement): def __init__(self, inner_ele: WebElement, parent_ele: DriverElement):
super().__init__(inner_ele, parent_ele.page) super().__init__(parent_ele.page)
self.parent_ele = parent_ele self.parent_ele = parent_ele
self._inner_ele = inner_ele
@property
def inner_ele(self) -> WebElement:
return self._inner_ele
def __repr__(self) -> str: def __repr__(self) -> str:
return f'<ShadowRootElement in {self.parent_ele} >' return f'<ShadowRootElement in {self.parent_ele} >'