继续开发新版,未完成

This commit is contained in:
g1879 2022-10-30 23:31:53 +08:00
parent c447448e8f
commit 00e7680858
5 changed files with 424 additions and 189 deletions

View File

@ -1,15 +1,22 @@
# -*- coding:utf-8 -*- # -*- coding:utf-8 -*-
# 问题跨iframe查找元素可能出现同名元素如何解决 """
# 须用DOM.documentUpdated检测元素有效性 @Author : g1879
@Contact : g1879@qq.com
@File : chrome_element.py
"""
from typing import Union, Tuple, List from typing import Union, Tuple, List
from time import perf_counter
from .session_element import make_session_ele
from .base import DrissionElement from .base import DrissionElement
from .common import make_absolute_link, get_loc from .common import make_absolute_link, get_loc, get_ele_txt, format_html
class ChromeElement(DrissionElement): class ChromeElement(DrissionElement):
def __init__(self, page, node_id: str = None, obj_id: str = None): def __init__(self, page, node_id: str = None, obj_id: str = None):
super().__init__(page) super().__init__(page)
self._select = None
self._scroll = None
if not node_id and not obj_id: if not node_id and not obj_id:
raise TypeError('node_id或obj_id必须传入一个。') raise TypeError('node_id或obj_id必须传入一个。')
@ -20,15 +27,32 @@ class ChromeElement(DrissionElement):
self._node_id = self._get_node_id(obj_id) self._node_id = self._get_node_id(obj_id)
self._obj_id = obj_id self._obj_id = obj_id
def __repr__(self) -> str:
    """Return a tag-like summary of the element.

    The result has the form ``<ChromeElement TAG name='value' ...>``, with
    one ``name='value'`` pair per attribute, separated by single spaces.

    :return: printable description of the element
    """
    # ``attrs`` is a property that builds a fresh dict on every access, so it
    # is read exactly once here instead of once per attribute. This also
    # guarantees that every pair comes from the same snapshot.
    attrs = self.attrs
    pairs = " ".join(f"{name}='{value}'" for name, value in attrs.items())
    return f'<ChromeElement {self.tag} {pairs}>'
@property
def obj_id(self) -> str:
    """Return the object id held by this element (read-only view of ``_obj_id``)."""
    return self._obj_id
@property
def node_id(self) -> str:
    """Return the node id held by this element (read-only view of ``_node_id``)."""
    return self._node_id
@property @property
def html(self) -> str: def html(self) -> str:
"""返回元素outerHTML文本""" """返回元素outerHTML文本"""
return self.page.driver.DOM.getOuterHTML(nodeId=self._node_id)['outerHTML'] return self.page.driver.DOM.getOuterHTML(nodeId=self._node_id)['outerHTML']
@property
def tag(self) -> str:
    """Return the element's ``localName`` as reported by ``DOM.describeNode``."""
    described = self.page.driver.DOM.describeNode(nodeId=self._node_id)
    node = described['node']
    return node['localName']
@property @property
def inner_html(self) -> str: def inner_html(self) -> str:
"""返回元素innerHTML文本""" """返回元素innerHTML文本"""
return self.page.driver.Runtime.callFunctionOn('function(){this.innerHTML;}') return self.page.driver.Runtime.callFunctionOn(functionDeclaration='function(){return this.innerHTML;}',
objectId=self._obj_id)['result']['value']
@property @property
def attrs(self) -> dict: def attrs(self) -> dict:
@ -36,6 +60,52 @@ class ChromeElement(DrissionElement):
attrs_len = len(attrs) attrs_len = len(attrs)
return {attrs[i]: attrs[i + 1] for i in range(0, attrs_len, 2)} return {attrs[i]: attrs[i + 1] for i in range(0, attrs_len, 2)}
@property
def size(self) -> dict:
    """Return the element's height and width taken from ``DOM.getBoxModel``.

    :return: dict with the keys ``height`` and ``width``
    """
    box = self.page.driver.DOM.getBoxModel(nodeId=self._node_id)['model']
    return {key: box[key] for key in ('height', 'width')}
@property
def client_location(self) -> dict:
    """Return the element's top-left corner relative to the viewport.

    The coordinates come from ``getBoundingClientRect()`` and are truncated
    toward zero to whole pixels.

    :return: dict with the integer keys ``x`` and ``y``
    """
    # Query the rectangle once so that left and top describe the same layout.
    js = '''function(){
    const rect = this.getBoundingClientRect();
    return rect.left.toString()+" "+rect.top.toString();}'''
    xy = self.page.driver.Runtime.callFunctionOn(functionDeclaration=js, objectId=self._obj_id)['result']['value']
    x, y = xy.split(' ')
    # Number.prototype.toString() may use exponent notation (e.g. "1e-7") for
    # tiny sub-pixel offsets; going through float() handles that, whereas
    # cutting the text at the first '.' would make int() fail.
    return {'x': int(float(x)), 'y': int(float(y))}
@property
def location(self) -> dict:
    """Return the element's top-left corner accumulated over its offsetParent chain.

    Starting from the element's own ``offsetLeft``/``offsetTop``, the offsets
    of every ``offsetParent`` are added until the chain ends.

    :return: dict with the integer keys ``x`` and ``y``
    """
    # Both axes add the ancestor's border width (clientLeft / clientTop).
    # Previously only the vertical sum did, so x and y were computed
    # inconsistently for elements nested inside bordered containers.
    js = '''function(){
    var x = this.offsetLeft;
    var y = this.offsetTop;
    for (var current = this.offsetParent; current !== null; current = current.offsetParent){
        x += (current.offsetLeft+current.clientLeft);
        y += (current.offsetTop+current.clientTop);
    }
    return x.toString() +' '+y.toString();}'''
    xy = self.page.driver.Runtime.callFunctionOn(functionDeclaration=js, objectId=self._obj_id)['result']['value']
    x, y = xy.split(' ')
    return {'x': int(float(x)), 'y': int(float(y))}
@property
def scroll(self) -> 'ChromeScroll':
    """Return the scroll helper for this element, creating it on first access."""
    helper = self._scroll
    if helper is None:
        # Built lazily and cached so later accesses reuse the same object.
        helper = self._scroll = ChromeScroll(self)
    return helper
def ele(self, def ele(self,
loc_or_str: Union[Tuple[str, str], str], loc_or_str: Union[Tuple[str, str], str],
timeout: float = None) -> Union['ChromeElement', str, None]: timeout: float = None) -> Union['ChromeElement', str, None]:
@ -102,35 +172,114 @@ class ChromeElement(DrissionElement):
else: else:
return attrs[attr] return attrs[attr]
def click(self, by_js: bool = True): def prop(self, prop: str) -> Union[str, int, None]:
"""获取property属性值 \n
:param prop: 属性名
:return: 属性值文本
"""
p = self.page.driver.Runtime.getProperties(objectId=self._obj_id)['result']
for i in p:
if i['name'] == prop:
if 'value' not in i or 'value' not in i['value']:
return None
return format_html(i['value']['value'])
def set_prop(self, prop: str, value: str) -> None:
    """Assign a value to a JavaScript property of the element.

    :param prop: property name; it is placed verbatim after ``this.`` in the
                 generated script
    :param value: value to assign
    :return: None
    :raises SyntaxError: if the call result contains ``exceptionDetails``
    """
    # Local import keeps this block self-contained.
    from json import dumps

    # Encode the value as a JSON literal instead of escaping only single
    # quotes: backslashes, double quotes and newlines in the value would
    # otherwise corrupt or change the generated script.
    js = f'function(){{this.{prop}={dumps(value)};}}'
    r = self.page.driver.Runtime.callFunctionOn(functionDeclaration=js, objectId=self._obj_id)
    if 'exceptionDetails' in r:
        raise SyntaxError(r['result']['description'])
def click(self, by_js: bool = False) -> None:
"""点击元素 \n
尝试点击直到超时若都失败就改用js点击 \n
:param by_js: 是否用js点击为True时直接用js点击为False时重试失败也不会改用js
:return: 是否点击成功
"""
if by_js: if by_js:
js = 'function(){this.click();}' js = 'function(){this.click();}'
self.page.driver.Runtime.callFunctionOn(functionDeclaration=js, objectId=self._obj_id) self.page.driver.Runtime.callFunctionOn(functionDeclaration=js, objectId=self._obj_id)
return
def _get_obj_id(self, node_id): self.page.driver.DOM.scrollIntoViewIfNeeded(nodeId=self._node_id)
xy = self.client_location
size = self.size
x = xy['x'] + size['width'] // 2
y = xy['y'] + size['height'] // 2
self.page.driver.Input.dispatchMouseEvent(type='mousePressed', x=x, y=y, button='left', clickCount=1)
self.page.driver.Input.dispatchMouseEvent(type='mouseReleased', x=x, y=y, button='left')
# js = """function(){const event=new MouseEvent('click',{view:window, bubbles:true, cancelable:true});
# this.dispatchEvent(event);}"""
# self.page.driver.Runtime.callFunctionOn(functionDeclaration=js, objectId=self._obj_id)
def _get_obj_id(self, node_id) -> str:
return self.page.driver.DOM.resolveNode(nodeId=node_id)['object']['objectId'] return self.page.driver.DOM.resolveNode(nodeId=node_id)['object']['objectId']
def _get_node_id(self, obj_id): def _get_node_id(self, obj_id) -> str:
return self.page.driver.DOM.requestNode(objectId=obj_id)['nodeId'] return self.page.driver.DOM.requestNode(objectId=obj_id)['nodeId']
@property
def tag(self) -> str:
return self.page.driver.DOM.describeNode(nodeId=self._node_id)['node']['localName']
@property @property
def is_valid(self): def is_valid(self):
return True return True
@property @property
def text(self): def text(self) -> str:
return """返回元素内所有文本"""
return get_ele_txt(make_session_ele(self.html))
@property @property
def raw_text(self): def raw_text(self):
return return
def _get_ele_path(self, mode): def _get_ele_path(self, mode) -> str:
return '' """返获取css路径或xpath路径"""
if mode == 'xpath':
txt1 = 'var tag = el.nodeName.toLowerCase();'
# txt2 = '''return '//' + tag + '[@id="' + el.id + '"]' + path;'''
txt3 = ''' && sib.nodeName.toLowerCase()==tag'''
txt4 = '''
if(nth>1){path = '/' + tag + '[' + nth + ']' + path;}
else{path = '/' + tag + path;}'''
txt5 = '''return path;'''
elif mode == 'css':
txt1 = ''
# txt2 = '''return '#' + el.id + path;'''
txt3 = ''
txt4 = '''path = '>' + ":nth-child(" + nth + ")" + path;'''
txt5 = '''return path.substr(1);'''
else:
raise ValueError(f"mode参数只能是'xpath''css',现在是:'{mode}'")
js = '''function(){
function e(el) {
if (!(el instanceof Element)) return;
var path = '';
while (el.nodeType === Node.ELEMENT_NODE) {
''' + txt1 + '''
var sib = el, nth = 0;
while (sib) {
if(sib.nodeType === Node.ELEMENT_NODE''' + txt3 + '''){nth += 1;}
sib = sib.previousSibling;
}
''' + txt4 + '''
el = el.parentNode;
}
''' + txt5 + '''
}
return e(this);}
'''
t = self.page.driver.Runtime.callFunctionOn(functionDeclaration=js, objectId=self._obj_id)['result']['value']
return f':root{t}' if mode == 'css' else t
def make_chrome_ele(ele: ChromeElement, def make_chrome_ele(ele: ChromeElement,
@ -161,60 +310,74 @@ def make_chrome_ele(ele: ChromeElement,
# ---------------执行查找----------------- # ---------------执行查找-----------------
if loc[0] == 'xpath': if loc[0] == 'xpath':
type_txt = '9' if single else '7' return _find_by_xpath(ele, loc[1], single, timeout)
node_txt = 'this.contentDocument' if ele.tag in ('iframe', 'frame') else 'this'
js = _make_js(loc[1], type_txt, node_txt) else:
print(js) return _find_by_css(ele, loc[1], single, timeout)
r = ele.page.driver.Runtime.callFunctionOn(functionDeclaration=js, objectId=ele._obj_id,)
# print(r)
if r['result']['type'] == 'string': def _find_by_xpath(ele: ChromeElement, xpath: str, single: bool, timeout: float):
type_txt = '9' if single else '7'
node_txt = 'this.contentDocument' if ele.tag in ('iframe', 'frame') else 'this'
js = _make_js(xpath, type_txt, node_txt)
r = ele.page.driver.Runtime.callFunctionOn(functionDeclaration=js, objectId=ele.obj_id)
if r['result']['type'] == 'string':
return r['result']['value']
if 'exceptionDetails' in r:
if 'The result is not a node set' in r['result']['description']:
js = _make_js(xpath, '1', node_txt)
r = ele.page.driver.Runtime.callFunctionOn(functionDeclaration=js, objectId=ele.obj_id)
return r['result']['value'] return r['result']['value']
else:
raise SyntaxError(f'查询语句错误:\n{r}')
t1 = perf_counter()
while (r['result']['subtype'] == 'null'
or r['result']['description'] == 'NodeList(0)') and perf_counter() - t1 < timeout:
r = ele.page.driver.Runtime.callFunctionOn(functionDeclaration=js, objectId=ele.obj_id)
if single:
if r['result']['subtype'] == 'null': if r['result']['subtype'] == 'null':
return None if single else [] return None
if r['result']['className'] == 'TypeError': else:
if 'The result is not a node set' in r['result']['description']: return ChromeElement(ele.page, obj_id=r['result']['objectId'])
js = _make_js(loc[1], '1', node_txt)
r = ele.page.driver.Runtime.callFunctionOn(functionDeclaration=js, objectId=ele._obj_id)
return r['result']['value']
else: else:
raise RuntimeError(r['result']['description']) if r['result']['description'] == 'NodeList(0)':
return []
else:
r = ele.page.driver.Runtime.getProperties(objectId=r['result']['objectId'], ownProperties=True)['result']
return [ChromeElement(ele.page, obj_id=i['value']['objectId'])
if i['value']['type'] == 'object' else i['value']['value']
for i in r[:-1]]
elif 'objectId' in r['result']:
if not single:
r = ele.page.driver.Runtime.getProperties(objectId=r['result']['objectId'])['result']
result = []
for i in r:
if not i['enumerable']:
break
result.append(ChromeElement(ele.page, obj_id=i['value']['objectId']))
r = result
return r def _find_by_css(ele: ChromeElement, selector: str, single: bool, timeout: float):
selector = selector.replace('"', r'\"')
find_all = '' if single else 'All'
js = f'function(){{return this.querySelector{find_all}("{selector}");}}'
r = ele.page.driver.Runtime.callFunctionOn(functionDeclaration=js, objectId=ele.obj_id)
if 'exceptionDetails' in r:
raise SyntaxError(f'查询语句错误:\n{r}')
# try: t1 = perf_counter()
# # 使用xpath查找 while (r['result']['subtype'] == 'null'
# if loc[0] == 'xpath': or r['result']['description'] == 'NodeList(0)') and perf_counter() - t1 < timeout:
# js = _make_js() r = ele.page.driver.Runtime.callFunctionOn(functionDeclaration=js, objectId=ele.obj_id)
# r = ele.page.driver.Runtime.callFunctionOn(functionDeclaration=js,
# objectId=self._obj_id)['result'].get('objectId', None) if single:
# return r if not r else _ele(self.page, obj_id=r) if r['result']['subtype'] == 'null':
# return None
# return wait.until(ElementsByXpath(page, loc[1], single, timeout)) else:
# return ChromeElement(ele.page, obj_id=r['result']['objectId'])
# # 使用css selector查找
# else: else:
# if single: if r['result']['description'] == 'NodeList(0)':
# return DriverElement(wait.until(ec.presence_of_element_located(loc)), page) return []
# else: else:
# eles = wait.until(ec.presence_of_all_elements_located(loc)) r = ele.page.driver.Runtime.getProperties(objectId=r['result']['objectId'], ownProperties=True)['result']
# return [DriverElement(ele, page) for ele in eles] return [ChromeElement(ele.page, obj_id=i['value']['objectId']) for i in r]
#
# except TimeoutException:
# return [] if not single else None
#
# except InvalidElementStateException:
# raise ValueError(f'无效的查找语句:{loc}')
def _make_js(xpath: str, type_txt: str, node_txt: str): def _make_js(xpath: str, type_txt: str, node_txt: str):
@ -251,99 +414,86 @@ else{a.push(e.snapshotItem(i));}}"""
return js return js
# class ElementsByXpath(object):
# """用js通过xpath获取元素、节点或属性与WebDriverWait配合使用""" class ChromeScroll(object):
# """用于滚动的对象"""
# def __init__(self, page, xpath: str = None, single: bool = False, timeout: float = 10):
# """ def __init__(self, page_or_ele):
# :param page: DrissionPage对象 """
# :param xpath: xpath文本 :param page_or_ele: ChromePage或ChromeElement
# :param single: True则返回第一个False则返回全部 """
# :param timeout: 超时时间 if isinstance(page_or_ele, ChromeElement):
# """ self.t1 = self.t2 = 'this'
# self.page = page self.obj_id = page_or_ele.obj_id
# self.xpath = xpath self.page = page_or_ele.page
# self.single = single else:
# self.timeout = timeout self.t1 = 'window'
# self.t2 = 'document.documentElement'
# def __call__(self, ele_or_driver: Union[RemoteWebDriver, WebElement]) \ self.obj_id = None
# -> Union[str, DriverElement, None, List[str or DriverElement]]: self.page = page_or_ele
#
# def get_nodes(node=None, xpath_txt=None, type_txt='7'): def _run_script(self, js: str):
# """用js通过xpath获取元素、节点或属性 js = js.format(self.t1, self.t2, self.t2)
# :param node: 'document' 或 元素对象 if self.obj_id:
# :param xpath_txt: xpath语句 js = f'function(){{{js}}}'
# :param type_txt: resultType,参考 https://developer.mozilla.org/zh-CN/docs/Web/API/Document/evaluate self.page.driver.Runtime.callFunctionOn(functionDeclaration=js, objectId=self.obj_id)
# :return: 元素对象或属性、文本字符串 else:
# """ self.page.driver.Runtime.evaluate(expression=js)
# node_txt = 'document' if not node or node == 'document' else 'arguments[0]'
# for_txt = '' def to_top(self) -> None:
# """滚动到顶端,水平位置不变"""
# # 获取第一个元素、节点或属性 self._run_script('{}.scrollTo({}.scrollLeft,0);')
# if type_txt == '9':
# return_txt = ''' def to_bottom(self) -> None:
# if(e.singleNodeValue.constructor.name=="Text"){return e.singleNodeValue.data;} """滚动到底端,水平位置不变"""
# else if(e.singleNodeValue.constructor.name=="Attr"){return e.singleNodeValue.nodeValue;} self._run_script('{}.scrollTo({}.scrollLeft,{}.scrollHeight);')
# else if(e.singleNodeValue.constructor.name=="Comment"){return e.singleNodeValue.nodeValue;}
# else{return e.singleNodeValue;} def to_half(self) -> None:
# ''' """滚动到垂直中间位置,水平位置不变"""
# self._run_script('{}.scrollTo({}.scrollLeft,{}.scrollHeight/2);')
# # 按顺序获取所有元素、节点或属性
# elif type_txt == '7': def to_rightmost(self) -> None:
# for_txt = """ """滚动到最右边,垂直位置不变"""
# var a=new Array(); self._run_script('{}.scrollTo({}.scrollWidth,{}.scrollTop);')
# for(var i = 0; i <e.snapshotLength ; i++){
# if(e.snapshotItem(i).constructor.name=="Text"){a.push(e.snapshotItem(i).data);} def to_leftmost(self) -> None:
# else if(e.snapshotItem(i).constructor.name=="Attr"){a.push(e.snapshotItem(i).nodeValue);} """滚动到最左边,垂直位置不变"""
# else if(e.snapshotItem(i).constructor.name=="Comment"){a.push(e.snapshotItem(i).nodeValue);} self._run_script('{}.scrollTo(0,{}.scrollTop);')
# else{a.push(e.snapshotItem(i));}
# } def to_location(self, x: int, y: int) -> None:
# """ """滚动到指定位置 \n
# return_txt = 'return a;' :param x: 水平距离
# :param y: 垂直距离
# elif type_txt == '2': :return: None
# return_txt = 'return e.stringValue;' """
# elif type_txt == '1': self._run_script(f'{{}}.scrollTo({x},{y});')
# return_txt = 'return e.numberValue;'
# else: def up(self, pixel: int = 300) -> None:
# return_txt = 'return e.singleNodeValue;' """向上滚动若干像素,水平位置不变 \n
# :param pixel: 滚动的像素
# js = """ :return: None
# var e=document.evaluate(arguments[1], """ + node_txt + """, null, """ + type_txt + """,null); """
# """ + for_txt + """ pixel = -pixel
# """ + return_txt + """ self._run_script(f'{{}}.scrollBy(0,{pixel});')
# """
# return driver.execute_script(js, node, xpath_txt) def down(self, pixel: int = 300) -> None:
# """向下滚动若干像素,水平位置不变 \n
# if isinstance(ele_or_driver, RemoteWebDriver): :param pixel: 滚动的像素
# driver, the_node = ele_or_driver, 'document' :return: None
# else: """
# driver, the_node = ele_or_driver.parent, ele_or_driver self._run_script(f'{{}}.scrollBy(0,{pixel});')
#
# # 把lxml元素对象包装成DriverElement对象并按需要返回第一个或全部 def left(self, pixel: int = 300) -> None:
# if self.single: """向左滚动若干像素,垂直位置不变 \n
# try: :param pixel: 滚动的像素
# e = get_nodes(the_node, xpath_txt=self.xpath, type_txt='9') :return: None
# """
# if isinstance(e, WebElement): pixel = -pixel
# return DriverElement(e, self.page) self._run_script(f'{{}}.scrollBy({pixel},0);')
# elif isinstance(e, str):
# return format_html(e) def right(self, pixel: int = 300) -> None:
# else: """向右滚动若干像素,垂直位置不变 \n
# return e :param pixel: 滚动的像素
# :return: None
# # 找不到目标时 """
# except JavascriptException as err: self._run_script(f'{{}}.scrollBy({pixel},0);')
# if 'The result is not a node set' in err.msg:
# try:
# return get_nodes(the_node, xpath_txt=self.xpath, type_txt='1')
# except JavascriptException:
# return None
# else:
# return None
#
# else: # 返回全部
# return ([DriverElement(x, self.page) if isinstance(x, WebElement)
# else format_html(x)
# for x in get_nodes(the_node, xpath_txt=self.xpath)
# if x != '\n'])

View File

@ -1,15 +1,17 @@
# -*- coding:utf-8 -*- # -*- coding:utf-8 -*-
from os import sep
from pathlib import Path
from time import perf_counter, sleep from time import perf_counter, sleep
from typing import Union, Tuple from typing import Union, Tuple, List
from pychrome import Tab from pychrome import Tab
from requests import get as requests_get from requests import get as requests_get
from json import loads from json import loads
from .base import BasePage from .base import BasePage
from .common import get_loc from .common import get_loc, get_usable_path
from .drission import connect_chrome from .drission import connect_chrome
from .chrome_element import ChromeElement from .chrome_element import ChromeElement, ChromeScroll
class ChromePage(BasePage): class ChromePage(BasePage):
@ -23,18 +25,21 @@ class ChromePage(BasePage):
connect_chrome(path, self.debugger_address) connect_chrome(path, self.debugger_address)
tab_handle = self.tab_handles[0] if not tab_handle else tab_handle tab_handle = self.tab_handles[0] if not tab_handle else tab_handle
self._connect_debugger(tab_handle) self._connect_debugger(tab_handle)
self._scroll = None
def _connect_debugger(self, tab_handle: str): def __call__(self, loc_or_str: Union[Tuple[str, str], str, 'ChromeElement'],
self.driver = Tab(id=tab_handle, type='page', timeout: float = None) -> Union['ChromeElement', str, None]:
webSocketDebuggerUrl=f'ws://{self.debugger_address}/devtools/page/{tab_handle}') """在内部查找元素 \n
self.driver.start() ele = page('@id=ele_id') \n
self.driver.DOM.enable() :param loc_or_str: 元素的定位信息可以是loc元组或查询字符串
self.driver.DOM.getDocument() :param timeout: 超时时间
:return: DriverElement对象或属性文本
"""
return self.ele(loc_or_str, timeout)
@property @property
def url(self) -> str: def url(self) -> str:
"""返回当前页面url""" """返回当前页面url"""
# todo: 是否有更好的方法?
json = loads(requests_get(f'http://{self.debugger_address}/json').text) json = loads(requests_get(f'http://{self.debugger_address}/json').text)
return [i['url'] for i in json if i['id'] == self.driver.id][0] return [i['url'] for i in json if i['id'] == self.driver.id][0]
@ -49,6 +54,14 @@ class ChromePage(BasePage):
"""当返回内容是json格式时返回对应的字典""" """当返回内容是json格式时返回对应的字典"""
return loads(self('t:pre').text) return loads(self('t:pre').text)
@property
def tabs_count(self) -> int:
    """Return the number of tab handles, or 0 if they cannot be read or counted."""
    try:
        count = len(self.tab_handles)
    except Exception:
        # Best effort: any failure while reading the handles counts as "no tabs".
        count = 0
    return count
@property @property
def tab_handles(self) -> list: def tab_handles(self) -> list:
"""返回所有标签页id""" """返回所有标签页id"""
@ -71,8 +84,18 @@ class ChromePage(BasePage):
return self.driver.Runtime.evaluate(expression='document.readyState;')['result']['value'] return self.driver.Runtime.evaluate(expression='document.readyState;')['result']['value']
@property @property
def active_ele(self): def scroll(self) -> ChromeScroll:
pass """用于滚动滚动条的对象"""
if self._scroll is None:
self._scroll = ChromeScroll(self)
return self._scroll
@property
def size(self) -> dict:
    """Return the page's total size, read from ``document.body``.

    :return: dict with ``height`` (scrollHeight) and ``width`` (scrollWidth)
    """
    width = self.driver.Runtime.evaluate(expression='document.body.scrollWidth;')
    height = self.driver.Runtime.evaluate(expression='document.body.scrollHeight;')
    return {'height': height['result']['value'], 'width': width['result']['value']}
def get(self, def get(self,
url: str, url: str,
@ -97,25 +120,33 @@ class ChromePage(BasePage):
self.driver.DOM.getDocument() self.driver.DOM.getDocument()
return self._url_available return self._url_available
def get_cookies(self, as_dict: bool = False): def get_cookies(self, as_dict: bool = False) -> Union[list, dict]:
return self.driver.Network.getCookies() cookies = self.driver.Network.getCookies()['cookies']
if as_dict:
return {cookie['name']: cookie['value'] for cookie in cookies}
else:
return cookies
def ele(self, loc_or_ele: Union[Tuple[str, str], str, ChromeElement], timeout: float = None): def ele(self,
loc_or_ele: Union[Tuple[str, str], str, ChromeElement],
timeout: float = None) -> Union[ChromeElement, str, None]:
return self._ele(loc_or_ele, timeout=timeout) return self._ele(loc_or_ele, timeout=timeout)
def eles(self, loc_or_ele: Union[Tuple[str, str], str, ChromeElement], timeout: float = None): def eles(self,
loc_or_ele: Union[Tuple[str, str], str, ChromeElement],
timeout: float = None) -> List[Union[ChromeElement, str]]:
return self._ele(loc_or_ele, timeout=timeout, single=False) return self._ele(loc_or_ele, timeout=timeout, single=False)
def s_ele(self): # def s_ele(self):
pass # pass
#
def s_eles(self): # def s_eles(self):
pass # pass
def _ele(self, def _ele(self,
loc_or_ele: Union[Tuple[str, str], str, ChromeElement], loc_or_ele: Union[Tuple[str, str], str, ChromeElement],
timeout: float = None, timeout: float = None,
single: bool = True): single: bool = True) -> Union[ChromeElement, str, None, List[Union[ChromeElement, str]]]:
if isinstance(loc_or_ele, (str, tuple)): if isinstance(loc_or_ele, (str, tuple)):
loc = get_loc(loc_or_ele)[1] loc = get_loc(loc_or_ele)[1]
elif isinstance(loc_or_ele, ChromeElement): elif isinstance(loc_or_ele, ChromeElement):
@ -143,6 +174,49 @@ class ChromePage(BasePage):
else: else:
return [ChromeElement(self, node_id=i) for i in nodeIds['nodeIds']] return [ChromeElement(self, node_id=i) for i in nodeIds['nodeIds']]
def screenshot(self, path: str = None,
               filename: str = None,
               as_bytes: bool = False,
               full_page: bool = True) -> Union[str, bytes]:
    """Take a screenshot of the page.

    With ``full_page`` (the default) the capture is clipped to the whole page
    as reported by ``self.size``; otherwise no clip is passed.

    :param path: directory to save into; the current directory when omitted
    :param filename: image file name; the page title is used when omitted, and
                     ``.png`` is appended if the name does not end with it
    :param as_bytes: return the decoded image bytes instead of saving a file;
                     ``path`` and ``filename`` are ignored when True
    :param full_page: whether to capture the whole page
    :return: full path of the saved image, or the image bytes
    """
    from base64 import b64decode
    if full_page:
        # The page size is needed only for the full-page clip, so it is read
        # here rather than unconditionally (it costs two script evaluations).
        hw = self.size
        vp = {'x': 0, 'y': 0, 'width': hw['width'], 'height': hw['height'], 'scale': 1}
        png = self.driver.Page.captureScreenshot(captureBeyondViewport=True, clip=vp)['data']
    else:
        png = self.driver.Page.captureScreenshot(captureBeyondViewport=True)['data']

    # captureScreenshot returns the image base64-encoded.
    png = b64decode(png)

    if as_bytes:
        return png

    from DataRecorder import ByteRecorder
    name = filename or self.title
    if not name.lower().endswith('.png'):
        name = f'{name}.png'
    path = Path(path or '.').absolute()
    path.mkdir(parents=True, exist_ok=True)
    # NOTE(review): get_usable_path presumably resolves name clashes; a page
    # title containing path separators is not sanitised here - confirm.
    img_path = str(get_usable_path(f'{path}{sep}{name}'))
    b = ByteRecorder(img_path)
    b.add_data(png)
    b.record()
    return img_path
def scroll_to_see(self, loc_or_ele: Union[str, tuple, ChromeElement]) -> None:
    """Scroll the page until the given element is in view.

    :param loc_or_ele: an element, or a locator (loc tuple or query string)
                       that is passed to ``self.ele``
    :return: None
    """
    target = self.ele(loc_or_ele)
    target_node = target.node_id
    self.driver.DOM.scrollIntoViewIfNeeded(nodeId=target_node)
def refresh(self, ignore_cache: bool = False) -> None: def refresh(self, ignore_cache: bool = False) -> None:
"""刷新当前页面 \n """刷新当前页面 \n
:param ignore_cache: 是否忽略缓存 :param ignore_cache: 是否忽略缓存
@ -309,6 +383,20 @@ class ChromePage(BasePage):
if cookies: if cookies:
self.driver.Network.clearBrowserCookies() self.driver.Network.clearBrowserCookies()
def check_page(self):
pass
# @property
# def active_ele(self):
# pass
def _connect_debugger(self, tab_handle: str):
    """Attach ``self.driver`` to the tab with the given handle.

    Creates a ``Tab`` for the handle's devtools websocket URL, starts it,
    enables the DOM domain and requests the document.

    :param tab_handle: id of the tab to connect to
    """
    ws_url = f'ws://{self.debugger_address}/devtools/page/{tab_handle}'
    self.driver = Tab(id=tab_handle, type='page', webSocketDebuggerUrl=ws_url)
    self.driver.start()
    dom = self.driver.DOM
    dom.enable()
    dom.getDocument()
def _d_connect(self, def _d_connect(self,
to_url: str, to_url: str,
times: int = 0, times: int = 0,
@ -356,9 +444,6 @@ class ChromePage(BasePage):
return is_ok return is_ok
def check_page(self):
pass
def _get_tabs(handles: list, num_or_handles: Union[int, str, list, tuple, set]) -> set: def _get_tabs(handles: list, num_or_handles: Union[int, str, list, tuple, set]) -> set:
"""返回指定标签页handle组成的set \n """返回指定标签页handle组成的set \n

View File

@ -1,11 +1,11 @@
[paths] [paths]
chromedriver_path = chromedriver_path = D:\coding\Chrome92\chromedriver.exe
tmp_path = tmp_path =
[chrome_options] [chrome_options]
debugger_address = 127.0.0.1:9222 debugger_address = 127.0.0.1:9222
binary_location = binary_location = D:\coding\Chrome92\chrome.exe
arguments = ['--no-sandbox', '--disable-gpu', '--ignore-certificate-errors', '--disable-infobars', '--disable-popup-blocking'] arguments = ['--no-sandbox', '--disable-gpu', '--ignore-certificate-errors', '--disable-infobars', '--disable-popup-blocking', '--user-data-dir=D:\\coding\\Chrome92\\user_data']
extensions = [] extensions = []
experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}, 'plugins.plugins_list': [{'enabled': False, 'name': 'Chrome PDF Viewer'}]}, 'useAutomationExtension': False, 'excludeSwitches': ['enable-automation']} experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}, 'plugins.plugins_list': [{'enabled': False, 'name': 'Chrome PDF Viewer'}]}, 'useAutomationExtension': False, 'excludeSwitches': ['enable-automation']}
timeouts = {'implicit': 10.0, 'pageLoad': 30.0, 'script': 30.0} timeouts = {'implicit': 10.0, 'pageLoad': 30.0, 'script': 30.0}

View File

@ -6,7 +6,6 @@
""" """
from re import match, DOTALL from re import match, DOTALL
from typing import Union, List, Tuple from typing import Union, List, Tuple
from urllib.parse import urlparse, urljoin, urlunparse
from lxml.etree import tostring from lxml.etree import tostring
from lxml.html import HtmlElement, fromstring from lxml.html import HtmlElement, fromstring

View File

@ -6,3 +6,4 @@ cssselect
DownloadKit DownloadKit
FlowViewer FlowViewer
pychrome pychrome
DataRecorder