mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
Pre Merge pull request !10 from 熊猫同学/master
This commit is contained in:
commit
671fa6fafe
@ -96,7 +96,12 @@ def str_to_loc(loc: str) -> tuple:
|
|||||||
text:search_text - 文本含有search_text的元素 \n
|
text:search_text - 文本含有search_text的元素 \n
|
||||||
text=search_text - 文本等于search_text的元素 \n
|
text=search_text - 文本等于search_text的元素 \n
|
||||||
xpath://div[@class="ele_class"] - 用xpath查找 \n
|
xpath://div[@class="ele_class"] - 用xpath查找 \n
|
||||||
css:div.ele_class - 用css selector查找
|
css:div.ele_class - 用css selector查找 \n
|
||||||
|
xpath://div[@class="ele_class"] - 等同于 x://div[@class="ele_class"] \n
|
||||||
|
css:div.ele_class - 等同于 c:div.ele_class \n
|
||||||
|
tag:div - 等同于 t:div \n
|
||||||
|
text:search_text - 等同于 tx:search_text \n
|
||||||
|
text=search_text - 等同于 tx=search_text \n
|
||||||
"""
|
"""
|
||||||
loc_by = 'xpath'
|
loc_by = 'xpath'
|
||||||
|
|
||||||
@ -107,12 +112,18 @@ def str_to_loc(loc: str) -> tuple:
|
|||||||
else:
|
else:
|
||||||
loc = loc.replace('.', '@class=', 1)
|
loc = loc.replace('.', '@class=', 1)
|
||||||
|
|
||||||
if loc.startswith('#'):
|
elif loc.startswith('#'):
|
||||||
if loc.startswith(('#=', '#:',)):
|
if loc.startswith(('#=', '#:',)):
|
||||||
loc = loc.replace('#', '@id', 1)
|
loc = loc.replace('#', '@id', 1)
|
||||||
else:
|
else:
|
||||||
loc = loc.replace('#', '@id=', 1)
|
loc = loc.replace('#', '@id=', 1)
|
||||||
|
|
||||||
|
elif loc.startswith(('t:', 't=')):
|
||||||
|
loc = f'tag:{loc[2:]}'
|
||||||
|
|
||||||
|
elif loc.startswith(('tx:', 'tx=')):
|
||||||
|
loc = f'text{loc[2:]}'
|
||||||
|
|
||||||
# 根据属性查找
|
# 根据属性查找
|
||||||
if loc.startswith('@'):
|
if loc.startswith('@'):
|
||||||
r = re_SPLIT(r'([:=])', loc[1:], maxsplit=1)
|
r = re_SPLIT(r'([:=])', loc[1:], maxsplit=1)
|
||||||
@ -123,7 +134,7 @@ def str_to_loc(loc: str) -> tuple:
|
|||||||
loc_str = f'//*[@{loc[1:]}]'
|
loc_str = f'//*[@{loc[1:]}]'
|
||||||
|
|
||||||
# 根据tag name查找
|
# 根据tag name查找
|
||||||
elif loc.startswith(('tag=', 'tag:')):
|
elif loc.startswith(('tag:', 'tag=')):
|
||||||
if '@' not in loc[4:]:
|
if '@' not in loc[4:]:
|
||||||
loc_str = f'//*[name()="{loc[4:]}"]'
|
loc_str = f'//*[name()="{loc[4:]}"]'
|
||||||
else:
|
else:
|
||||||
@ -131,13 +142,13 @@ def str_to_loc(loc: str) -> tuple:
|
|||||||
r = re_SPLIT(r'([:=])', at_lst[1], maxsplit=1)
|
r = re_SPLIT(r'([:=])', at_lst[1], maxsplit=1)
|
||||||
if len(r) == 3:
|
if len(r) == 3:
|
||||||
mode = 'exact' if r[1] == '=' else 'fuzzy'
|
mode = 'exact' if r[1] == '=' else 'fuzzy'
|
||||||
arg_str = r[0] if r[0] == 'text()' else f'@{r[0]}'
|
arg_str = 'text()' if r[0] in ('text()', 'tx()') else f'@{r[0]}'
|
||||||
loc_str = _make_xpath_str(at_lst[0], arg_str, r[2], mode)
|
loc_str = _make_xpath_str(at_lst[0], arg_str, r[2], mode)
|
||||||
else:
|
else:
|
||||||
loc_str = f'//*[name()="{at_lst[0]}" and @{r[0]}]'
|
loc_str = f'//*[name()="{at_lst[0]}" and @{r[0]}]'
|
||||||
|
|
||||||
# 根据文本查找
|
# 根据文本查找
|
||||||
elif loc.startswith(('text=', 'text:')):
|
elif loc.startswith(('text:', 'text=')):
|
||||||
if len(loc) > 5:
|
if len(loc) > 5:
|
||||||
mode = 'exact' if loc[4] == '=' else 'fuzzy'
|
mode = 'exact' if loc[4] == '=' else 'fuzzy'
|
||||||
loc_str = _make_xpath_str('*', 'text()', loc[5:], mode)
|
loc_str = _make_xpath_str('*', 'text()', loc[5:], mode)
|
||||||
@ -145,13 +156,18 @@ def str_to_loc(loc: str) -> tuple:
|
|||||||
loc_str = '//*[not(text())]'
|
loc_str = '//*[not(text())]'
|
||||||
|
|
||||||
# 用xpath查找
|
# 用xpath查找
|
||||||
elif loc.startswith(('xpath=', 'xpath:')):
|
elif loc.startswith(('xpath:', 'xpath=')):
|
||||||
loc_str = loc[6:]
|
loc_str = loc[6:]
|
||||||
|
elif loc.startswith(('x:', 'x=')):
|
||||||
|
loc_str = loc[2:]
|
||||||
|
|
||||||
# 用css selector查找
|
# 用css selector查找
|
||||||
elif loc.startswith(('css=', 'css:')):
|
elif loc.startswith(('css:', 'css=')):
|
||||||
loc_by = 'css selector'
|
loc_by = 'css selector'
|
||||||
loc_str = loc[4:]
|
loc_str = loc[4:]
|
||||||
|
elif loc.startswith(('c:', 'c=')):
|
||||||
|
loc_by = 'css selector'
|
||||||
|
loc_str = loc[2:]
|
||||||
|
|
||||||
# 根据文本模糊查找
|
# 根据文本模糊查找
|
||||||
else:
|
else:
|
||||||
@ -177,14 +193,18 @@ def _make_xpath_str(tag: str, arg: str, val: str, mode: str = 'fuzzy') -> str:
|
|||||||
return f'//*[{tag_name}{arg}={_make_search_str(val)}]'
|
return f'//*[{tag_name}{arg}={_make_search_str(val)}]'
|
||||||
|
|
||||||
elif mode == 'fuzzy':
|
elif mode == 'fuzzy':
|
||||||
return f"//*[{tag_name}contains({arg},{_make_search_str(val)})]"
|
if arg == 'text()':
|
||||||
|
tag_name = '' if tag == '*' else f'{tag}/'
|
||||||
|
return f'//{tag_name}text()[contains(., {_make_search_str(val)})]/..'
|
||||||
|
else:
|
||||||
|
return f"//*[{tag_name}contains({arg},{_make_search_str(val)})]"
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise ValueError("Argument mode can only be 'exact' or 'fuzzy'.")
|
raise ValueError("Argument mode can only be 'exact' or 'fuzzy'.")
|
||||||
|
|
||||||
|
|
||||||
def _make_search_str(search_str: str) -> str:
|
def _make_search_str(search_str: str) -> str:
|
||||||
"""将"转义,不知何故不能直接用\来转义 \n
|
"""将"转义,不知何故不能直接用 \ 来转义 \n
|
||||||
:param search_str: 查询字符串
|
:param search_str: 查询字符串
|
||||||
:return: 把"转义后的字符串
|
:return: 把"转义后的字符串
|
||||||
"""
|
"""
|
||||||
@ -201,9 +221,15 @@ def _make_search_str(search_str: str) -> str:
|
|||||||
return search_str
|
return search_str
|
||||||
|
|
||||||
|
|
||||||
def format_html(text: str) -> str:
|
def format_html(text: str, trans: bool = True) -> str:
|
||||||
"""处理html编码字符"""
|
"""处理html编码字符"""
|
||||||
return unescape(text).replace('\xa0', ' ') if text else text
|
if not text:
|
||||||
|
return text
|
||||||
|
|
||||||
|
if trans:
|
||||||
|
text = unescape(text)
|
||||||
|
|
||||||
|
return text.replace('\xa0', ' ')
|
||||||
|
|
||||||
|
|
||||||
def translate_loc(loc: tuple) -> tuple:
|
def translate_loc(loc: tuple) -> tuple:
|
||||||
@ -291,3 +317,25 @@ def unzip(zip_path: str, to_path: str) -> Union[list, None]:
|
|||||||
|
|
||||||
with ZipFile(zip_path, 'r') as f:
|
with ZipFile(zip_path, 'r') as f:
|
||||||
return [f.extract(f.namelist()[0], path=to_path)]
|
return [f.extract(f.namelist()[0], path=to_path)]
|
||||||
|
|
||||||
|
|
||||||
|
def get_exe_path_from_port(port: Union[str, int]) -> Union[str, None]:
    """Return the executable path of the first process bound to a port.

    Relies on the Windows command-line tools ``netstat`` and ``wmic``.
    Polls for up to 10 seconds while no process is found on the port.

    :param port: port number, as an int or a string of ASCII digits
    :return: path reported by ``wmic`` for the owning process, or None when
             the port is not a plain number or no process / path is found
    """
    from os import popen
    from time import perf_counter, sleep

    port = str(port).strip()
    # Reject anything that is not purely digits: the value identifies a port
    # and must never carry shell syntax or match unrelated text.
    if not port or any(ch not in '0123456789' for ch in port):
        return None

    def _find_pid() -> Union[str, None]:
        """Return the PID (last column) of the first netstat row whose local address uses the port."""
        for line in popen('netstat -ano').read().split('\n'):
            cols = line.split()
            # Rows look like: proto, local address, foreign address, [state], pid.
            # Compare the local-address column exactly instead of searching the
            # whole line, so e.g. port 922 does not match 9222 or a PID.
            if len(cols) >= 4 and cols[1].endswith(f':{port}') and cols[-1].isdigit():
                return cols[-1]
        return None

    deadline = perf_counter() + 10
    processid = _find_pid()
    while processid is None and perf_counter() < deadline:
        # Short pause so the retry loop does not spawn netstat back-to-back.
        sleep(.1)
        processid = _find_pid()

    if processid is None:
        return None

    file_lst = popen(f'wmic process where processid={processid} get executablepath').read().split('\n')
    # The path is read from the third line of wmic's output.
    return file_lst[2].strip() if len(file_lst) > 2 else None
|
||||||
|
@ -8,11 +8,10 @@
|
|||||||
from configparser import RawConfigParser, NoSectionError, NoOptionError
|
from configparser import RawConfigParser, NoSectionError, NoOptionError
|
||||||
from http.cookiejar import Cookie
|
from http.cookiejar import Cookie
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Union
|
|
||||||
|
|
||||||
from requests.cookies import RequestsCookieJar
|
from requests.cookies import RequestsCookieJar
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from selenium.webdriver.chrome.options import Options
|
from selenium.webdriver.chrome.options import Options
|
||||||
|
from typing import Any, Union
|
||||||
|
|
||||||
|
|
||||||
class OptionsManager(object):
|
class OptionsManager(object):
|
||||||
@ -22,7 +21,7 @@ class OptionsManager(object):
|
|||||||
"""初始化,读取配置文件,如没有设置临时文件夹,则设置并新建 \n
|
"""初始化,读取配置文件,如没有设置临时文件夹,则设置并新建 \n
|
||||||
:param path: ini文件的路径,默认读取模块文件夹下的
|
:param path: ini文件的路径,默认读取模块文件夹下的
|
||||||
"""
|
"""
|
||||||
self.ini_path = path or str(Path(__file__).parent / 'configs.ini')
|
self.ini_path = str(Path(__file__).parent / 'configs.ini') if path == 'default' or path is None else path
|
||||||
self._conf = RawConfigParser()
|
self._conf = RawConfigParser()
|
||||||
self._conf.read(self.ini_path, encoding='utf-8')
|
self._conf.read(self.ini_path, encoding='utf-8')
|
||||||
|
|
||||||
@ -399,7 +398,7 @@ class SessionOptions(object):
|
|||||||
path = path / 'config.ini' if path.is_dir() else path
|
path = path / 'config.ini' if path.is_dir() else path
|
||||||
|
|
||||||
if path.exists():
|
if path.exists():
|
||||||
om = OptionsManager(path)
|
om = OptionsManager(str(path))
|
||||||
else:
|
else:
|
||||||
om = OptionsManager(self.ini_path or str(Path(__file__).parent / 'configs.ini'))
|
om = OptionsManager(self.ini_path or str(Path(__file__).parent / 'configs.ini'))
|
||||||
|
|
||||||
@ -466,7 +465,7 @@ class DriverOptions(Options):
|
|||||||
path = path / 'config.ini' if path.is_dir() else path
|
path = path / 'config.ini' if path.is_dir() else path
|
||||||
|
|
||||||
if path.exists():
|
if path.exists():
|
||||||
om = OptionsManager(path)
|
om = OptionsManager(str(path))
|
||||||
else:
|
else:
|
||||||
om = OptionsManager(self.ini_path or str(Path(__file__).parent / 'configs.ini'))
|
om = OptionsManager(self.ini_path or str(Path(__file__).parent / 'configs.ini'))
|
||||||
|
|
||||||
@ -667,8 +666,7 @@ def _dict_to_chrome_options(options: dict) -> Options:
|
|||||||
|
|
||||||
for i in options['experimental_options']:
|
for i in options['experimental_options']:
|
||||||
chrome_options.add_experimental_option(i, options['experimental_options'][i])
|
chrome_options.add_experimental_option(i, options['experimental_options'][i])
|
||||||
# if options.get('capabilities' ,None):
|
|
||||||
# pass # 未知怎么用
|
|
||||||
return chrome_options
|
return chrome_options
|
||||||
|
|
||||||
|
|
||||||
@ -683,8 +681,9 @@ def _chrome_options_to_dict(options: Union[dict, DriverOptions, Options, None])
|
|||||||
re_dict = dict()
|
re_dict = dict()
|
||||||
attrs = ['debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options', 'driver_path']
|
attrs = ['debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options', 'driver_path']
|
||||||
|
|
||||||
|
options_dir = options.__dir__()
|
||||||
for attr in attrs:
|
for attr in attrs:
|
||||||
re_dict[attr] = options.__getattribute__(f'_{attr}')
|
re_dict[attr] = options.__getattribute__(f'_{attr}') if attr in options_dir else None
|
||||||
|
|
||||||
return re_dict
|
return re_dict
|
||||||
|
|
||||||
@ -739,10 +738,11 @@ def _cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict:
|
|||||||
attr_val = attr.lstrip().split('=')
|
attr_val = attr.lstrip().split('=')
|
||||||
|
|
||||||
if key == 0:
|
if key == 0:
|
||||||
|
# TODO: 检查
|
||||||
cookie_dict['name'] = attr_val[0]
|
cookie_dict['name'] = attr_val[0]
|
||||||
cookie_dict['value'] = attr_val[1]
|
cookie_dict['value'] = attr_val[1] if len(attr_val) == 2 else ''
|
||||||
else:
|
else:
|
||||||
cookie_dict[attr_val[0]] = attr_val[1]
|
cookie_dict[attr_val[0]] = attr_val[1] if len(attr_val) == 2 else ''
|
||||||
|
|
||||||
return cookie_dict
|
return cookie_dict
|
||||||
|
|
||||||
|
@ -4,16 +4,15 @@
|
|||||||
@Contact : g1879@qq.com
|
@Contact : g1879@qq.com
|
||||||
@File : drission.py
|
@File : drission.py
|
||||||
"""
|
"""
|
||||||
from sys import exit
|
|
||||||
from typing import Union
|
|
||||||
|
|
||||||
from requests import Session
|
from requests import Session
|
||||||
from requests.cookies import RequestsCookieJar
|
from requests.cookies import RequestsCookieJar
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from selenium.common.exceptions import SessionNotCreatedException, WebDriverException
|
from selenium.common.exceptions import SessionNotCreatedException, WebDriverException
|
||||||
from selenium.webdriver.chrome.options import Options
|
from selenium.webdriver.chrome.options import Options
|
||||||
from selenium.webdriver.chrome.webdriver import WebDriver
|
from selenium.webdriver.chrome.webdriver import WebDriver
|
||||||
|
from sys import exit
|
||||||
from tldextract import extract
|
from tldextract import extract
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
from .config import (_dict_to_chrome_options, _session_options_to_dict,
|
from .config import (_dict_to_chrome_options, _session_options_to_dict,
|
||||||
SessionOptions, DriverOptions, _chrome_options_to_dict, OptionsManager, _cookies_to_tuple)
|
SessionOptions, DriverOptions, _chrome_options_to_dict, OptionsManager, _cookies_to_tuple)
|
||||||
@ -35,6 +34,7 @@ class Drission(object):
|
|||||||
"""
|
"""
|
||||||
self._session = None
|
self._session = None
|
||||||
self._driver = None
|
self._driver = None
|
||||||
|
self._debugger = None
|
||||||
self._proxy = proxy
|
self._proxy = proxy
|
||||||
|
|
||||||
om = OptionsManager(ini_path) if session_or_options is None or driver_or_options is None else None
|
om = OptionsManager(ini_path) if session_or_options is None or driver_or_options is None else None
|
||||||
@ -83,37 +83,38 @@ class Drission(object):
|
|||||||
如设置了本地调试浏览器,可自动接入或打开浏览器进程。
|
如设置了本地调试浏览器,可自动接入或打开浏览器进程。
|
||||||
"""
|
"""
|
||||||
if self._driver is None:
|
if self._driver is None:
|
||||||
if isinstance(self._driver_options, dict):
|
if not isinstance(self._driver_options, dict):
|
||||||
options = _dict_to_chrome_options(self._driver_options)
|
|
||||||
else:
|
|
||||||
raise TypeError('Driver options invalid')
|
raise TypeError('Driver options invalid')
|
||||||
|
|
||||||
if self._proxy:
|
options = _dict_to_chrome_options(self._driver_options)
|
||||||
|
|
||||||
|
if not self._driver_options.get('debugger_address', None) and self._proxy:
|
||||||
options.add_argument(f'--proxy-server={self._proxy["http"]}')
|
options.add_argument(f'--proxy-server={self._proxy["http"]}')
|
||||||
|
|
||||||
driver_path = self._driver_options.get('driver_path', None) or 'chromedriver'
|
driver_path = self._driver_options.get('driver_path', None) or 'chromedriver'
|
||||||
|
chrome_path = self._driver_options.get('binary_location', None) or 'chrome.exe'
|
||||||
|
|
||||||
|
# -----------若指定debug端口且该端口未在使用中,则先启动浏览器进程-----------
|
||||||
|
if options.debugger_address and _check_port(options.debugger_address) is False:
|
||||||
|
from subprocess import Popen
|
||||||
|
port = options.debugger_address[options.debugger_address.rfind(':') + 1:]
|
||||||
|
|
||||||
|
# 启动浏览器进程,同时返回该进程使用的 chrome.exe 路径
|
||||||
|
chrome_path, self._debugger = _create_chrome(chrome_path, port,
|
||||||
|
self._driver_options['arguments'], self._proxy)
|
||||||
|
|
||||||
|
# -----------创建WebDriver对象-----------
|
||||||
|
self._driver = _create_driver(chrome_path, driver_path, options)
|
||||||
|
|
||||||
|
# 反反爬设置
|
||||||
try:
|
try:
|
||||||
if options.debugger_address and _check_port(options.debugger_address) is False:
|
self._driver.execute_script('Object.defineProperty(navigator,"webdriver",{get:() => Chrome,});')
|
||||||
from subprocess import Popen
|
except:
|
||||||
port = options.debugger_address.split(':')[-1]
|
pass
|
||||||
chrome_path = self._driver_options.get('binary_location', None) or 'chrome.exe'
|
|
||||||
Popen(f'{chrome_path} --remote-debugging-port={port}', shell=False)
|
|
||||||
|
|
||||||
self._driver = webdriver.Chrome(driver_path, options=options)
|
# self._driver.execute_cdp_cmd(
|
||||||
|
# 'Page.addScriptToEvaluateOnNewDocument',
|
||||||
except (WebDriverException, SessionNotCreatedException):
|
# {'source': 'Object.defineProperty(navigator,"webdriver",{get:() => Chrome,});'})
|
||||||
print('未指定chromedriver路径或版本与Chrome不匹配,可执行easy_set.get_match_driver()自动下载匹配的版本。')
|
|
||||||
exit(0)
|
|
||||||
|
|
||||||
# 反爬设置,似乎没用
|
|
||||||
self._driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
|
|
||||||
"source": """
|
|
||||||
Object.defineProperty(navigator, 'webdriver', {
|
|
||||||
get: () => Chrome
|
|
||||||
})
|
|
||||||
"""
|
|
||||||
})
|
|
||||||
|
|
||||||
return self._driver
|
return self._driver
|
||||||
|
|
||||||
@ -163,6 +164,41 @@ class Drission(object):
|
|||||||
for cookie in cookies:
|
for cookie in cookies:
|
||||||
self.set_cookies(cookie, set_driver=True)
|
self.set_cookies(cookie, set_driver=True)
|
||||||
|
|
||||||
|
@property
def debugger_progress(self):
    """The debug browser process object held by this instance (None if unset)."""
    debugger = self._debugger
    return debugger
|
||||||
|
|
||||||
|
def kill_browser(self) -> None:
    """Terminate the browser process, when that is possible.

    If a debug browser process object is held, it is killed directly.
    Otherwise the configured ``debugger_address`` decides: without an
    ``ip:port`` address the driver is closed; with a local address the process
    listening on that port is looked up with the Windows tools ``netstat`` and
    ``tasklist`` and force-killed only if its image name is ``chrome.exe``.

    :return: None
    """
    if self.debugger_progress:
        self.debugger_progress.kill()
        return

    # The option may exist with a None value, so normalise it before splitting.
    address = (self.driver_options.get('debugger_address', '') or '').split(':')

    if len(address) == 1:
        self.close_driver()
        return

    if len(address) != 2:
        return

    ip, port = address
    if ip not in ('127.0.0.1', 'localhost') or not port.isdigit():
        return

    import csv
    from os import popen

    # Find the PID listening on the port. The local-address column is compared
    # exactly so that a port such as 922 cannot match a row for 9222.
    pid = None
    for line in popen('netstat -nao').read().split('\n'):
        cols = line.split()
        if len(cols) >= 2 and 'LISTENING' in cols and cols[1].endswith(f':{port}'):
            pid = cols[-1]
            break

    if not pid or not pid.isdigit():
        return

    # CSV output keeps image names containing spaces in a single field, and the
    # PID is matched exactly rather than as a substring of the whole row.
    for row in csv.reader(popen('tasklist /FO CSV /NH').read().split('\n')):
        if len(row) >= 2 and row[1] == pid:
            if row[0].lower() == 'chrome.exe':
                # close() waits for taskkill to finish before returning.
                popen(f'taskkill /pid {pid} /F').close()
            return
|
||||||
|
|
||||||
def set_cookies(self,
|
def set_cookies(self,
|
||||||
cookies: Union[RequestsCookieJar, list, tuple, str, dict],
|
cookies: Union[RequestsCookieJar, list, tuple, str, dict],
|
||||||
set_session: bool = False,
|
set_session: bool = False,
|
||||||
@ -181,7 +217,8 @@ class Drission(object):
|
|||||||
|
|
||||||
# 添加cookie到session
|
# 添加cookie到session
|
||||||
if set_session:
|
if set_session:
|
||||||
kwargs = {x: cookie[x] for x in cookie if x not in ('name', 'value', 'httpOnly', 'expiry')}
|
kwargs = {x: cookie[x] for x in cookie
|
||||||
|
if x.lower() not in ('name', 'value', 'httponly', 'expiry', 'samesite')}
|
||||||
|
|
||||||
if 'expiry' in cookie:
|
if 'expiry' in cookie:
|
||||||
kwargs['expires'] = cookie['expiry']
|
kwargs['expires'] = cookie['expiry']
|
||||||
@ -214,9 +251,19 @@ class Drission(object):
|
|||||||
self.driver.get(cookie_domain if cookie_domain.startswith('http://')
|
self.driver.get(cookie_domain if cookie_domain.startswith('http://')
|
||||||
else f'http://{cookie_domain}')
|
else f'http://{cookie_domain}')
|
||||||
|
|
||||||
|
# 避免selenium自动添加.后无法正确覆盖已有cookie
|
||||||
|
if cookie['domain'][0] != '.':
|
||||||
|
c = self.driver.get_cookie(cookie['name'])
|
||||||
|
if c and c['domain'] == cookie['domain']:
|
||||||
|
self.driver.delete_cookie(cookie['name'])
|
||||||
|
|
||||||
self.driver.add_cookie(cookie)
|
self.driver.add_cookie(cookie)
|
||||||
|
|
||||||
def _set_session(self, data: dict) -> None:
|
def _set_session(self, data: dict) -> None:
|
||||||
|
"""根据传入字典对session进行设置 \n
|
||||||
|
:param data: session配置字典
|
||||||
|
:return: None
|
||||||
|
"""
|
||||||
if self._session is None:
|
if self._session is None:
|
||||||
self._session = Session()
|
self._session = Session()
|
||||||
|
|
||||||
@ -303,7 +350,7 @@ class Drission(object):
|
|||||||
|
|
||||||
|
|
||||||
def _check_port(debugger_address: str) -> Union[bool, None]:
|
def _check_port(debugger_address: str) -> Union[bool, None]:
|
||||||
"""检查端口是否可用 \n
|
"""检查端口是否被占用 \n
|
||||||
:param debugger_address: 浏览器地址及端口
|
:param debugger_address: 浏览器地址及端口
|
||||||
:return: bool
|
:return: bool
|
||||||
"""
|
"""
|
||||||
@ -322,3 +369,89 @@ def _check_port(debugger_address: str) -> Union[bool, None]:
|
|||||||
return True
|
return True
|
||||||
except socket.error:
|
except socket.error:
|
||||||
return False
|
return False
|
||||||
|
finally:
|
||||||
|
if s:
|
||||||
|
s.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _create_chrome(chrome_path: str, port: str, args: list, proxy: dict) -> tuple:
    """Start a chrome process with remote debugging enabled.

    :param chrome_path: path of chrome.exe
    :param port: port the process should expose for remote debugging
    :param args: chrome command-line arguments
    :param proxy: proxy settings; when truthy, its ``'http'`` entry is passed as ``--proxy-server``
    :return: tuple of (chrome.exe path, process object)
    :raises FileNotFoundError: if the given path cannot be started and no chrome path can be located
    """
    from subprocess import Popen

    # ----------Wrap directory values in double quotes so spaces in paths are kept together----------
    quoted_args = []
    for arg in args:
        if arg.startswith(('--user-data-dir', '--disk-cache-dir')) and '=' in arg:
            name, _, value = arg.partition('=')
            # Drop quotes the caller may already have added, to avoid double quoting.
            value = value.strip().strip('"')
            quoted_args.append(f'{name}="{value}"')
        else:
            quoted_args.append(arg)

    # De-duplicate while keeping the caller's order (a set would shuffle the arguments).
    args = ' '.join(dict.fromkeys(quoted_args))

    if proxy:
        args = f'{args} --proxy-server={proxy["http"]}'

    # ----------Create the browser process----------
    try:
        debugger = Popen(f'{chrome_path} --remote-debugging-port={port} {args}', shell=False)

        # Only the bare executable name was given: ask the system which
        # executable actually ended up serving the port.
        if chrome_path == 'chrome.exe':
            # Package-relative import, like the other intra-package imports in this file.
            from .common import get_exe_path_from_port
            chrome_path = get_exe_path_from_port(port)

    # The given path was not found: let the package's own lookup helper find it.
    except FileNotFoundError:
        from .easy_set import _get_chrome_path
        chrome_path = _get_chrome_path(show_msg=False)

        if not chrome_path:
            raise FileNotFoundError('无法找到chrome.exe路径,请手动配置。')

        debugger = Popen(f'"{chrome_path}" --remote-debugging-port={port} {args}', shell=False)

    return chrome_path, debugger
|
||||||
|
|
||||||
|
|
||||||
|
def _create_driver(chrome_path: str, driver_path: str, options: Options) -> WebDriver:
    """Create the WebDriver object, retrying with a matching chromedriver on failure.

    If every attempt fails, a message is printed and the interpreter exits.

    :param chrome_path: path of chrome.exe
    :param driver_path: path of chromedriver.exe
    :param options: Options object
    :return: WebDriver object
    """
    try:
        return webdriver.Chrome(driver_path, options=options)

    # First attempt failed: fetch a chromedriver for this chrome and retry.
    except (WebDriverException, SessionNotCreatedException):
        from .easy_set import get_match_driver
        chrome_path = None if chrome_path == 'chrome.exe' else chrome_path
        driver_path = get_match_driver(chrome_path=chrome_path, check_version=False, show_msg=False)

        if driver_path:
            try:
                return webdriver.Chrome(driver_path, options=options)
            except Exception:
                # Best effort: fall through to the failure message below, but let
                # KeyboardInterrupt / SystemExit propagate.
                pass

        # No driver was found and no explicit chrome path was given: retry the
        # chrome lookup with the ini file and registry sources disabled.
        elif chrome_path is None and driver_path is None:
            from .easy_set import _get_chrome_path
            chrome_path = _get_chrome_path(show_msg=False, from_ini=False, from_regedit=False)
            driver_path = get_match_driver(chrome_path=chrome_path, check_version=False, show_msg=False)

            if driver_path:
                options.binary_location = chrome_path
                try:
                    return webdriver.Chrome(driver_path, options=options)
                except Exception:
                    pass

    print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。')
    # NOTE(review): exits with status 0 even though this is a failure path; kept as-is for callers.
    exit(0)
|
||||||
|
@ -4,15 +4,15 @@
|
|||||||
@Contact : g1879@qq.com
|
@Contact : g1879@qq.com
|
||||||
@File : driver_element.py
|
@File : driver_element.py
|
||||||
"""
|
"""
|
||||||
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from time import sleep
|
|
||||||
from typing import Union, List, Any, Tuple
|
|
||||||
|
|
||||||
from selenium.common.exceptions import TimeoutException, JavascriptException, InvalidElementStateException
|
from selenium.common.exceptions import TimeoutException, JavascriptException, InvalidElementStateException
|
||||||
from selenium.webdriver.chrome.webdriver import WebDriver
|
from selenium.webdriver.chrome.webdriver import WebDriver
|
||||||
from selenium.webdriver.remote.webelement import WebElement
|
from selenium.webdriver.remote.webelement import WebElement
|
||||||
from selenium.webdriver.support import expected_conditions as ec
|
from selenium.webdriver.support import expected_conditions as ec
|
||||||
from selenium.webdriver.support.wait import WebDriverWait
|
from selenium.webdriver.support.wait import WebDriverWait
|
||||||
|
from time import sleep
|
||||||
|
from typing import Union, List, Any, Tuple
|
||||||
|
|
||||||
from .common import DrissionElement, str_to_loc, get_available_file_name, translate_loc, format_html
|
from .common import DrissionElement, str_to_loc, get_available_file_name, translate_loc, format_html
|
||||||
|
|
||||||
@ -22,6 +22,7 @@ class DriverElement(DrissionElement):
|
|||||||
|
|
||||||
def __init__(self, ele: WebElement, page=None):
|
def __init__(self, ele: WebElement, page=None):
|
||||||
super().__init__(ele, page)
|
super().__init__(ele, page)
|
||||||
|
self._select = None
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs]
|
attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs]
|
||||||
@ -54,7 +55,7 @@ class DriverElement(DrissionElement):
|
|||||||
@property
|
@property
|
||||||
def tag(self) -> str:
|
def tag(self) -> str:
|
||||||
"""返回元素类型"""
|
"""返回元素类型"""
|
||||||
return self._inner_ele.tag_name
|
return self._inner_ele.tag_name.lower()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def attrs(self) -> dict:
|
def attrs(self) -> dict:
|
||||||
@ -78,7 +79,17 @@ class DriverElement(DrissionElement):
|
|||||||
@property
|
@property
|
||||||
def text(self) -> str:
|
def text(self) -> str:
|
||||||
"""返回元素内所有文本"""
|
"""返回元素内所有文本"""
|
||||||
return self.attr('innerText')
|
# return format_html(self.inner_ele.get_attribute('innerText'), False)
|
||||||
|
re_str = self.inner_ele.get_attribute('innerText')
|
||||||
|
re_str = re.sub(r'\n{2,}', '\n', re_str)
|
||||||
|
re_str = re.sub(r' {2,}', ' ', re_str)
|
||||||
|
|
||||||
|
return format_html(re_str.strip('\n '), False)
|
||||||
|
|
||||||
|
@property
def raw_text(self) -> str:
    """Return the element's ``innerText`` attribute exactly as the inner element reports it."""
    inner = self.inner_ele
    return inner.get_attribute('innerText')
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def link(self) -> str:
|
def link(self) -> str:
|
||||||
@ -109,6 +120,11 @@ class DriverElement(DrissionElement):
|
|||||||
"""返回前一个兄弟元素"""
|
"""返回前一个兄弟元素"""
|
||||||
return self._get_brother(1, 'ele', 'prev')
|
return self._get_brother(1, 'ele', 'prev')
|
||||||
|
|
||||||
|
@property
def comments(self) -> list:
    """Return a list of the comment texts found inside the element."""
    comment_loc = 'xpath:.//comment()'
    return self.eles(comment_loc)
|
||||||
|
|
||||||
# -----------------driver独占属性-------------------
|
# -----------------driver独占属性-------------------
|
||||||
@property
|
@property
|
||||||
def size(self) -> dict:
|
def size(self) -> dict:
|
||||||
@ -128,6 +144,11 @@ class DriverElement(DrissionElement):
|
|||||||
from .shadow_root_element import ShadowRootElement
|
from .shadow_root_element import ShadowRootElement
|
||||||
return ShadowRootElement(shadow, self)
|
return ShadowRootElement(shadow, self)
|
||||||
|
|
||||||
|
@property
def sr(self):
    """Short alias of ``shadow_root``: return the shadow_root element object of this element."""
    root = self.shadow_root
    return root
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def before(self) -> str:
|
def before(self) -> str:
|
||||||
"""返回当前元素的::before伪元素内容"""
|
"""返回当前元素的::before伪元素内容"""
|
||||||
@ -138,16 +159,30 @@ class DriverElement(DrissionElement):
|
|||||||
"""返回当前元素的::after伪元素内容"""
|
"""返回当前元素的::after伪元素内容"""
|
||||||
return self.get_style_property('content', 'after')
|
return self.get_style_property('content', 'after')
|
||||||
|
|
||||||
|
@property
def select(self):
    """Return the Select helper dedicated to drop-down lists; False for non-drop-down elements.

    The result is stored on first access and reused afterwards.
    """
    if self._select is not None:
        return self._select

    self._select = Select(self) if self.tag == 'select' else False
    return self._select
|
||||||
|
|
||||||
# -----------------共有函数-------------------
|
# -----------------共有函数-------------------
|
||||||
|
|
||||||
def texts(self, text_node_only: bool = False) -> list:
|
def texts(self, text_node_only: bool = False) -> list:
|
||||||
"""返回元素内所有直接子节点的文本,包括元素和文本节点 \n
|
"""返回元素内所有直接子节点的文本,包括元素和文本节点 \n
|
||||||
:param text_node_only: 是否只返回文本节点
|
:param text_node_only: 是否只返回文本节点
|
||||||
:return: 文本列表
|
:return: 文本列表
|
||||||
"""
|
"""
|
||||||
if text_node_only:
|
if text_node_only:
|
||||||
return self.eles('xpath:./text()')
|
texts = self.eles('xpath:/text()')
|
||||||
else:
|
else:
|
||||||
return [x if isinstance(x, str) else x.text for x in self.eles('xpath:./node()')]
|
texts = [x if isinstance(x, str) else x.text for x in self.eles('xpath:./text() | *')]
|
||||||
|
|
||||||
|
return [x.strip(' ') for x in texts if x and x.replace('\n', '').replace('\t', '').replace(' ', '') != '']
|
||||||
|
|
||||||
def parents(self, num: int = 1):
|
def parents(self, num: int = 1):
|
||||||
"""返回上面第num级父元素 \n
|
"""返回上面第num级父元素 \n
|
||||||
@ -155,7 +190,7 @@ class DriverElement(DrissionElement):
|
|||||||
:return: DriverElement对象
|
:return: DriverElement对象
|
||||||
"""
|
"""
|
||||||
loc = 'xpath', f'.{"/.." * num}'
|
loc = 'xpath', f'.{"/.." * num}'
|
||||||
return self.ele(loc, timeout=0.1)
|
return self.ele(loc, timeout=0)
|
||||||
|
|
||||||
def nexts(self, num: int = 1, mode: str = 'ele'):
|
def nexts(self, num: int = 1, mode: str = 'ele'):
|
||||||
"""返回后面第num个兄弟元素或节点文本 \n
|
"""返回后面第num个兄弟元素或节点文本 \n
|
||||||
@ -178,7 +213,10 @@ class DriverElement(DrissionElement):
|
|||||||
:param attr: 属性名
|
:param attr: 属性名
|
||||||
:return: 属性值文本
|
:return: 属性值文本
|
||||||
"""
|
"""
|
||||||
attr = 'innerText' if attr == 'text' else attr
|
# attr = 'innerText' if attr == 'text' else attr
|
||||||
|
if attr in ('text', 'innerText'):
|
||||||
|
return self.text
|
||||||
|
|
||||||
return format_html(self.inner_ele.get_attribute(attr))
|
return format_html(self.inner_ele.get_attribute(attr))
|
||||||
|
|
||||||
def ele(self,
|
def ele(self,
|
||||||
@ -188,27 +226,34 @@ class DriverElement(DrissionElement):
|
|||||||
"""返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 \n
|
"""返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 \n
|
||||||
示例: \n
|
示例: \n
|
||||||
- 用loc元组查找: \n
|
- 用loc元组查找: \n
|
||||||
ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n
|
ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n
|
||||||
- 用查询字符串查找: \n
|
- 用查询字符串查找: \n
|
||||||
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
||||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||||
ele.ele('.ele_class') - 返回第一个 class 为 ele_class 的子元素 \n
|
ele.ele('.ele_class') - 返回第一个 class 为 ele_class 的子元素 \n
|
||||||
ele.ele('.:ele_class') - 返回第一个 class 中含有 ele_class 的子元素 \n
|
ele.ele('.:ele_class') - 返回第一个 class 中含有 ele_class 的子元素 \n
|
||||||
ele.ele('#ele_id') - 返回第一个 id 为 ele_id 的子元素 \n
|
ele.ele('#ele_id') - 返回第一个 id 为 ele_id 的子元素 \n
|
||||||
ele.ele('#:ele_id') - 返回第一个 id 中含有 ele_id 的子元素 \n
|
ele.ele('#:ele_id') - 返回第一个 id 中含有 ele_id 的子元素 \n
|
||||||
ele.ele('@class:ele_class') - 返回第一个class含有ele_class的子元素 \n
|
ele.ele('@class:ele_class') - 返回第一个class含有ele_class的子元素 \n
|
||||||
ele.ele('@name=ele_name') - 返回第一个name等于ele_name的子元素 \n
|
ele.ele('@name=ele_name') - 返回第一个name等于ele_name的子元素 \n
|
||||||
ele.ele('@placeholder') - 返回第一个带placeholder属性的子元素 \n
|
ele.ele('@placeholder') - 返回第一个带placeholder属性的子元素 \n
|
||||||
ele.ele('tag:p') - 返回第一个<p>子元素 \n
|
ele.ele('tag:p') - 返回第一个<p>子元素 \n
|
||||||
ele.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div子元素 \n
|
ele.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div子元素 \n
|
||||||
ele.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div子元素 \n
|
ele.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div子元素 \n
|
||||||
ele.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div子元素 \n
|
ele.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div子元素 \n
|
||||||
ele.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div子元素 \n
|
ele.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div子元素 \n
|
||||||
ele.ele('text:some_text') - 返回第一个文本含有some_text的子元素 \n
|
ele.ele('text:some_text') - 返回第一个文本含有some_text的子元素 \n
|
||||||
ele.ele('some_text') - 返回第一个文本含有some_text的子元素(等价于上一行) \n
|
ele.ele('some_text') - 返回第一个文本含有some_text的子元素(等价于上一行) \n
|
||||||
ele.ele('text=some_text') - 返回第一个文本等于some_text的子元素 \n
|
ele.ele('text=some_text') - 返回第一个文本等于some_text的子元素 \n
|
||||||
ele.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的子元素 \n
|
ele.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的子元素 \n
|
||||||
ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n
|
ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n
|
||||||
|
- 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n
|
||||||
|
ele.ele('x://div[@class="ele_class"]') - 等同于 ele.ele('xpath://div[@class="ele_class"]') \n
|
||||||
|
ele.ele('c:div.ele_class') - 等同于 ele.ele('css:div.ele_class') \n
|
||||||
|
ele.ele('t:div') - 等同于 ele.ele('tag:div') \n
|
||||||
|
ele.ele('t:div@tx()=some_text') - 等同于 ele.ele('tag:div@text()=some_text') \n
|
||||||
|
ele.ele('tx:some_text') - 等同于 ele.ele('text:some_text') \n
|
||||||
|
ele.ele('tx=some_text') - 等同于 ele.ele('text=some_text')
|
||||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||||
:param timeout: 查找元素超时时间
|
:param timeout: 查找元素超时时间
|
||||||
@ -241,30 +286,37 @@ class DriverElement(DrissionElement):
|
|||||||
def eles(self,
|
def eles(self,
|
||||||
loc_or_str: Union[Tuple[str, str], str],
|
loc_or_str: Union[Tuple[str, str], str],
|
||||||
timeout: float = None):
|
timeout: float = None):
|
||||||
"""返回当前元素下级所有符合条件的子元素、属性或节点文本 \n
|
"""返回当前元素下级所有符合条件的子元素、属性或节点文本 \n
|
||||||
示例: \n
|
示例: \n
|
||||||
- 用loc元组查找: \n
|
- 用loc元组查找: \n
|
||||||
ele.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n
|
ele.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n
|
||||||
- 用查询字符串查找: \n
|
- 用查询字符串查找: \n
|
||||||
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
||||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||||
ele.eles('.ele_class') - 返回所有 class 为 ele_class 的子元素 \n
|
ele.eles('.ele_class') - 返回所有 class 为 ele_class 的子元素 \n
|
||||||
ele.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的子元素 \n
|
ele.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的子元素 \n
|
||||||
ele.eles('#ele_id') - 返回所有 id 为 ele_id 的子元素 \n
|
ele.eles('#ele_id') - 返回所有 id 为 ele_id 的子元素 \n
|
||||||
ele.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的子元素 \n
|
ele.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的子元素 \n
|
||||||
ele.eles('@class:ele_class') - 返回所有class含有ele_class的子元素 \n
|
ele.eles('@class:ele_class') - 返回所有class含有ele_class的子元素 \n
|
||||||
ele.eles('@name=ele_name') - 返回所有name等于ele_name的子元素 \n
|
ele.eles('@name=ele_name') - 返回所有name等于ele_name的子元素 \n
|
||||||
ele.eles('@placeholder') - 返回所有带placeholder属性的子元素 \n
|
ele.eles('@placeholder') - 返回所有带placeholder属性的子元素 \n
|
||||||
ele.eles('tag:p') - 返回所有<p>子元素 \n
|
ele.eles('tag:p') - 返回所有<p>子元素 \n
|
||||||
ele.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div子元素 \n
|
ele.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div子元素 \n
|
||||||
ele.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div子元素 \n
|
ele.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div子元素 \n
|
||||||
ele.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div子元素 \n
|
ele.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div子元素 \n
|
||||||
ele.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div子元素 \n
|
ele.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div子元素 \n
|
||||||
ele.eles('text:some_text') - 返回所有文本含有some_text的子元素 \n
|
ele.eles('text:some_text') - 返回所有文本含有some_text的子元素 \n
|
||||||
ele.eles('some_text') - 返回所有文本含有some_text的子元素(等价于上一行) \n
|
ele.eles('some_text') - 返回所有文本含有some_text的子元素(等价于上一行) \n
|
||||||
ele.eles('text=some_text') - 返回所有文本等于some_text的子元素 \n
|
ele.eles('text=some_text') - 返回所有文本等于some_text的子元素 \n
|
||||||
ele.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的子元素 \n
|
ele.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的子元素 \n
|
||||||
ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n
|
ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n
|
||||||
|
- 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n
|
||||||
|
ele.eles('x://div[@class="ele_class"]') - 等同于 ele.eles('xpath://div[@class="ele_class"]') \n
|
||||||
|
ele.eles('c:div.ele_class') - 等同于 ele.eles('css:div.ele_class') \n
|
||||||
|
ele.eles('t:div') - 等同于 ele.eles('tag:div') \n
|
||||||
|
ele.eles('t:div@tx()=some_text') - 等同于 ele.eles('tag:div@text()=some_text') \n
|
||||||
|
ele.eles('tx:some_text') - 等同于 ele.eles('text:some_text') \n
|
||||||
|
ele.eles('tx=some_text') - 等同于 ele.eles('text=some_text')
|
||||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||||
:param timeout: 查找元素超时时间
|
:param timeout: 查找元素超时时间
|
||||||
:return: DriverElement对象组成的列表
|
:return: DriverElement对象组成的列表
|
||||||
@ -285,14 +337,14 @@ class DriverElement(DrissionElement):
|
|||||||
|
|
||||||
return None if r == 'none' else r
|
return None if r == 'none' else r
|
||||||
|
|
||||||
def click(self, by_js=None) -> bool:
|
def click(self, by_js: bool = None) -> bool:
|
||||||
"""点击元素 \n
|
"""点击元素 \n
|
||||||
尝试点击10次,若都失败就改用js点击 \n
|
尝试点击3次,若都失败就改用js点击 \n
|
||||||
:param by_js: 是否用js点击,为True时直接用js点击,为False时重试失败也不会改用js
|
:param by_js: 是否用js点击,为True时直接用js点击,为False时重试失败也不会改用js
|
||||||
:return: 是否点击成功
|
:return: 是否点击成功
|
||||||
"""
|
"""
|
||||||
if not by_js:
|
if not by_js:
|
||||||
for _ in range(10):
|
for _ in range(3):
|
||||||
try:
|
try:
|
||||||
self.inner_ele.click()
|
self.inner_ele.click()
|
||||||
return True
|
return True
|
||||||
@ -306,17 +358,45 @@ class DriverElement(DrissionElement):
|
|||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def input(self, value: str, clear: bool = True) -> bool:
|
def click_at(self, x: Union[int, str] = None, y: Union[int, str] = None, by_js=False) -> None:
|
||||||
"""输入文本 \n
|
"""带偏移量点击本元素,相对于左上角坐标。不传入x或y值时点击元素中点 \n
|
||||||
:param value: 文本值
|
:param x: 相对元素左上角坐标的x轴偏移量
|
||||||
|
:param y: 相对元素左上角坐标的y轴偏移量
|
||||||
|
:param by_js: 是否用js点击
|
||||||
|
:return: None
|
||||||
|
"""
|
||||||
|
if by_js:
|
||||||
|
x = self.location['x'] + int(x) if x is not None else self.location['x'] + self.size['width'] // 2
|
||||||
|
y = self.location['y'] + int(y) if y is not None else self.location['y'] + self.size['height'] // 2
|
||||||
|
js = f"""
|
||||||
|
var ev = document.createEvent('HTMLEvents');
|
||||||
|
ev.clientX = {x};
|
||||||
|
ev.clientY = {y};
|
||||||
|
ev.initEvent('click', false, true);
|
||||||
|
arguments[0].dispatchEvent(ev);
|
||||||
|
"""
|
||||||
|
self.run_script(js)
|
||||||
|
|
||||||
|
else:
|
||||||
|
x = int(x) if x is not None else self.size['width'] // 2
|
||||||
|
y = int(y) if y is not None else self.size['height'] // 2
|
||||||
|
|
||||||
|
from selenium.webdriver import ActionChains
|
||||||
|
ActionChains(self.page.driver).move_to_element_with_offset(self.inner_ele, x, y).click().perform()
|
||||||
|
|
||||||
|
def input(self, value: Union[str, tuple], clear: bool = True) -> bool:
|
||||||
|
"""输入文本或组合键 \n
|
||||||
|
:param value: 文本值或按键组合
|
||||||
:param clear: 输入前是否清空文本框
|
:param clear: 输入前是否清空文本框
|
||||||
:return: 是否输入成功
|
:return: 是否输入成功
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
if clear:
|
if clear:
|
||||||
self.clear()
|
self.clear()
|
||||||
self.inner_ele.send_keys(value)
|
|
||||||
|
self.inner_ele.send_keys(*value)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
return False
|
return False
|
||||||
@ -367,7 +447,8 @@ class DriverElement(DrissionElement):
|
|||||||
name = filename or self.tag
|
name = filename or self.tag
|
||||||
path = Path(path).absolute()
|
path = Path(path).absolute()
|
||||||
path.mkdir(parents=True, exist_ok=True)
|
path.mkdir(parents=True, exist_ok=True)
|
||||||
name = get_available_file_name(str(path), f'{name}.png')
|
name = f'{name}.png' if not name.endswith('.png') else name
|
||||||
|
name = get_available_file_name(str(path), name)
|
||||||
|
|
||||||
# 等待元素加载完成
|
# 等待元素加载完成
|
||||||
if self.tag == 'img':
|
if self.tag == 'img':
|
||||||
@ -381,21 +462,6 @@ class DriverElement(DrissionElement):
|
|||||||
|
|
||||||
return img_path
|
return img_path
|
||||||
|
|
||||||
def select(self, text: str) -> bool:
|
|
||||||
"""选择下拉列表中子元素 \n
|
|
||||||
:param text: 要选择的文本
|
|
||||||
:return: 是否选择成功
|
|
||||||
"""
|
|
||||||
from selenium.webdriver.support.select import Select
|
|
||||||
ele = Select(self.inner_ele)
|
|
||||||
|
|
||||||
try:
|
|
||||||
ele.select_by_visible_text(text)
|
|
||||||
return True
|
|
||||||
except Exception as e:
|
|
||||||
print(e)
|
|
||||||
return False
|
|
||||||
|
|
||||||
def set_attr(self, attr: str, value: str) -> bool:
|
def set_attr(self, attr: str, value: str) -> bool:
|
||||||
"""设置元素属性 \n
|
"""设置元素属性 \n
|
||||||
:param attr: 属性名
|
:param attr: 属性名
|
||||||
@ -403,7 +469,7 @@ class DriverElement(DrissionElement):
|
|||||||
:return: 是否设置成功
|
:return: 是否设置成功
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
self.run_script(f"arguments[0].{attr} = '{value}';")
|
self.run_script(f"arguments[0].setAttribute(arguments[1], arguments[2]);", attr, value)
|
||||||
return True
|
return True
|
||||||
except:
|
except:
|
||||||
return False
|
return False
|
||||||
@ -493,12 +559,14 @@ class DriverElement(DrissionElement):
|
|||||||
if(nth>1){path = '/' + tag + '[' + nth + ']' + path;}
|
if(nth>1){path = '/' + tag + '[' + nth + ']' + path;}
|
||||||
else{path = '/' + tag + path;}'''
|
else{path = '/' + tag + path;}'''
|
||||||
txt5 = '''return path;'''
|
txt5 = '''return path;'''
|
||||||
|
|
||||||
elif mode == 'css':
|
elif mode == 'css':
|
||||||
txt1 = ''
|
txt1 = ''
|
||||||
# txt2 = '''return '#' + el.id + path;'''
|
# txt2 = '''return '#' + el.id + path;'''
|
||||||
txt3 = ''
|
txt3 = ''
|
||||||
txt4 = '''path = '>' + ":nth-child(" + nth + ")" + path;'''
|
txt4 = '''path = '>' + ":nth-child(" + nth + ")" + path;'''
|
||||||
txt5 = '''return path.substr(1);'''
|
txt5 = '''return path.substr(1);'''
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Argument mode can only be 'xpath' or 'css', not '{mode}'.")
|
raise ValueError(f"Argument mode can only be 'xpath' or 'css', not '{mode}'.")
|
||||||
|
|
||||||
@ -514,7 +582,6 @@ class DriverElement(DrissionElement):
|
|||||||
sib = sib.previousSibling;
|
sib = sib.previousSibling;
|
||||||
}
|
}
|
||||||
''' + txt4 + '''
|
''' + txt4 + '''
|
||||||
|
|
||||||
el = el.parentNode;
|
el = el.parentNode;
|
||||||
}
|
}
|
||||||
''' + txt5 + '''
|
''' + txt5 + '''
|
||||||
@ -548,13 +615,15 @@ class DriverElement(DrissionElement):
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f"Argument direction can only be 'next' or 'prev', not '{direction}'.")
|
raise ValueError(f"Argument direction can only be 'next' or 'prev', not '{direction}'.")
|
||||||
|
|
||||||
|
timeout = 0 if direction == 'prev' else .5
|
||||||
|
|
||||||
# 获取节点
|
# 获取节点
|
||||||
ele_or_node = self.ele(f'xpath:./{direction_txt}-sibling::{node_txt}[{num}]', timeout=0.1)
|
ele_or_node = self.ele(f'xpath:./{direction_txt}-sibling::{node_txt}[{num}]', timeout=timeout)
|
||||||
|
|
||||||
# 跳过元素间的换行符
|
# 跳过元素间的换行符
|
||||||
while ele_or_node == '\n':
|
while isinstance(ele_or_node, str) and ele_or_node.replace('\n', '').replace('\t', '').replace(' ', '') == '':
|
||||||
num += 1
|
num += 1
|
||||||
ele_or_node = self.ele(f'xpath:./{direction_txt}-sibling::{node_txt}[{num}]', timeout=0.1)
|
ele_or_node = self.ele(f'xpath:./{direction_txt}-sibling::{node_txt}[{num}]', timeout=timeout)
|
||||||
|
|
||||||
return ele_or_node
|
return ele_or_node
|
||||||
|
|
||||||
@ -572,7 +641,7 @@ def execute_driver_find(page_or_ele,
|
|||||||
:return: 返回DriverElement元素或它们组成的列表
|
:return: 返回DriverElement元素或它们组成的列表
|
||||||
"""
|
"""
|
||||||
mode = mode or 'single'
|
mode = mode or 'single'
|
||||||
if mode not in ['single', 'all']:
|
if mode not in ('single', 'all'):
|
||||||
raise ValueError(f"Argument mode can only be 'single' or 'all', not '{mode}'.")
|
raise ValueError(f"Argument mode can only be 'single' or 'all', not '{mode}'.")
|
||||||
|
|
||||||
if isinstance(page_or_ele, DrissionElement):
|
if isinstance(page_or_ele, DrissionElement):
|
||||||
@ -582,15 +651,19 @@ def execute_driver_find(page_or_ele,
|
|||||||
page = page_or_ele
|
page = page_or_ele
|
||||||
driver = page_or_ele.driver
|
driver = page_or_ele.driver
|
||||||
|
|
||||||
try:
|
# 设置等待对象
|
||||||
if timeout and timeout != page.timeout:
|
if timeout is not None and timeout != page.timeout:
|
||||||
wait = WebDriverWait(driver, timeout=timeout)
|
wait = WebDriverWait(driver, timeout=timeout)
|
||||||
else:
|
else:
|
||||||
page.wait._driver = driver
|
page.wait._driver = driver
|
||||||
wait = page.wait
|
wait = page.wait
|
||||||
|
|
||||||
|
try:
|
||||||
|
# 使用xpath查找
|
||||||
if loc[0] == 'xpath':
|
if loc[0] == 'xpath':
|
||||||
return wait.until(ElementsByXpath(page, loc[1], mode, timeout))
|
return wait.until(ElementsByXpath(page, loc[1], mode, timeout))
|
||||||
|
|
||||||
|
# 使用css selector查找
|
||||||
else:
|
else:
|
||||||
if mode == 'single':
|
if mode == 'single':
|
||||||
return DriverElement(wait.until(ec.presence_of_element_located(loc)), page)
|
return DriverElement(wait.until(ec.presence_of_element_located(loc)), page)
|
||||||
@ -627,7 +700,7 @@ class ElementsByXpath(object):
|
|||||||
"""用js通过xpath获取元素、节点或属性
|
"""用js通过xpath获取元素、节点或属性
|
||||||
:param node: 'document' 或 元素对象
|
:param node: 'document' 或 元素对象
|
||||||
:param xpath_txt: xpath语句
|
:param xpath_txt: xpath语句
|
||||||
:param type_txt: resultType,参考https://developer.mozilla.org/zh-CN/docs/Web/API/Document/evaluate
|
:param type_txt: resultType,参考 https://developer.mozilla.org/zh-CN/docs/Web/API/Document/evaluate
|
||||||
:return: 元素对象或属性、文本字符串
|
:return: 元素对象或属性、文本字符串
|
||||||
"""
|
"""
|
||||||
node_txt = 'document' if not node or node == 'document' else 'arguments[0]'
|
node_txt = 'document' if not node or node == 'document' else 'arguments[0]'
|
||||||
@ -638,6 +711,7 @@ class ElementsByXpath(object):
|
|||||||
return_txt = '''
|
return_txt = '''
|
||||||
if(e.singleNodeValue.constructor.name=="Text"){return e.singleNodeValue.data;}
|
if(e.singleNodeValue.constructor.name=="Text"){return e.singleNodeValue.data;}
|
||||||
else if(e.singleNodeValue.constructor.name=="Attr"){return e.singleNodeValue.nodeValue;}
|
else if(e.singleNodeValue.constructor.name=="Attr"){return e.singleNodeValue.nodeValue;}
|
||||||
|
else if(e.singleNodeValue.constructor.name=="Comment"){return e.singleNodeValue.nodeValue;}
|
||||||
else{return e.singleNodeValue;}
|
else{return e.singleNodeValue;}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
@ -648,6 +722,7 @@ class ElementsByXpath(object):
|
|||||||
for(var i = 0; i <e.snapshotLength ; i++){
|
for(var i = 0; i <e.snapshotLength ; i++){
|
||||||
if(e.snapshotItem(i).constructor.name=="Text"){a.push(e.snapshotItem(i).data);}
|
if(e.snapshotItem(i).constructor.name=="Text"){a.push(e.snapshotItem(i).data);}
|
||||||
else if(e.snapshotItem(i).constructor.name=="Attr"){a.push(e.snapshotItem(i).nodeValue);}
|
else if(e.snapshotItem(i).constructor.name=="Attr"){a.push(e.snapshotItem(i).nodeValue);}
|
||||||
|
else if(e.snapshotItem(i).constructor.name=="Comment"){a.push(e.snapshotItem(i).nodeValue);}
|
||||||
else{a.push(e.snapshotItem(i));}
|
else{a.push(e.snapshotItem(i));}
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
@ -661,18 +736,18 @@ class ElementsByXpath(object):
|
|||||||
return_txt = 'return e.singleNodeValue;'
|
return_txt = 'return e.singleNodeValue;'
|
||||||
|
|
||||||
js = """
|
js = """
|
||||||
var e=document.evaluate('""" + xpath_txt + """', """ + node_txt + """, null, """ + type_txt + """,null);
|
var e=document.evaluate(arguments[1], """ + node_txt + """, null, """ + type_txt + """,null);
|
||||||
""" + for_txt + """
|
""" + for_txt + """
|
||||||
""" + return_txt + """
|
""" + return_txt + """
|
||||||
"""
|
"""
|
||||||
return driver.execute_script(js, node)
|
return driver.execute_script(js, node, xpath_txt)
|
||||||
|
|
||||||
if isinstance(ele_or_driver, WebDriver):
|
if isinstance(ele_or_driver, WebDriver):
|
||||||
driver, the_node = ele_or_driver, 'document'
|
driver, the_node = ele_or_driver, 'document'
|
||||||
else:
|
else:
|
||||||
driver, the_node = ele_or_driver.parent, ele_or_driver
|
driver, the_node = ele_or_driver.parent, ele_or_driver
|
||||||
|
|
||||||
# 把lxml元素对象包装成DriverElement对象并按需要返回第一个或全部
|
# 把lxml元素对象包装成DriverElement对象并按需要返回第一个或全部
|
||||||
if self.mode == 'single':
|
if self.mode == 'single':
|
||||||
try:
|
try:
|
||||||
e = get_nodes(the_node, xpath_txt=self.xpath, type_txt='9')
|
e = get_nodes(the_node, xpath_txt=self.xpath, type_txt='9')
|
||||||
@ -685,11 +760,171 @@ class ElementsByXpath(object):
|
|||||||
return e
|
return e
|
||||||
|
|
||||||
# 找不到目标时
|
# 找不到目标时
|
||||||
except JavascriptException:
|
except JavascriptException as err:
|
||||||
return None
|
if 'The result is not a node set' in err.msg:
|
||||||
|
try:
|
||||||
|
return get_nodes(the_node, xpath_txt=self.xpath, type_txt='1')
|
||||||
|
except JavascriptException:
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
elif self.mode == 'all':
|
elif self.mode == 'all':
|
||||||
return ([DriverElement(x, self.page) if isinstance(x, WebElement)
|
return ([DriverElement(x, self.page) if isinstance(x, WebElement)
|
||||||
else format_html(x)
|
else format_html(x)
|
||||||
for x in get_nodes(the_node, xpath_txt=self.xpath)
|
for x in get_nodes(the_node, xpath_txt=self.xpath)
|
||||||
if x != '\n'])
|
if x != '\n'])
|
||||||
|
|
||||||
|
|
||||||
|
class Select(object):
    """Helper dedicated to handling ``<select>`` tags in d (driver) mode.

    Wraps a DriverElement together with selenium's ``Select`` support class
    and exposes select / deselect operations by visible text, value or index.
    """

    # Maps a ``para_type`` keyword to the suffix of the selenium ``Select``
    # method that implements it (``select_<suffix>`` / ``deselect_<suffix>``).
    _METHOD_SUFFIXES = {'text': 'by_visible_text', 'value': 'by_value', 'index': 'by_index'}

    def __init__(self, ele: 'DriverElement'):
        """Bind the helper to a select element.

        :param ele: the DriverElement wrapping a <select> tag
        :raises TypeError: if ``ele.tag`` is not ``'select'``
        """
        if ele.tag != 'select':
            raise TypeError(f"Select only works on <select> elements, not on {ele.tag}")

        from selenium.webdriver.support.select import Select as sl
        self.inner_ele = ele
        self.select_ele = sl(ele.inner_ele)

    def __call__(self,
                 text_value_index: Union[str, int, list, tuple] = None,
                 para_type: str = 'text',
                 deselect: bool = False) -> Union[bool, list]:
        """Select or deselect option(s); shorthand for :meth:`select`.

        :param text_value_index: text, value or index of the option; a list or tuple
                                 selects several options on a multi-select
        :param para_type: how to interpret the argument: 'text', 'value' or 'index'
        :param deselect: True to deselect instead of select
        :return: same as :meth:`select`
        """
        return self.select(text_value_index, para_type, deselect)

    @property
    def is_multi(self) -> bool:
        """Return whether the underlying selenium helper reports a multi-select."""
        return self.select_ele.is_multiple

    @property
    def options(self) -> List['DriverElement']:
        """Return all elements found by the 'tag:option' query under the select element."""
        return self.inner_ele.eles('tag:option')

    @property
    def selected_option(self) -> Union['DriverElement', None]:
        """Return the option at the element's ``selectedIndex``.

        :return: a DriverElement, or None when the script returns nothing
        """
        ele = self.inner_ele.run_script('return arguments[0].options[arguments[0].selectedIndex];')
        return None if ele is None else DriverElement(ele, self.inner_ele.page)

    @property
    def selected_options(self) -> List['DriverElement']:
        """Return every option whose ``is_selected()`` is true.

        :return: list of DriverElement objects
        """
        return [x for x in self.options if x.is_selected()]

    def clear(self) -> None:
        """Deselect all options (delegates to selenium's ``deselect_all``)."""
        self.select_ele.deselect_all()

    def select(self,
               text_value_index: Union[str, int, list, tuple] = None,
               para_type: str = 'text',
               deselect: bool = False) -> Union[bool, list]:
        """Select or deselect one option, or several when given a list/tuple.

        :param text_value_index: text, value or index of the option; a list or tuple
                                 selects several options on a multi-select
        :param para_type: how to interpret the argument: 'text', 'value' or 'index'
        :param deselect: True to deselect instead of select
        :return: for a single item, True on success and False if the driver call failed;
                 for a list/tuple, the result of :meth:`select_multi`
        :raises TypeError: on a list/tuple for a single-select, or an unsupported argument type
        :raises ValueError: if ``para_type`` is not 'text', 'value' or 'index'
        """
        if not self.is_multi and isinstance(text_value_index, (list, tuple)):
            raise TypeError('单选下拉列表不能传入list和tuple')

        if isinstance(text_value_index, (list, tuple)):
            # Propagate the multi-select outcome instead of dropping it.
            return self.select_multi(text_value_index, para_type, deselect)

        if not isinstance(text_value_index, (str, int)):
            raise TypeError('只能传入str、int、list和tuple类型。')

        # Validate outside the try block so a misuse is reported to the caller
        # rather than being swallowed and turned into a False result.
        if para_type not in ('text', 'value', 'index'):
            raise ValueError('para_type参数只能传入"text"、"value"或"index"。')

        action = 'deselect' if deselect else 'select'
        try:
            method = getattr(self.select_ele, f'{action}_{self._METHOD_SUFFIXES[para_type]}')
            # Index lookups accept numeric strings; a non-numeric one counts as a failure.
            method(int(text_value_index) if para_type == 'index' else text_value_index)
            return True

        except Exception:
            return False

    def select_multi(self,
                     text_value_index: Union[list, tuple] = None,
                     para_type: str = 'text',
                     deselect: bool = False) -> Union[bool, list]:
        """Select or deselect several options one by one.

        :param text_value_index: list or tuple of texts, values or indexes
        :param para_type: how to interpret the items: 'text', 'value' or 'index'
        :param deselect: True to deselect instead of select
        :return: True if every item succeeded, otherwise the list of items that failed
        :raises ValueError: if ``para_type`` is not 'text', 'value' or 'index'
        :raises TypeError: if the argument is not a list/tuple or holds a non str/int item
        """
        if para_type not in ('text', 'value', 'index'):
            raise ValueError('para_type参数只能传入“text”、“value”或“index”')

        if not isinstance(text_value_index, (list, tuple)):
            raise TypeError('只能传入list或tuple类型。')

        fail_list = []
        for i in text_value_index:
            if not isinstance(i, (int, str)):
                raise TypeError('列表只能由str或int组成')

            if not self.select(i, para_type, deselect):
                fail_list.append(i)

        # An empty failure list is falsy, so full success collapses to True.
        return fail_list or True

    def deselect(self,
                 text_value_index: Union[str, int, list, tuple] = None,
                 para_type: str = 'text') -> Union[bool, list]:
        """Deselect one option, or several when given a list/tuple.

        :param text_value_index: text, value or index of the option; a list or tuple
                                 deselects several options on a multi-select
        :param para_type: how to interpret the argument: 'text', 'value' or 'index'
        :return: same as :meth:`select` called with ``deselect=True``
        """
        return self.select(text_value_index, para_type, True)

    def deselect_multi(self,
                       text_value_index: Union[list, tuple] = None,
                       para_type: str = 'text') -> Union[bool, list]:
        """Deselect several options one by one.

        :param text_value_index: list or tuple of texts, values or indexes
        :param para_type: how to interpret the items: 'text', 'value' or 'index'
        :return: same as :meth:`select_multi` called with ``deselect=True``
        """
        return self.select_multi(text_value_index, para_type, True)

    def invert(self) -> None:
        """Invert the selection by clicking every option in turn.

        :raises NotImplementedError: if the element is not a multi-select
        """
        if not self.is_multi:
            raise NotImplementedError("You may only deselect options of a multi-select")

        for i in self.options:
            i.click()
|
||||||
|
@ -5,15 +5,15 @@
|
|||||||
@File : driver_page.py
|
@File : driver_page.py
|
||||||
"""
|
"""
|
||||||
from glob import glob
|
from glob import glob
|
||||||
from pathlib import Path
|
|
||||||
from time import time, sleep
|
|
||||||
from typing import Union, List, Any, Tuple
|
|
||||||
from urllib.parse import quote
|
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
from selenium.common.exceptions import NoAlertPresentException
|
from selenium.common.exceptions import NoAlertPresentException
|
||||||
from selenium.webdriver.chrome.webdriver import WebDriver
|
from selenium.webdriver.chrome.webdriver import WebDriver
|
||||||
from selenium.webdriver.remote.webelement import WebElement
|
from selenium.webdriver.remote.webelement import WebElement
|
||||||
from selenium.webdriver.support.wait import WebDriverWait
|
from selenium.webdriver.support.wait import WebDriverWait
|
||||||
|
from time import time, sleep
|
||||||
|
from typing import Union, List, Any, Tuple
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
from .common import str_to_loc, get_available_file_name, translate_loc, format_html
|
from .common import str_to_loc, get_available_file_name, translate_loc, format_html
|
||||||
from .driver_element import DriverElement, execute_driver_find
|
from .driver_element import DriverElement, execute_driver_find
|
||||||
@ -30,6 +30,9 @@ class DriverPage(object):
|
|||||||
self._url_available = None
|
self._url_available = None
|
||||||
self._wait = None
|
self._wait = None
|
||||||
|
|
||||||
|
self.retry_times = 3
|
||||||
|
self.retry_interval = 2
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def driver(self) -> WebDriver:
|
def driver(self) -> WebDriver:
|
||||||
return self._driver
|
return self._driver
|
||||||
@ -97,24 +100,37 @@ class DriverPage(object):
|
|||||||
:param show_errmsg: 是否抛出异常
|
:param show_errmsg: 是否抛出异常
|
||||||
:return: 是否成功
|
:return: 是否成功
|
||||||
"""
|
"""
|
||||||
self.driver.get(to_url)
|
err = None
|
||||||
is_ok = self.check_page()
|
is_ok = False
|
||||||
while times and is_ok is False:
|
|
||||||
sleep(interval)
|
for _ in range(times + 1):
|
||||||
self.driver.get(to_url)
|
try:
|
||||||
is_ok = self.check_page()
|
self.driver.get(to_url)
|
||||||
times -= 1
|
go_ok = True
|
||||||
|
except Exception as e:
|
||||||
|
err = e
|
||||||
|
go_ok = False
|
||||||
|
|
||||||
|
is_ok = self.check_page() if go_ok else False
|
||||||
|
|
||||||
|
if is_ok is not False:
|
||||||
|
break
|
||||||
|
|
||||||
|
if _ < times:
|
||||||
|
sleep(interval)
|
||||||
|
print(f'重试 {to_url}')
|
||||||
|
|
||||||
if is_ok is False and show_errmsg:
|
if is_ok is False and show_errmsg:
|
||||||
raise ConnectionError('Connect error.')
|
raise err if err is not None else ConnectionError('Connect error.')
|
||||||
|
|
||||||
return is_ok
|
return is_ok
|
||||||
|
|
||||||
def get(self,
|
def get(self,
|
||||||
url: str,
|
url: str,
|
||||||
go_anyway: bool = False,
|
go_anyway: bool = False,
|
||||||
show_errmsg: bool = False,
|
show_errmsg: bool = False,
|
||||||
retry: int = 0,
|
retry: int = None,
|
||||||
interval: float = 1,
|
interval: float = None) -> Union[None, bool]:
|
||||||
) -> Union[None, bool]:
|
|
||||||
"""访问url \n
|
"""访问url \n
|
||||||
:param url: 目标url
|
:param url: 目标url
|
||||||
:param go_anyway: 若目标url与当前url一致,是否强制跳转
|
:param go_anyway: 若目标url与当前url一致,是否强制跳转
|
||||||
@ -123,11 +139,21 @@ class DriverPage(object):
|
|||||||
:param interval: 重试间隔(秒)
|
:param interval: 重试间隔(秒)
|
||||||
:return: 目标url是否可用
|
:return: 目标url是否可用
|
||||||
"""
|
"""
|
||||||
to_url = quote(url, safe='/:&?=%;#@')
|
to_url = quote(url, safe='/:&?=%;#@+!')
|
||||||
|
retry = int(retry) if retry is not None else int(self.retry_times)
|
||||||
|
interval = int(interval) if interval is not None else int(self.retry_interval)
|
||||||
|
|
||||||
if not url or (not go_anyway and self.url == to_url):
|
if not url or (not go_anyway and self.url == to_url):
|
||||||
return
|
return
|
||||||
|
|
||||||
self._url = to_url
|
self._url = to_url
|
||||||
self._url_available = self._try_to_connect(to_url, times=retry, interval=interval, show_errmsg=show_errmsg)
|
self._url_available = self._try_to_connect(to_url, times=retry, interval=interval, show_errmsg=show_errmsg)
|
||||||
|
|
||||||
|
try:
|
||||||
|
self._driver.execute_script('Object.defineProperty(navigator,"webdriver",{get:() => Chrome,});')
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
return self._url_available
|
return self._url_available
|
||||||
|
|
||||||
def ele(self,
|
def ele(self,
|
||||||
@ -189,7 +215,6 @@ class DriverPage(object):
|
|||||||
else:
|
else:
|
||||||
raise ValueError('Argument loc_or_str can only be tuple, str, DriverElement, DriverElement.')
|
raise ValueError('Argument loc_or_str can only be tuple, str, DriverElement, DriverElement.')
|
||||||
|
|
||||||
timeout = timeout or self.timeout
|
|
||||||
return execute_driver_find(self, loc_or_ele, mode, timeout)
|
return execute_driver_find(self, loc_or_ele, mode, timeout)
|
||||||
|
|
||||||
def eles(self,
|
def eles(self,
|
||||||
@ -239,7 +264,7 @@ class DriverPage(object):
|
|||||||
:param timeout: 等待超时时间
|
:param timeout: 等待超时时间
|
||||||
:return: 等待是否成功
|
:return: 等待是否成功
|
||||||
"""
|
"""
|
||||||
if mode.lower() not in ['del', 'display', 'hidden']:
|
if mode.lower() not in ('del', 'display', 'hidden'):
|
||||||
raise ValueError('Argument mode can only be "del", "display", "hidden"')
|
raise ValueError('Argument mode can only be "del", "display", "hidden"')
|
||||||
|
|
||||||
from selenium.webdriver.support.wait import WebDriverWait
|
from selenium.webdriver.support.wait import WebDriverWait
|
||||||
@ -396,18 +421,18 @@ class DriverPage(object):
|
|||||||
tab = self.driver.window_handles[tab] if isinstance(tab, int) else tab
|
tab = self.driver.window_handles[tab] if isinstance(tab, int) else tab
|
||||||
self.driver.switch_to.window(tab)
|
self.driver.switch_to.window(tab)
|
||||||
|
|
||||||
def to_iframe(self, loc_or_ele: Union[int, str, tuple, WebElement, DriverElement] = 'main') -> None:
|
def to_frame(self, loc_or_ele: Union[int, str, tuple, WebElement, DriverElement] = 'main') -> None:
|
||||||
"""跳转到iframe \n
|
"""跳转到frame \n
|
||||||
可接收iframe序号(0开始)、id或name、查询字符串、loc元组、WebElement对象、DriverElement对象, \n
|
可接收frame序号(0开始)、id或name、查询字符串、loc元组、WebElement对象、DriverElement对象, \n
|
||||||
传入'main'跳到最高层,传入'parent'跳到上一层 \n
|
传入'main'跳到最高层,传入'parent'跳到上一层 \n
|
||||||
示例: \n
|
示例: \n
|
||||||
to_iframe('tag:iframe') - 通过传入iframe的查询字符串定位 \n
|
to_frame('tag:iframe') - 通过传入frame的查询字符串定位 \n
|
||||||
to_iframe('iframe_id') - 通过iframe的id属性定位 \n
|
to_frame('iframe_id') - 通过frame的id属性定位 \n
|
||||||
to_iframe('iframe_name') - 通过iframe的name属性定位 \n
|
to_frame('iframe_name') - 通过frame的name属性定位 \n
|
||||||
to_iframe(iframe_element) - 通过传入元素对象定位 \n
|
to_frame(iframe_element) - 通过传入元素对象定位 \n
|
||||||
to_iframe(0) - 通过iframe的序号定位 \n
|
to_frame(0) - 通过frame的序号定位 \n
|
||||||
to_iframe('main') - 跳到最高层 \n
|
to_frame('main') - 跳到最高层 \n
|
||||||
to_iframe('parent') - 跳到上一层 \n
|
to_frame('parent') - 跳到上一层 \n
|
||||||
:param loc_or_ele: iframe的定位信息
|
:param loc_or_ele: iframe的定位信息
|
||||||
:return: None
|
:return: None
|
||||||
"""
|
"""
|
||||||
@ -425,7 +450,7 @@ class DriverPage(object):
|
|||||||
self.driver.switch_to.parent_frame()
|
self.driver.switch_to.parent_frame()
|
||||||
|
|
||||||
# 传入id或name
|
# 传入id或name
|
||||||
elif ':' not in loc_or_ele and '=' not in loc_or_ele:
|
elif ':' not in loc_or_ele and '=' not in loc_or_ele and not loc_or_ele.startswith(('#', '.')):
|
||||||
self.driver.switch_to.frame(loc_or_ele)
|
self.driver.switch_to.frame(loc_or_ele)
|
||||||
|
|
||||||
# 传入控制字符串
|
# 传入控制字符串
|
||||||
@ -466,8 +491,8 @@ class DriverPage(object):
|
|||||||
ele.run_script("arguments[0].scrollIntoView();")
|
ele.run_script("arguments[0].scrollIntoView();")
|
||||||
|
|
||||||
def scroll_to(self, mode: str = 'bottom', pixel: int = 300) -> None:
|
def scroll_to(self, mode: str = 'bottom', pixel: int = 300) -> None:
|
||||||
"""按参数指示方式滚动页面 \n
|
"""按参数指示方式滚动页面 \n
|
||||||
:param mode: 可选滚动方向:'top', 'bottom', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right'
|
:param mode: 可选滚动方向:'top', 'bottom', 'half', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right'
|
||||||
:param pixel: 滚动的像素
|
:param pixel: 滚动的像素
|
||||||
:return: None
|
:return: None
|
||||||
"""
|
"""
|
||||||
@ -478,6 +503,10 @@ class DriverPage(object):
|
|||||||
self.driver.execute_script(
|
self.driver.execute_script(
|
||||||
"window.scrollTo(document.documentElement.scrollLeft,document.body.scrollHeight);")
|
"window.scrollTo(document.documentElement.scrollLeft,document.body.scrollHeight);")
|
||||||
|
|
||||||
|
elif mode == 'half':
|
||||||
|
self.driver.execute_script(
|
||||||
|
"window.scrollTo(document.documentElement.scrollLeft,document.body.scrollHeight/2);")
|
||||||
|
|
||||||
elif mode == 'rightmost':
|
elif mode == 'rightmost':
|
||||||
self.driver.execute_script("window.scrollTo(document.body.scrollWidth,document.documentElement.scrollTop);")
|
self.driver.execute_script("window.scrollTo(document.body.scrollWidth,document.documentElement.scrollTop);")
|
||||||
|
|
||||||
@ -498,7 +527,7 @@ class DriverPage(object):
|
|||||||
|
|
||||||
else:
|
else:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Argument mode can only be 'top', 'bottom', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right'.")
|
"Argument mode can only be 'top', 'bottom', 'half', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right'.")
|
||||||
|
|
||||||
def refresh(self) -> None:
|
def refresh(self) -> None:
|
||||||
"""刷新当前页面"""
|
"""刷新当前页面"""
|
||||||
@ -509,16 +538,19 @@ class DriverPage(object):
|
|||||||
self.driver.back()
|
self.driver.back()
|
||||||
|
|
||||||
def set_window_size(self, x: int = None, y: int = None) -> None:
|
def set_window_size(self, x: int = None, y: int = None) -> None:
|
||||||
"""设置浏览器窗口大小,默认最大化 \n
|
"""设置浏览器窗口大小,默认最大化,任一参数为0最小化 \n
|
||||||
:param x: 浏览器窗口高
|
:param x: 浏览器窗口高
|
||||||
:param y: 浏览器窗口宽
|
:param y: 浏览器窗口宽
|
||||||
:return: None
|
:return: None
|
||||||
"""
|
"""
|
||||||
if not x and not y:
|
if x is None and y is None:
|
||||||
self.driver.maximize_window()
|
self.driver.maximize_window()
|
||||||
|
|
||||||
|
elif x == 0 or y == 0:
|
||||||
|
self.driver.minimize_window()
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if x <= 0 or y <= 0:
|
if x < 0 or y < 0:
|
||||||
raise ValueError('Arguments x and y must greater than 0.')
|
raise ValueError('Arguments x and y must greater than 0.')
|
||||||
|
|
||||||
new_x = x or self.driver.get_window_size()['width']
|
new_x = x or self.driver.get_window_size()['width']
|
||||||
|
@ -5,12 +5,12 @@
|
|||||||
@File : driver_page.py
|
@File : driver_page.py
|
||||||
"""
|
"""
|
||||||
from os import popen
|
from os import popen
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
from re import search as RE_SEARCH
|
from re import search as RE_SEARCH
|
||||||
from typing import Union
|
|
||||||
|
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
from DrissionPage.config import OptionsManager, DriverOptions
|
from DrissionPage.config import OptionsManager, DriverOptions
|
||||||
from DrissionPage.drission import Drission
|
from DrissionPage.drission import Drission
|
||||||
@ -190,27 +190,33 @@ def check_driver_version(driver_path: str = None, chrome_path: str = None) -> bo
|
|||||||
|
|
||||||
|
|
||||||
# -------------------------自动识别chrome版本号并下载对应driver------------------------
|
# -------------------------自动识别chrome版本号并下载对应driver------------------------
|
||||||
def get_match_driver(ini_path: str = None,
|
def get_match_driver(ini_path: Union[str, None] = 'default',
|
||||||
save_path: str = None,
|
save_path: str = None,
|
||||||
chrome_path: str = None) -> None:
|
chrome_path: str = None,
|
||||||
|
show_msg: bool = True,
|
||||||
|
check_version: bool = True) -> Union[str, None]:
|
||||||
"""自动识别chrome版本并下载匹配的driver \n
|
"""自动识别chrome版本并下载匹配的driver \n
|
||||||
:param ini_path: 要读取和修改的ini文件路径
|
:param ini_path: 要读取和修改的ini文件路径
|
||||||
:param save_path: chromedriver保存路径
|
:param save_path: chromedriver保存路径
|
||||||
:param chrome_path: 指定chrome.exe位置
|
:param chrome_path: 指定chrome.exe位置
|
||||||
|
:param show_msg: 是否打印信息
|
||||||
|
:param check_version: 是否检查版本匹配
|
||||||
:return: None
|
:return: None
|
||||||
"""
|
"""
|
||||||
save_path = save_path or str(Path(__file__).parent)
|
save_path = save_path or str(Path(__file__).parent)
|
||||||
|
|
||||||
chrome_path = chrome_path or _get_chrome_path(ini_path)
|
chrome_path = chrome_path or _get_chrome_path(ini_path, show_msg)
|
||||||
chrome_path = Path(chrome_path).absolute() if chrome_path else None
|
chrome_path = Path(chrome_path).absolute() if chrome_path else None
|
||||||
print('chrome.exe路径', chrome_path, '\n')
|
if show_msg:
|
||||||
|
print('chrome.exe路径', chrome_path)
|
||||||
|
|
||||||
ver = _get_chrome_version(chrome_path)
|
ver = _get_chrome_version(str(chrome_path))
|
||||||
print('version', ver, '\n')
|
if show_msg:
|
||||||
|
print('version', ver)
|
||||||
|
|
||||||
zip_path = _download_driver(ver, save_path)
|
zip_path = _download_driver(ver, save_path, show_msg=show_msg)
|
||||||
|
|
||||||
if not zip_path:
|
if not zip_path and show_msg:
|
||||||
print('没有找到对应版本的driver。')
|
print('没有找到对应版本的driver。')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -218,52 +224,89 @@ def get_match_driver(ini_path: str = None,
|
|||||||
except TypeError:
|
except TypeError:
|
||||||
driver_path = None
|
driver_path = None
|
||||||
|
|
||||||
print('\n解压路径', driver_path, '\n')
|
if show_msg:
|
||||||
|
print('解压路径', driver_path)
|
||||||
|
|
||||||
if driver_path:
|
if driver_path:
|
||||||
Path(zip_path).unlink()
|
Path(zip_path).unlink()
|
||||||
set_paths(driver_path=driver_path, chrome_path=str(chrome_path), ini_path=ini_path, check_version=False)
|
if ini_path:
|
||||||
|
set_paths(driver_path=driver_path, chrome_path=str(chrome_path), ini_path=ini_path, check_version=False)
|
||||||
|
|
||||||
if not check_driver_version(driver_path, chrome_path):
|
if check_version:
|
||||||
print('获取失败,请手动配置。')
|
if not check_driver_version(driver_path, chrome_path) and show_msg:
|
||||||
|
print('获取失败,请手动配置。')
|
||||||
else:
|
else:
|
||||||
print('获取失败,请手动配置。')
|
if show_msg:
|
||||||
|
print('获取失败,请手动配置。')
|
||||||
|
|
||||||
|
return driver_path
|
||||||
|
|
||||||
|
|
||||||
def _get_chrome_path(ini_path: str = None) -> Union[str, None]:
|
def _get_chrome_path(ini_path: str = None,
|
||||||
|
show_msg: bool = True,
|
||||||
|
from_ini: bool = True,
|
||||||
|
from_regedit: bool = True,
|
||||||
|
from_system_path: bool = True, ) -> Union[str, None]:
|
||||||
"""从ini文件或系统变量中获取chrome.exe的路径 \n
|
"""从ini文件或系统变量中获取chrome.exe的路径 \n
|
||||||
:param ini_path: ini文件路径
|
:param ini_path: ini文件路径
|
||||||
:return: chrome.exe路径
|
:return: chrome.exe路径
|
||||||
"""
|
"""
|
||||||
# -----------从ini文件中获取--------------
|
# -----------从ini文件中获取--------------
|
||||||
try:
|
if ini_path and from_ini:
|
||||||
path = OptionsManager(ini_path).chrome_options['binary_location']
|
try:
|
||||||
except KeyError:
|
path = OptionsManager(ini_path).chrome_options['binary_location']
|
||||||
return None
|
except KeyError:
|
||||||
|
path = None
|
||||||
|
else:
|
||||||
|
path = None
|
||||||
|
|
||||||
if path and Path(path).is_file():
|
if path and Path(path).is_file():
|
||||||
print('ini文件中', end='')
|
print('ini文件中', end='')
|
||||||
return str(path)
|
return str(path)
|
||||||
|
|
||||||
# -----------从系统路径中获取--------------
|
# -----------从注册表中获取--------------
|
||||||
paths = popen('set path').read().lower()
|
if from_regedit:
|
||||||
r = RE_SEARCH(r'[^;]*chrome[^;]*', paths)
|
import winreg
|
||||||
|
try:
|
||||||
|
key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE,
|
||||||
|
r'SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe',
|
||||||
|
reserved=0, access=winreg.KEY_READ)
|
||||||
|
k = winreg.EnumValue(key, 0)
|
||||||
|
winreg.CloseKey(key)
|
||||||
|
|
||||||
if r:
|
if show_msg:
|
||||||
path = Path(r.group(0)) if 'chrome.exe' in r.group(0) else Path(r.group(0)) / 'chrome.exe'
|
print('注册表中', end='')
|
||||||
|
|
||||||
if path.exists():
|
return k[1]
|
||||||
print('系统中', end='')
|
|
||||||
return str(path)
|
|
||||||
|
|
||||||
paths = paths.split(';')
|
except FileNotFoundError:
|
||||||
|
pass
|
||||||
|
|
||||||
for path in paths:
|
# -----------从系统变量中获取--------------
|
||||||
path = Path(path) / 'chrome.exe'
|
if from_system_path:
|
||||||
|
paths = popen('set path').read().lower()
|
||||||
|
r = RE_SEARCH(r'[^;]*chrome[^;]*', paths)
|
||||||
|
|
||||||
if path.exists():
|
if r:
|
||||||
print('系统中', end='')
|
path = Path(r.group(0)) if 'chrome.exe' in r.group(0) else Path(r.group(0)) / 'chrome.exe'
|
||||||
return str(path)
|
|
||||||
|
if path.exists():
|
||||||
|
if show_msg:
|
||||||
|
print('系统变量中', end='')
|
||||||
|
return str(path)
|
||||||
|
|
||||||
|
paths = paths.split(';')
|
||||||
|
|
||||||
|
for path in paths:
|
||||||
|
path = Path(path) / 'chrome.exe'
|
||||||
|
|
||||||
|
try:
|
||||||
|
if path.exists():
|
||||||
|
if show_msg:
|
||||||
|
print('系统变量中', end='')
|
||||||
|
return str(path)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def _get_chrome_version(path: str) -> Union[str, None]:
|
def _get_chrome_version(path: str) -> Union[str, None]:
|
||||||
@ -283,7 +326,7 @@ def _get_chrome_version(path: str) -> Union[str, None]:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _download_driver(version: str, save_path: str = None) -> Union[str, None]:
|
def _download_driver(version: str, save_path: str = None, show_msg: bool = True) -> Union[str, None]:
|
||||||
"""根据传入的版本号到镜像网站查找,下载最相近的 \n
|
"""根据传入的版本号到镜像网站查找,下载最相近的 \n
|
||||||
:param version: 本地版本号
|
:param version: 本地版本号
|
||||||
:return: 保存地址
|
:return: 保存地址
|
||||||
@ -317,7 +360,7 @@ def _download_driver(version: str, save_path: str = None) -> Union[str, None]:
|
|||||||
if remote_ver:
|
if remote_ver:
|
||||||
url = f'https://cdn.npm.taobao.org/dist/chromedriver/{remote_ver}chromedriver_win32.zip'
|
url = f'https://cdn.npm.taobao.org/dist/chromedriver/{remote_ver}chromedriver_win32.zip'
|
||||||
save_path = save_path or Path(__file__).parent
|
save_path = save_path or Path(__file__).parent
|
||||||
result = page.download(url, save_path, file_exists='overwrite', show_msg=True)
|
result = page.download(url, save_path, file_exists='overwrite', show_msg=show_msg)
|
||||||
|
|
||||||
if result[0]:
|
if result[0]:
|
||||||
return result[1]
|
return result[1]
|
||||||
|
@ -47,7 +47,7 @@ class MixPage(Null, SessionPage, DriverPage):
|
|||||||
:param session_options: requests设置,没有传入drission参数时会用这个设置新建Drission对象
|
:param session_options: requests设置,没有传入drission参数时会用这个设置新建Drission对象
|
||||||
"""
|
"""
|
||||||
super().__init__()
|
super().__init__()
|
||||||
if drission in ['s', 'd', 'S', 'D']:
|
if drission in ('s', 'd', 'S', 'D'):
|
||||||
mode = drission.lower()
|
mode = drission.lower()
|
||||||
drission = None
|
drission = None
|
||||||
|
|
||||||
@ -58,6 +58,9 @@ class MixPage(Null, SessionPage, DriverPage):
|
|||||||
self._url_available = None
|
self._url_available = None
|
||||||
self._mode = mode
|
self._mode = mode
|
||||||
|
|
||||||
|
self.retry_times = 3
|
||||||
|
self.retry_interval = 2
|
||||||
|
|
||||||
if mode == 's':
|
if mode == 's':
|
||||||
self._driver = None
|
self._driver = None
|
||||||
self._session = True
|
self._session = True
|
||||||
@ -71,7 +74,7 @@ class MixPage(Null, SessionPage, DriverPage):
|
|||||||
loc_or_str: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement],
|
loc_or_str: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement],
|
||||||
mode: str = 'single',
|
mode: str = 'single',
|
||||||
timeout: float = None):
|
timeout: float = None):
|
||||||
return self.ele(loc_or_str, mode, timeout or self.timeout)
|
return self.ele(loc_or_str, mode, timeout)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def url(self) -> Union[str, None]:
|
def url(self) -> Union[str, None]:
|
||||||
@ -236,8 +239,8 @@ class MixPage(Null, SessionPage, DriverPage):
|
|||||||
data: dict = None,
|
data: dict = None,
|
||||||
go_anyway: bool = False,
|
go_anyway: bool = False,
|
||||||
show_errmsg: bool = False,
|
show_errmsg: bool = False,
|
||||||
retry: int = 2,
|
retry: int = None,
|
||||||
interval: float = 1,
|
interval: float = None,
|
||||||
**kwargs) -> Union[bool, None]:
|
**kwargs) -> Union[bool, None]:
|
||||||
"""用post方式跳转到url,会切换到s模式 \n
|
"""用post方式跳转到url,会切换到s模式 \n
|
||||||
:param url: 目标url
|
:param url: 目标url
|
||||||
@ -260,6 +263,8 @@ class MixPage(Null, SessionPage, DriverPage):
|
|||||||
post_data: dict = None,
|
post_data: dict = None,
|
||||||
show_msg: bool = False,
|
show_msg: bool = False,
|
||||||
show_errmsg: bool = False,
|
show_errmsg: bool = False,
|
||||||
|
retry: int = None,
|
||||||
|
interval: float = None,
|
||||||
**kwargs) -> Tuple[bool, str]:
|
**kwargs) -> Tuple[bool, str]:
|
||||||
"""下载一个文件 \n
|
"""下载一个文件 \n
|
||||||
d模式下下载前先同步cookies \n
|
d模式下下载前先同步cookies \n
|
||||||
@ -270,12 +275,16 @@ class MixPage(Null, SessionPage, DriverPage):
|
|||||||
:param post_data: post方式的数据
|
:param post_data: post方式的数据
|
||||||
:param show_msg: 是否显示下载信息
|
:param show_msg: 是否显示下载信息
|
||||||
:param show_errmsg: 是否显示和抛出异常
|
:param show_errmsg: 是否显示和抛出异常
|
||||||
|
:param retry: 重试次数
|
||||||
|
:param interval: 重试间隔时间
|
||||||
:param kwargs: 连接参数
|
:param kwargs: 连接参数
|
||||||
:return: 下载是否成功(bool)和状态信息(成功时信息为文件路径)的元组
|
:return: 下载是否成功(bool)和状态信息(成功时信息为文件路径)的元组
|
||||||
"""
|
"""
|
||||||
if self.mode == 'd':
|
if self.mode == 'd':
|
||||||
self.cookies_to_session()
|
self.cookies_to_session()
|
||||||
return super().download(file_url, goal_path, rename, file_exists, post_data, show_msg, show_errmsg, **kwargs)
|
|
||||||
|
return super().download(file_url, goal_path, rename, file_exists, post_data, show_msg, show_errmsg, retry,
|
||||||
|
interval, **kwargs)
|
||||||
|
|
||||||
# ----------------重写DriverPage的函数-----------------------
|
# ----------------重写DriverPage的函数-----------------------
|
||||||
|
|
||||||
@ -319,8 +328,8 @@ class MixPage(Null, SessionPage, DriverPage):
|
|||||||
url: str,
|
url: str,
|
||||||
go_anyway=False,
|
go_anyway=False,
|
||||||
show_errmsg: bool = False,
|
show_errmsg: bool = False,
|
||||||
retry: int = 2,
|
retry: int = None,
|
||||||
interval: float = 1,
|
interval: float = None,
|
||||||
**kwargs) -> Union[bool, None]:
|
**kwargs) -> Union[bool, None]:
|
||||||
"""跳转到一个url \n
|
"""跳转到一个url \n
|
||||||
跳转前先同步cookies,跳转后判断目标url是否可用
|
跳转前先同步cookies,跳转后判断目标url是否可用
|
||||||
@ -342,32 +351,39 @@ class MixPage(Null, SessionPage, DriverPage):
|
|||||||
mode: str = None,
|
mode: str = None,
|
||||||
timeout: float = None) \
|
timeout: float = None) \
|
||||||
-> Union[DriverElement, SessionElement, str, List[SessionElement], List[DriverElement]]:
|
-> Union[DriverElement, SessionElement, str, List[SessionElement], List[DriverElement]]:
|
||||||
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n
|
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n
|
||||||
示例: \n
|
示例: \n
|
||||||
- 接收到元素对象时: \n
|
- 接收到元素对象时: \n
|
||||||
返回元素对象对象 \n
|
返回元素对象对象 \n
|
||||||
- 用loc元组查找: \n
|
- 用loc元组查找: \n
|
||||||
ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n
|
ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n
|
||||||
- 用查询字符串查找: \n
|
- 用查询字符串查找: \n
|
||||||
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
||||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||||
page.ele('.ele_class') - 返回第一个 class 为 ele_class 的元素 \n
|
page.ele('.ele_class') - 返回第一个 class 为 ele_class 的元素 \n
|
||||||
page.ele('.:ele_class') - 返回第一个 class 中含有 ele_class 的元素 \n
|
page.ele('.:ele_class') - 返回第一个 class 中含有 ele_class 的元素 \n
|
||||||
page.ele('#ele_id') - 返回第一个 id 为 ele_id 的元素 \n
|
page.ele('#ele_id') - 返回第一个 id 为 ele_id 的元素 \n
|
||||||
page.ele('#:ele_id') - 返回第一个 id 中含有 ele_id 的元素 \n
|
page.ele('#:ele_id') - 返回第一个 id 中含有 ele_id 的元素 \n
|
||||||
page.ele('@class:ele_class') - 返回第一个class含有ele_class的元素 \n
|
page.ele('@class:ele_class') - 返回第一个class含有ele_class的元素 \n
|
||||||
page.ele('@name=ele_name') - 返回第一个name等于ele_name的元素 \n
|
page.ele('@name=ele_name') - 返回第一个name等于ele_name的元素 \n
|
||||||
page.ele('@placeholder') - 返回第一个带placeholder属性的元素 \n
|
page.ele('@placeholder') - 返回第一个带placeholder属性的元素 \n
|
||||||
page.ele('tag:p') - 返回第一个<p>元素 \n
|
page.ele('tag:p') - 返回第一个<p>元素 \n
|
||||||
page.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div元素 \n
|
page.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div元素 \n
|
||||||
page.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div元素 \n
|
page.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div元素 \n
|
||||||
page.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div元素 \n
|
page.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div元素 \n
|
||||||
page.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div元素 \n
|
page.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div元素 \n
|
||||||
page.ele('text:some_text') - 返回第一个文本含有some_text的元素 \n
|
page.ele('text:some_text') - 返回第一个文本含有some_text的元素 \n
|
||||||
page.ele('some_text') - 返回第一个文本含有some_text的元素(等价于上一行) \n
|
page.ele('some_text') - 返回第一个文本含有some_text的元素(等价于上一行) \n
|
||||||
page.ele('text=some_text') - 返回第一个文本等于some_text的元素 \n
|
page.ele('text=some_text') - 返回第一个文本等于some_text的元素 \n
|
||||||
page.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的元素 \n
|
page.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的元素 \n
|
||||||
page.ele('css:div.ele_class') - 返回第一个符合css selector的元素 \n
|
page.ele('css:div.ele_class') - 返回第一个符合css selector的元素 \n
|
||||||
|
- 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n
|
||||||
|
page.ele('x://div[@class="ele_class"]') - 等同于 page.ele('xpath://div[@class="ele_class"]') \n
|
||||||
|
page.ele('c:div.ele_class') - 等同于 page.ele('css:div.ele_class') \n
|
||||||
|
page.ele('t:div') - 等同于 page.ele('tag:div') \n
|
||||||
|
page.ele('t:div@tx()=some_text') - 等同于 page.ele('tag:div@text()=some_text') \n
|
||||||
|
page.ele('tx:some_text') - 等同于 page.ele('text:some_text') \n
|
||||||
|
page.ele('tx=some_text') - 等同于 page.ele('text=some_text')
|
||||||
:param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串
|
:param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串
|
||||||
:param mode: 'single' 或 'all‘,对应查找一个或全部
|
:param mode: 'single' 或 'all‘,对应查找一个或全部
|
||||||
:param timeout: 查找元素超时时间,d模式专用
|
:param timeout: 查找元素超时时间,d模式专用
|
||||||
@ -376,36 +392,42 @@ class MixPage(Null, SessionPage, DriverPage):
|
|||||||
if self._mode == 's':
|
if self._mode == 's':
|
||||||
return super().ele(loc_or_ele, mode=mode)
|
return super().ele(loc_or_ele, mode=mode)
|
||||||
elif self._mode == 'd':
|
elif self._mode == 'd':
|
||||||
timeout = timeout or self.timeout
|
|
||||||
return super(SessionPage, self).ele(loc_or_ele, mode=mode, timeout=timeout)
|
return super(SessionPage, self).ele(loc_or_ele, mode=mode, timeout=timeout)
|
||||||
|
|
||||||
def eles(self,
|
def eles(self,
|
||||||
loc_or_str: Union[Tuple[str, str], str],
|
loc_or_str: Union[Tuple[str, str], str],
|
||||||
timeout: float = None) -> Union[List[DriverElement], List[SessionElement]]:
|
timeout: float = None) -> Union[List[DriverElement], List[SessionElement]]:
|
||||||
"""返回页面中所有符合条件的元素、属性或节点文本 \n
|
"""返回页面中所有符合条件的元素、属性或节点文本 \n
|
||||||
示例: \n
|
示例: \n
|
||||||
- 用loc元组查找: \n
|
- 用loc元组查找: \n
|
||||||
page.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的元素 \n
|
page.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的元素 \n
|
||||||
- 用查询字符串查找: \n
|
- 用查询字符串查找: \n
|
||||||
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
||||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||||
page.eles('.ele_class') - 返回所有 class 为 ele_class 的元素 \n
|
page.eles('.ele_class') - 返回所有 class 为 ele_class 的元素 \n
|
||||||
page.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的元素 \n
|
page.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的元素 \n
|
||||||
page.eles('#ele_id') - 返回所有 id 为 ele_id 的元素 \n
|
page.eles('#ele_id') - 返回所有 id 为 ele_id 的元素 \n
|
||||||
page.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的元素 \n
|
page.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的元素 \n
|
||||||
page.eles('@class:ele_class') - 返回所有class含有ele_class的元素 \n
|
page.eles('@class:ele_class') - 返回所有class含有ele_class的元素 \n
|
||||||
page.eles('@name=ele_name') - 返回所有name等于ele_name的元素 \n
|
page.eles('@name=ele_name') - 返回所有name等于ele_name的元素 \n
|
||||||
page.eles('@placeholder') - 返回所有带placeholder属性的元素 \n
|
page.eles('@placeholder') - 返回所有带placeholder属性的元素 \n
|
||||||
page.eles('tag:p') - 返回所有<p>元素 \n
|
page.eles('tag:p') - 返回所有<p>元素 \n
|
||||||
page.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div元素 \n
|
page.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div元素 \n
|
||||||
page.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div元素 \n
|
page.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div元素 \n
|
||||||
page.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div元素 \n
|
page.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div元素 \n
|
||||||
page.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div元素 \n
|
page.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div元素 \n
|
||||||
page.eles('text:some_text') - 返回所有文本含有some_text的元素 \n
|
page.eles('text:some_text') - 返回所有文本含有some_text的元素 \n
|
||||||
page.eles('some_text') - 返回所有文本含有some_text的元素(等价于上一行) \n
|
page.eles('some_text') - 返回所有文本含有some_text的元素(等价于上一行) \n
|
||||||
page.eles('text=some_text') - 返回所有文本等于some_text的元素 \n
|
page.eles('text=some_text') - 返回所有文本等于some_text的元素 \n
|
||||||
page.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的元素 \n
|
page.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的元素 \n
|
||||||
page.eles('css:div.ele_class') - 返回所有符合css selector的元素 \n
|
page.eles('css:div.ele_class') - 返回所有符合css selector的元素 \n
|
||||||
|
- 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n
|
||||||
|
page.eles('x://div[@class="ele_class"]') - 等同于 page.eles('xpath://div[@class="ele_class"]') \n
|
||||||
|
page.eles('c:div.ele_class') - 等同于 page.eles('css:div.ele_class') \n
|
||||||
|
page.eles('t:div') - 等同于 page.eles('tag:div') \n
|
||||||
|
page.eles('t:div@tx()=some_text') - 等同于 page.eles('tag:div@text()=some_text') \n
|
||||||
|
page.eles('tx:some_text') - 等同于 page.eles('text:some_text') \n
|
||||||
|
page.eles('tx=some_text') - 等同于 page.eles('text=some_text')
|
||||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||||
:param timeout: 查找元素超时时间,d模式专用
|
:param timeout: 查找元素超时时间,d模式专用
|
||||||
:return: 元素对象或属性、文本节点文本组成的列表
|
:return: 元素对象或属性、文本节点文本组成的列表
|
||||||
|
@ -36,9 +36,8 @@ class SessionElement(DrissionElement):
|
|||||||
@property
|
@property
|
||||||
def html(self) -> str:
|
def html(self) -> str:
|
||||||
"""返回元素outerHTML文本"""
|
"""返回元素outerHTML文本"""
|
||||||
# tostring()会把跟紧元素的文本节点也带上,因此要去掉
|
|
||||||
html = format_html(tostring(self._inner_ele, method="html").decode())
|
html = format_html(tostring(self._inner_ele, method="html").decode())
|
||||||
return html[:html.rfind('>') + 1]
|
return html[:html.rfind('>') + 1] # tostring()会把跟紧元素的文本节点也带上,因此要去掉
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def inner_html(self) -> str:
|
def inner_html(self) -> str:
|
||||||
@ -46,6 +45,48 @@ class SessionElement(DrissionElement):
|
|||||||
r = re.match(r'<.*?>(.*)</.*?>', self.html, flags=re.DOTALL)
|
r = re.match(r'<.*?>(.*)</.*?>', self.html, flags=re.DOTALL)
|
||||||
return '' if not r else r.group(1)
|
return '' if not r else r.group(1)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def text(self) -> str:
|
||||||
|
"""返回元素内所有文本"""
|
||||||
|
|
||||||
|
# 为尽量保证与浏览器结果一致,弄得比较复杂
|
||||||
|
def get_node(ele, pre: bool = False):
|
||||||
|
str_list = []
|
||||||
|
if ele.tag == 'pre':
|
||||||
|
pre = True
|
||||||
|
|
||||||
|
current_tag = None
|
||||||
|
for el in ele.eles('xpath:./text() | *'):
|
||||||
|
if current_tag in ('br', 'p') and str_list and str_list[-1] != '\n':
|
||||||
|
str_list.append('\n')
|
||||||
|
|
||||||
|
if isinstance(el, str):
|
||||||
|
if el.replace(' ', '').replace('\n', '') != '':
|
||||||
|
if pre:
|
||||||
|
str_list.append(el)
|
||||||
|
else:
|
||||||
|
str_list.append(el.replace('\n', ' ').strip(' \t'))
|
||||||
|
|
||||||
|
elif '\n' in el and str_list and str_list[-1] != '\n':
|
||||||
|
str_list.append('\n')
|
||||||
|
else:
|
||||||
|
str_list.append(' ')
|
||||||
|
current_tag = None
|
||||||
|
else:
|
||||||
|
str_list.extend(get_node(el, pre))
|
||||||
|
current_tag = el.tag
|
||||||
|
|
||||||
|
return str_list
|
||||||
|
|
||||||
|
re_str = ''.join(get_node(self))
|
||||||
|
re_str = re.sub(r' {2,}', ' ', re_str)
|
||||||
|
return format_html(re_str, False)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def raw_text(self) -> str:
|
||||||
|
"""返回未格式化处理的元素内文本"""
|
||||||
|
return str(self._inner_ele.text_content())
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def tag(self) -> str:
|
def tag(self) -> str:
|
||||||
"""返回元素类型"""
|
"""返回元素类型"""
|
||||||
@ -56,11 +97,6 @@ class SessionElement(DrissionElement):
|
|||||||
"""返回元素所有属性及值"""
|
"""返回元素所有属性及值"""
|
||||||
return {attr: self.attr(attr) for attr, val in self.inner_ele.items()}
|
return {attr: self.attr(attr) for attr, val in self.inner_ele.items()}
|
||||||
|
|
||||||
@property
|
|
||||||
def text(self) -> str:
|
|
||||||
"""返回元素内所有文本"""
|
|
||||||
return str(self._inner_ele.text_content())
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def link(self) -> str:
|
def link(self) -> str:
|
||||||
"""返回href或src绝对url"""
|
"""返回href或src绝对url"""
|
||||||
@ -91,26 +127,22 @@ class SessionElement(DrissionElement):
|
|||||||
"""返回前一个兄弟元素"""
|
"""返回前一个兄弟元素"""
|
||||||
return self._get_brother(1, 'ele', 'prev')
|
return self._get_brother(1, 'ele', 'prev')
|
||||||
|
|
||||||
|
@property
|
||||||
|
def comments(self):
|
||||||
|
return self.eles('xpath:.//comment()')
|
||||||
|
|
||||||
def texts(self, text_node_only: bool = False) -> list:
|
def texts(self, text_node_only: bool = False) -> list:
|
||||||
"""返回元素内所有直接子节点的文本,包括元素和文本节点 \n
|
"""返回元素内所有直接子节点的文本,包括元素和文本节点 \n
|
||||||
:param text_node_only: 是否只返回文本节点
|
:param text_node_only: 是否只返回文本节点
|
||||||
:return: 文本列表
|
:return: 文本列表
|
||||||
"""
|
"""
|
||||||
if text_node_only:
|
if text_node_only:
|
||||||
return self.eles('xpath:/text()')
|
texts = self.eles('xpath:/text()')
|
||||||
else:
|
else:
|
||||||
texts = []
|
texts = [x if isinstance(x, str) else x.text for x in self.eles('xpath:./text() | *')]
|
||||||
|
|
||||||
for node in self.eles('xpath:/node()'):
|
return [format_html(x.strip(' ')) for x in texts if
|
||||||
if isinstance(node, str):
|
x and x.replace('\n', '').replace('\t', '').replace(' ', '') != '']
|
||||||
text = node
|
|
||||||
else:
|
|
||||||
text = node.text
|
|
||||||
|
|
||||||
if text:
|
|
||||||
texts.append(text)
|
|
||||||
|
|
||||||
return texts
|
|
||||||
|
|
||||||
def parents(self, num: int = 1):
|
def parents(self, num: int = 1):
|
||||||
"""返回上面第num级父元素 \n
|
"""返回上面第num级父元素 \n
|
||||||
@ -155,7 +187,7 @@ class SessionElement(DrissionElement):
|
|||||||
elif attr == 'src':
|
elif attr == 'src':
|
||||||
return self._make_absolute(self.inner_ele.get('src'))
|
return self._make_absolute(self.inner_ele.get('src'))
|
||||||
|
|
||||||
elif attr in ['text', 'innerText']:
|
elif attr in ('text', 'innerText'):
|
||||||
return self.text
|
return self.text
|
||||||
|
|
||||||
elif attr == 'outerHTML':
|
elif attr == 'outerHTML':
|
||||||
@ -168,30 +200,37 @@ class SessionElement(DrissionElement):
|
|||||||
return self.inner_ele.get(attr)
|
return self.inner_ele.get(attr)
|
||||||
|
|
||||||
def ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None):
|
def ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None):
|
||||||
"""返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 \n
|
"""返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 \n
|
||||||
示例: \n
|
示例: \n
|
||||||
- 用loc元组查找: \n
|
- 用loc元组查找: \n
|
||||||
ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n
|
ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n
|
||||||
- 用查询字符串查找: \n
|
- 用查询字符串查找: \n
|
||||||
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
||||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||||
ele.ele('.ele_class') - 返回第一个 class 为 ele_class 的子元素 \n
|
ele.ele('.ele_class') - 返回第一个 class 为 ele_class 的子元素 \n
|
||||||
ele.ele('.:ele_class') - 返回第一个 class 中含有 ele_class 的子元素 \n
|
ele.ele('.:ele_class') - 返回第一个 class 中含有 ele_class 的子元素 \n
|
||||||
ele.ele('#ele_id') - 返回第一个 id 为 ele_id 的子元素 \n
|
ele.ele('#ele_id') - 返回第一个 id 为 ele_id 的子元素 \n
|
||||||
ele.ele('#:ele_id') - 返回第一个 id 中含有 ele_id 的子元素 \n
|
ele.ele('#:ele_id') - 返回第一个 id 中含有 ele_id 的子元素 \n
|
||||||
ele.ele('@class:ele_class') - 返回第一个class含有ele_class的子元素 \n
|
ele.ele('@class:ele_class') - 返回第一个class含有ele_class的子元素 \n
|
||||||
ele.ele('@name=ele_name') - 返回第一个name等于ele_name的子元素 \n
|
ele.ele('@name=ele_name') - 返回第一个name等于ele_name的子元素 \n
|
||||||
ele.ele('@placeholder') - 返回第一个带placeholder属性的子元素 \n
|
ele.ele('@placeholder') - 返回第一个带placeholder属性的子元素 \n
|
||||||
ele.ele('tag:p') - 返回第一个<p>子元素 \n
|
ele.ele('tag:p') - 返回第一个<p>子元素 \n
|
||||||
ele.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div子元素 \n
|
ele.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div子元素 \n
|
||||||
ele.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div子元素 \n
|
ele.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div子元素 \n
|
||||||
ele.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div子元素 \n
|
ele.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div子元素 \n
|
||||||
ele.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div子元素 \n
|
ele.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div子元素 \n
|
||||||
ele.ele('text:some_text') - 返回第一个文本含有some_text的子元素 \n
|
ele.ele('text:some_text') - 返回第一个文本含有some_text的子元素 \n
|
||||||
ele.ele('some_text') - 返回第一个文本含有some_text的子元素(等价于上一行) \n
|
ele.ele('some_text') - 返回第一个文本含有some_text的子元素(等价于上一行) \n
|
||||||
ele.ele('text=some_text') - 返回第一个文本等于some_text的子元素 \n
|
ele.ele('text=some_text') - 返回第一个文本等于some_text的子元素 \n
|
||||||
ele.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的子元素 \n
|
ele.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的子元素 \n
|
||||||
ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n
|
ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n
|
||||||
|
- 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n
|
||||||
|
ele.ele('x://div[@class="ele_class"]') - 等同于 ele.ele('xpath://div[@class="ele_class"]') \n
|
||||||
|
ele.ele('c:div.ele_class') - 等同于 ele.ele('css:div.ele_class') \n
|
||||||
|
ele.ele('t:div') - 等同于 ele.ele('tag:div') \n
|
||||||
|
ele.ele('t:div@tx()=some_text') - 等同于 ele.ele('tag:div@text()=some_text') \n
|
||||||
|
ele.ele('tx:some_text') - 等同于 ele.ele('text:some_text') \n
|
||||||
|
ele.ele('tx=some_text') - 等同于 ele.ele('text=some_text')
|
||||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||||
:return: SessionElement对象
|
:return: SessionElement对象
|
||||||
@ -222,30 +261,37 @@ class SessionElement(DrissionElement):
|
|||||||
return execute_session_find(element, loc_or_str, mode)
|
return execute_session_find(element, loc_or_str, mode)
|
||||||
|
|
||||||
def eles(self, loc_or_str: Union[Tuple[str, str], str]):
|
def eles(self, loc_or_str: Union[Tuple[str, str], str]):
|
||||||
"""返回当前元素下级所有符合条件的子元素、属性或节点文本 \n
|
"""返回当前元素下级所有符合条件的子元素、属性或节点文本 \n
|
||||||
示例: \n
|
示例: \n
|
||||||
- 用loc元组查找: \n
|
- 用loc元组查找: \n
|
||||||
ele.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n
|
ele.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n
|
||||||
- 用查询字符串查找: \n
|
- 用查询字符串查找: \n
|
||||||
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n
|
||||||
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
@表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n
|
||||||
ele.eles('.ele_class') - 返回所有 class 为 ele_class 的子元素 \n
|
ele.eles('.ele_class') - 返回所有 class 为 ele_class 的子元素 \n
|
||||||
ele.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的子元素 \n
|
ele.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的子元素 \n
|
||||||
ele.eles('#ele_id') - 返回所有 id 为 ele_id 的子元素 \n
|
ele.eles('#ele_id') - 返回所有 id 为 ele_id 的子元素 \n
|
||||||
ele.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的子元素 \n
|
ele.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的子元素 \n
|
||||||
ele.eles('@class:ele_class') - 返回所有class含有ele_class的子元素 \n
|
ele.eles('@class:ele_class') - 返回所有class含有ele_class的子元素 \n
|
||||||
ele.eles('@name=ele_name') - 返回所有name等于ele_name的子元素 \n
|
ele.eles('@name=ele_name') - 返回所有name等于ele_name的子元素 \n
|
||||||
ele.eles('@placeholder') - 返回所有带placeholder属性的子元素 \n
|
ele.eles('@placeholder') - 返回所有带placeholder属性的子元素 \n
|
||||||
ele.eles('tag:p') - 返回所有<p>子元素 \n
|
ele.eles('tag:p') - 返回所有<p>子元素 \n
|
||||||
ele.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div子元素 \n
|
ele.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div子元素 \n
|
||||||
ele.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div子元素 \n
|
ele.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div子元素 \n
|
||||||
ele.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div子元素 \n
|
ele.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div子元素 \n
|
||||||
ele.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div子元素 \n
|
ele.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div子元素 \n
|
||||||
ele.eles('text:some_text') - 返回所有文本含有some_text的子元素 \n
|
ele.eles('text:some_text') - 返回所有文本含有some_text的子元素 \n
|
||||||
ele.eles('some_text') - 返回所有文本含有some_text的子元素(等价于上一行) \n
|
ele.eles('some_text') - 返回所有文本含有some_text的子元素(等价于上一行) \n
|
||||||
ele.eles('text=some_text') - 返回所有文本等于some_text的子元素 \n
|
ele.eles('text=some_text') - 返回所有文本等于some_text的子元素 \n
|
||||||
ele.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的子元素 \n
|
ele.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的子元素 \n
|
||||||
ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n
|
ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n
|
||||||
|
- 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n
|
||||||
|
ele.eles('x://div[@class="ele_class"]') - 等同于 ele.eles('xpath://div[@class="ele_class"]') \n
|
||||||
|
ele.eles('c:div.ele_class') - 等同于 ele.eles('css:div.ele_class') \n
|
||||||
|
ele.eles('t:div') - 等同于 ele.eles('tag:div') \n
|
||||||
|
ele.eles('t:div@tx()=some_text') - 等同于 ele.eles('tag:div@text()=some_text') \n
|
||||||
|
ele.eles('tx:some_text') - 等同于 ele.eles('text:some_text') \n
|
||||||
|
ele.eles('tx=some_text') - 等同于 ele.eles('text=some_text')
|
||||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||||
:return: SessionElement对象组成的列表
|
:return: SessionElement对象组成的列表
|
||||||
"""
|
"""
|
||||||
@ -284,12 +330,6 @@ class SessionElement(DrissionElement):
|
|||||||
ele = self
|
ele = self
|
||||||
|
|
||||||
while ele:
|
while ele:
|
||||||
# ele_id = ele.attr('id')
|
|
||||||
|
|
||||||
# if ele_id:
|
|
||||||
# return f'#{ele_id}{path_str}' if mode == 'css' else f'//{ele.tag}[@id="{ele_id}"]{path_str}'
|
|
||||||
# else:
|
|
||||||
|
|
||||||
if mode == 'css':
|
if mode == 'css':
|
||||||
brothers = len(ele.eles(f'xpath:./preceding-sibling::*'))
|
brothers = len(ele.eles(f'xpath:./preceding-sibling::*'))
|
||||||
path_str = f'>:nth-child({brothers + 1}){path_str}'
|
path_str = f'>:nth-child({brothers + 1}){path_str}'
|
||||||
@ -302,7 +342,7 @@ class SessionElement(DrissionElement):
|
|||||||
return path_str[1:] if mode == 'css' else path_str
|
return path_str[1:] if mode == 'css' else path_str
|
||||||
|
|
||||||
def _get_brother(self, num: int = 1, mode: str = 'ele', direction: str = 'next'):
|
def _get_brother(self, num: int = 1, mode: str = 'ele', direction: str = 'next'):
|
||||||
"""返回前面第num个兄弟元素或节点 \n
|
"""返回前面或后面第num个兄弟元素或节点 \n
|
||||||
:param num: 前面第几个兄弟元素或节点
|
:param num: 前面或后面第几个兄弟元素或节点
|
||||||
:param mode: 'ele', 'node' 或 'text',匹配元素、节点、或文本节点
|
:param mode: 'ele', 'node' 或 'text',匹配元素、节点、或文本节点
|
||||||
:param direction: 'next' 或 'prev',查找的方向
|
:param direction: 'next' 或 'prev',查找的方向
|
||||||
@ -348,7 +388,7 @@ def execute_session_find(page_or_ele,
|
|||||||
:return: 返回SessionElement元素或列表
|
:return: 返回SessionElement元素或列表
|
||||||
"""
|
"""
|
||||||
mode = mode or 'single'
|
mode = mode or 'single'
|
||||||
if mode not in ['single', 'all']:
|
if mode not in ('single', 'all'):
|
||||||
raise ValueError(f"Argument mode can only be 'single' or 'all', not '{mode}'.")
|
raise ValueError(f"Argument mode can only be 'single' or 'all', not '{mode}'.")
|
||||||
|
|
||||||
# 根据传入对象类型获取页面对象和lxml元素对象
|
# 根据传入对象类型获取页面对象和lxml元素对象
|
||||||
@ -357,7 +397,7 @@ def execute_session_find(page_or_ele,
|
|||||||
page_or_ele = page_or_ele.inner_ele
|
page_or_ele = page_or_ele.inner_ele
|
||||||
else: # 传入的是SessionPage对象
|
else: # 传入的是SessionPage对象
|
||||||
page = page_or_ele
|
page = page_or_ele
|
||||||
page_or_ele = fromstring(page_or_ele.html)
|
page_or_ele = fromstring(re.sub(r' ?', ' ', page_or_ele.response.text))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 用lxml内置方法获取lxml的元素对象列表
|
# 用lxml内置方法获取lxml的元素对象列表
|
||||||
@ -368,6 +408,10 @@ def execute_session_find(page_or_ele,
|
|||||||
else:
|
else:
|
||||||
ele = page_or_ele.cssselect(loc[1])
|
ele = page_or_ele.cssselect(loc[1])
|
||||||
|
|
||||||
|
# 结果不是列表,如数字
|
||||||
|
if not isinstance(ele, list):
|
||||||
|
return ele
|
||||||
|
|
||||||
# 把lxml元素对象包装成SessionElement对象并按需要返回第一个或全部
|
# 把lxml元素对象包装成SessionElement对象并按需要返回第一个或全部
|
||||||
if mode == 'single':
|
if mode == 'single':
|
||||||
ele = ele[0] if ele else None
|
ele = ele[0] if ele else None
|
||||||
|
@ -10,11 +10,11 @@ from pathlib import Path
|
|||||||
from random import randint
|
from random import randint
|
||||||
from re import search as re_SEARCH
|
from re import search as re_SEARCH
|
||||||
from re import sub as re_SUB
|
from re import sub as re_SUB
|
||||||
from time import time, sleep
|
|
||||||
from typing import Union, List, Tuple
|
from typing import Union, List, Tuple
|
||||||
from urllib.parse import urlparse, quote, unquote
|
from urllib.parse import urlparse, quote, unquote
|
||||||
|
|
||||||
from requests import Session, Response
|
from requests import Session, Response
|
||||||
|
from time import time, sleep
|
||||||
from tldextract import extract
|
from tldextract import extract
|
||||||
|
|
||||||
from .common import str_to_loc, translate_loc, get_available_file_name, format_html
|
from .common import str_to_loc, translate_loc, get_available_file_name, format_html
|
||||||
@ -33,6 +33,9 @@ class SessionPage(object):
|
|||||||
self._url_available = None
|
self._url_available = None
|
||||||
self._response = None
|
self._response = None
|
||||||
|
|
||||||
|
self.retry_times = 3
|
||||||
|
self.retry_interval = 2
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def session(self) -> Session:
|
def session(self) -> Session:
|
||||||
"""返回session对象"""
|
"""返回session对象"""
|
||||||
@ -194,17 +197,25 @@ class SessionPage(object):
|
|||||||
:param kwargs: 连接参数
|
:param kwargs: 连接参数
|
||||||
:return: HTMLResponse对象
|
:return: HTMLResponse对象
|
||||||
"""
|
"""
|
||||||
r = self._make_response(to_url, mode=mode, show_errmsg=show_errmsg, **kwargs)[0]
|
err = None
|
||||||
|
r = None
|
||||||
|
|
||||||
while times and (not r or r.content == b''):
|
for _ in range(times + 1):
|
||||||
if r is not None and r.status_code in (403, 404):
|
try:
|
||||||
|
r = self._make_response(to_url, mode=mode, show_errmsg=True, **kwargs)[0]
|
||||||
|
except Exception as e:
|
||||||
|
err = e
|
||||||
|
r = None
|
||||||
|
|
||||||
|
if r and (r.content != b'' or r.status_code in (403, 404)):
|
||||||
break
|
break
|
||||||
|
|
||||||
print('重试', to_url)
|
if _ < times:
|
||||||
sleep(interval)
|
sleep(interval)
|
||||||
|
print(f'重试 {to_url}')
|
||||||
|
|
||||||
r = self._make_response(to_url, mode=mode, show_errmsg=show_errmsg, **kwargs)[0]
|
if not r and show_errmsg:
|
||||||
times -= 1
|
raise err if err is not None else ConnectionError('Connect error.')
|
||||||
|
|
||||||
return r
|
return r
|
||||||
|
|
||||||
@ -212,8 +223,8 @@ class SessionPage(object):
|
|||||||
url: str,
|
url: str,
|
||||||
go_anyway: bool = False,
|
go_anyway: bool = False,
|
||||||
show_errmsg: bool = False,
|
show_errmsg: bool = False,
|
||||||
retry: int = 0,
|
retry: int = None,
|
||||||
interval: float = 1,
|
interval: float = None,
|
||||||
**kwargs) -> Union[bool, None]:
|
**kwargs) -> Union[bool, None]:
|
||||||
"""用get方式跳转到url \n
|
"""用get方式跳转到url \n
|
||||||
:param url: 目标url
|
:param url: 目标url
|
||||||
@ -224,7 +235,9 @@ class SessionPage(object):
|
|||||||
:param kwargs: 连接参数
|
:param kwargs: 连接参数
|
||||||
:return: url是否可用
|
:return: url是否可用
|
||||||
"""
|
"""
|
||||||
to_url = quote(url, safe='/:&?=%;#@+')
|
to_url = quote(url, safe='/:&?=%;#@+!')
|
||||||
|
retry = int(retry) if retry is not None else int(self.retry_times)
|
||||||
|
interval = int(interval) if interval is not None else int(self.retry_interval)
|
||||||
|
|
||||||
if not url or (not go_anyway and self.url == to_url):
|
if not url or (not go_anyway and self.url == to_url):
|
||||||
return
|
return
|
||||||
@ -252,8 +265,8 @@ class SessionPage(object):
|
|||||||
data: dict = None,
|
data: dict = None,
|
||||||
go_anyway: bool = True,
|
go_anyway: bool = True,
|
||||||
show_errmsg: bool = False,
|
show_errmsg: bool = False,
|
||||||
retry: int = 0,
|
retry: int = None,
|
||||||
interval: float = 1,
|
interval: float = None,
|
||||||
**kwargs) -> Union[bool, None]:
|
**kwargs) -> Union[bool, None]:
|
||||||
"""用post方式跳转到url \n
|
"""用post方式跳转到url \n
|
||||||
:param url: 目标url
|
:param url: 目标url
|
||||||
@ -265,7 +278,9 @@ class SessionPage(object):
|
|||||||
:param kwargs: 连接参数
|
:param kwargs: 连接参数
|
||||||
:return: url是否可用
|
:return: url是否可用
|
||||||
"""
|
"""
|
||||||
to_url = quote(url, safe='/:&?=%;#@')
|
to_url = quote(url, safe='/:&?=%;#@+!')
|
||||||
|
retry = int(retry) if retry is not None else int(self.retry_times)
|
||||||
|
interval = int(interval) if interval is not None else int(self.retry_interval)
|
||||||
|
|
||||||
if not url or (not go_anyway and self._url == to_url):
|
if not url or (not go_anyway and self._url == to_url):
|
||||||
return
|
return
|
||||||
@ -295,6 +310,8 @@ class SessionPage(object):
|
|||||||
post_data: dict = None,
|
post_data: dict = None,
|
||||||
show_msg: bool = False,
|
show_msg: bool = False,
|
||||||
show_errmsg: bool = False,
|
show_errmsg: bool = False,
|
||||||
|
retry: int = None,
|
||||||
|
interval: float = None,
|
||||||
**kwargs) -> tuple:
|
**kwargs) -> tuple:
|
||||||
"""下载一个文件 \n
|
"""下载一个文件 \n
|
||||||
:param file_url: 文件url
|
:param file_url: 文件url
|
||||||
@ -304,158 +321,189 @@ class SessionPage(object):
|
|||||||
:param post_data: post方式的数据
|
:param post_data: post方式的数据
|
||||||
:param show_msg: 是否显示下载信息
|
:param show_msg: 是否显示下载信息
|
||||||
:param show_errmsg: 是否抛出和显示异常
|
:param show_errmsg: 是否抛出和显示异常
|
||||||
|
:param retry: 重试次数
|
||||||
|
:param interval: 重试间隔时间
|
||||||
:param kwargs: 连接参数
|
:param kwargs: 连接参数
|
||||||
:return: 下载是否成功(bool)和状态信息(成功时信息为文件路径)的元组
|
:return: 下载是否成功(bool)和状态信息(成功时信息为文件路径)的元组
|
||||||
"""
|
"""
|
||||||
# 生成的response不写入self._response,是临时的
|
if file_exists == 'skip' and Path(f'{goal_path}\\{rename}').exists():
|
||||||
kwargs['stream'] = True
|
|
||||||
|
|
||||||
if 'timeout' not in kwargs:
|
|
||||||
kwargs['timeout'] = 20
|
|
||||||
|
|
||||||
mode = 'post' if post_data else 'get'
|
|
||||||
r, info = self._make_response(file_url, mode=mode, data=post_data, show_errmsg=show_errmsg, **kwargs)
|
|
||||||
|
|
||||||
if r is None:
|
|
||||||
if show_msg:
|
if show_msg:
|
||||||
print(info)
|
print(f'{file_url}\n{goal_path}\\{rename}\nSkipped.\n')
|
||||||
|
|
||||||
return False, info
|
|
||||||
|
|
||||||
if not r.ok:
|
|
||||||
if show_errmsg:
|
|
||||||
raise ConnectionError(f'Status code: {r.status_code}.')
|
|
||||||
|
|
||||||
return False, f'Status code: {r.status_code}.'
|
|
||||||
|
|
||||||
# -------------------获取文件名-------------------
|
|
||||||
file_name = ''
|
|
||||||
content_disposition = r.headers.get('content-disposition')
|
|
||||||
|
|
||||||
# 使用header里的文件名
|
|
||||||
if content_disposition:
|
|
||||||
file_name = r.headers[content_disposition[0]].encode('ISO-8859-1').decode('utf-8')
|
|
||||||
file_name = re.search(r'filename *= *"?([^";]+)', file_name)
|
|
||||||
if file_name:
|
|
||||||
file_name = file_name.group(1)
|
|
||||||
|
|
||||||
if file_name[0] == file_name[-1] == "'":
|
|
||||||
file_name = file_name[1:-1]
|
|
||||||
|
|
||||||
# 在url里获取文件名
|
|
||||||
if not file_name and os_PATH.basename(file_url):
|
|
||||||
file_name = os_PATH.basename(file_url).split("?")[0]
|
|
||||||
|
|
||||||
# 找不到则用时间和随机数生成文件名
|
|
||||||
if not file_name:
|
|
||||||
file_name = f'untitled_{time()}_{randint(0, 100)}'
|
|
||||||
|
|
||||||
# 去除非法字符
|
|
||||||
file_name = re_SUB(r'[\\/*:|<>?"]', '', file_name).strip()
|
|
||||||
file_name = unquote(file_name)
|
|
||||||
|
|
||||||
# -------------------重命名,不改变扩展名-------------------
|
|
||||||
if rename:
|
|
||||||
rename = re_SUB(r'[\\/*:|<>?"]', '', rename).strip()
|
|
||||||
ext_name = file_name.split('.')[-1]
|
|
||||||
|
|
||||||
if '.' in rename or ext_name == file_name:
|
|
||||||
full_name = rename
|
|
||||||
else:
|
|
||||||
full_name = f'{rename}.{ext_name}'
|
|
||||||
|
|
||||||
else:
|
|
||||||
full_name = file_name
|
|
||||||
|
|
||||||
# -------------------生成路径-------------------
|
|
||||||
goal_Path = Path(goal_path)
|
|
||||||
goal_path = ''
|
|
||||||
skip = False
|
|
||||||
|
|
||||||
for key, i in enumerate(goal_Path.parts): # 去除路径中的非法字符
|
|
||||||
goal_path += goal_Path.drive if key == 0 and goal_Path.drive else re_SUB(r'[*:|<>?"]', '', i).strip()
|
|
||||||
goal_path += '\\' if i != '\\' and key < len(goal_Path.parts) - 1 else ''
|
|
||||||
|
|
||||||
goal_Path = Path(goal_path)
|
|
||||||
goal_Path.mkdir(parents=True, exist_ok=True)
|
|
||||||
goal_path = goal_Path.absolute()
|
|
||||||
full_path = Path(f'{goal_path}\\{full_name}')
|
|
||||||
|
|
||||||
if full_path.exists():
|
|
||||||
if file_exists == 'rename':
|
|
||||||
full_name = get_available_file_name(goal_path, full_name)
|
|
||||||
full_path = Path(f'{goal_path}\\{full_name}')
|
|
||||||
|
|
||||||
elif file_exists == 'skip':
|
|
||||||
skip = True
|
|
||||||
|
|
||||||
elif file_exists == 'overwrite':
|
|
||||||
pass
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise ValueError("Argument file_exists can only be 'skip', 'overwrite', 'rename'.")
|
|
||||||
|
|
||||||
# -------------------打印要下载的文件-------------------
|
|
||||||
if show_msg:
|
|
||||||
print(file_url)
|
|
||||||
print(full_name if file_name == full_name else f'{file_name} -> {full_name}')
|
|
||||||
print(f'Downloading to: {goal_path}')
|
|
||||||
|
|
||||||
if skip:
|
|
||||||
print('Skipped.\n')
|
|
||||||
|
|
||||||
# -------------------开始下载-------------------
|
|
||||||
if skip:
|
|
||||||
return False, 'Skipped because a file with the same name already exists.'
|
return False, 'Skipped because a file with the same name already exists.'
|
||||||
|
|
||||||
# 获取远程文件大小
|
def do(url: str,
|
||||||
content_length = r.headers.get('content-length')
|
goal: str,
|
||||||
file_size = int(content_length) if content_length else None
|
new_name: str = None,
|
||||||
|
exists: str = 'rename',
|
||||||
|
data: dict = None,
|
||||||
|
msg: bool = False,
|
||||||
|
errmsg: bool = False,
|
||||||
|
**args) -> tuple:
|
||||||
|
args['stream'] = True
|
||||||
|
|
||||||
# 已下载文件大小和下载状态
|
if 'timeout' not in args:
|
||||||
downloaded_size, download_status = 0, False
|
args['timeout'] = 20
|
||||||
|
|
||||||
try:
|
mode = 'post' if data else 'get'
|
||||||
with open(str(full_path), 'wb') as tmpFile:
|
# 生成的response不写入self._response,是临时的
|
||||||
for chunk in r.iter_content(chunk_size=1024):
|
r, info = self._make_response(url, mode=mode, data=data, show_errmsg=errmsg, **args)
|
||||||
if chunk:
|
|
||||||
tmpFile.write(chunk)
|
|
||||||
|
|
||||||
# 如表头有返回文件大小,显示进度
|
if r is None:
|
||||||
if show_msg and file_size:
|
if msg:
|
||||||
downloaded_size += 1024
|
print(info)
|
||||||
rate = downloaded_size / file_size if downloaded_size < file_size else 1
|
|
||||||
print('\r {:.0%} '.format(rate), end="")
|
|
||||||
|
|
||||||
except Exception as e:
|
return False, info
|
||||||
if show_errmsg:
|
|
||||||
raise ConnectionError(e)
|
|
||||||
|
|
||||||
download_status, info = False, f'Download failed.\n{e}'
|
if not r.ok:
|
||||||
|
if errmsg:
|
||||||
|
raise ConnectionError(f'Status code: {r.status_code}.')
|
||||||
|
|
||||||
else:
|
return False, f'Status code: {r.status_code}.'
|
||||||
if full_path.stat().st_size == 0:
|
|
||||||
if show_errmsg:
|
|
||||||
raise ValueError('File size is 0.')
|
|
||||||
|
|
||||||
download_status, info = False, 'File size is 0.'
|
# -------------------获取文件名-------------------
|
||||||
|
file_name = ''
|
||||||
|
content_disposition = r.headers.get('content-disposition')
|
||||||
|
|
||||||
|
# 使用header里的文件名
|
||||||
|
if content_disposition:
|
||||||
|
file_name = content_disposition.encode('ISO-8859-1').decode('utf-8')
|
||||||
|
file_name = re.search(r'filename *= *"?([^";]+)', file_name)
|
||||||
|
|
||||||
|
if file_name:
|
||||||
|
file_name = file_name.group(1)
|
||||||
|
|
||||||
|
if file_name[0] == file_name[-1] == "'":
|
||||||
|
file_name = file_name[1:-1]
|
||||||
|
|
||||||
|
# 在url里获取文件名
|
||||||
|
if not file_name and os_PATH.basename(url):
|
||||||
|
file_name = os_PATH.basename(url).split("?")[0]
|
||||||
|
|
||||||
|
# 找不到则用时间和随机数生成文件名
|
||||||
|
if not file_name:
|
||||||
|
file_name = f'untitled_{time()}_{randint(0, 100)}'
|
||||||
|
|
||||||
|
# 去除非法字符
|
||||||
|
file_name = re_SUB(r'[\\/*:|<>?"]', '', file_name).strip()
|
||||||
|
file_name = unquote(file_name)
|
||||||
|
|
||||||
|
# -------------------重命名,不改变扩展名-------------------
|
||||||
|
if new_name:
|
||||||
|
new_name = re_SUB(r'[\\/*:|<>?"]', '', new_name).strip()
|
||||||
|
ext_name = file_name.split('.')[-1]
|
||||||
|
|
||||||
|
if '.' in new_name or ext_name == file_name:
|
||||||
|
full_name = new_name
|
||||||
|
else:
|
||||||
|
full_name = f'{new_name}.{ext_name}'
|
||||||
|
|
||||||
else:
|
else:
|
||||||
download_status, info = True, 'Success.'
|
full_name = file_name
|
||||||
|
|
||||||
finally:
|
# -------------------生成路径-------------------
|
||||||
# 删除下载出错文件
|
goal_Path = Path(goal)
|
||||||
if not download_status and full_path.exists():
|
goal = ''
|
||||||
full_path.unlink()
|
skip = False
|
||||||
|
|
||||||
r.close()
|
for key, i in enumerate(goal_Path.parts): # 去除路径中的非法字符
|
||||||
|
goal += goal_Path.drive if key == 0 and goal_Path.drive else re_SUB(r'[*:|<>?"]', '', i).strip()
|
||||||
|
goal += '\\' if i != '\\' and key < len(goal_Path.parts) - 1 else ''
|
||||||
|
|
||||||
# -------------------显示并返回值-------------------
|
goal_Path = Path(goal).absolute()
|
||||||
if show_msg:
|
goal_Path.mkdir(parents=True, exist_ok=True)
|
||||||
print(info, '\n')
|
full_path = Path(f'{goal}\\{full_name}')
|
||||||
|
|
||||||
info = f'{goal_path}\\{full_name}' if download_status else info
|
if full_path.exists():
|
||||||
return download_status, info
|
if file_exists == 'rename':
|
||||||
|
full_name = get_available_file_name(goal, full_name)
|
||||||
|
full_path = Path(f'{goal}\\{full_name}')
|
||||||
|
|
||||||
|
elif exists == 'skip':
|
||||||
|
skip = True
|
||||||
|
|
||||||
|
elif exists == 'overwrite':
|
||||||
|
pass
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError("Argument file_exists can only be 'skip', 'overwrite', 'rename'.")
|
||||||
|
|
||||||
|
# -------------------打印要下载的文件-------------------
|
||||||
|
if msg:
|
||||||
|
print(file_url)
|
||||||
|
print(full_name if file_name == full_name else f'{file_name} -> {full_name}')
|
||||||
|
print(f'Downloading to: {goal}')
|
||||||
|
|
||||||
|
if skip:
|
||||||
|
print('Skipped.\n')
|
||||||
|
|
||||||
|
# -------------------开始下载-------------------
|
||||||
|
if skip:
|
||||||
|
return False, 'Skipped because a file with the same name already exists.'
|
||||||
|
|
||||||
|
# 获取远程文件大小
|
||||||
|
content_length = r.headers.get('content-length')
|
||||||
|
file_size = int(content_length) if content_length else None
|
||||||
|
|
||||||
|
# 已下载文件大小和下载状态
|
||||||
|
downloaded_size, download_status = 0, False
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(str(full_path), 'wb') as tmpFile:
|
||||||
|
for chunk in r.iter_content(chunk_size=1024):
|
||||||
|
if chunk:
|
||||||
|
tmpFile.write(chunk)
|
||||||
|
|
||||||
|
# 如表头有返回文件大小,显示进度
|
||||||
|
if msg and file_size:
|
||||||
|
downloaded_size += 1024
|
||||||
|
rate = downloaded_size / file_size if downloaded_size < file_size else 1
|
||||||
|
print('\r {:.0%} '.format(rate), end="")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
if errmsg:
|
||||||
|
raise ConnectionError(e)
|
||||||
|
|
||||||
|
download_status, info = False, f'Download failed.\n{e}'
|
||||||
|
|
||||||
|
else:
|
||||||
|
if full_path.stat().st_size == 0:
|
||||||
|
if errmsg:
|
||||||
|
raise ValueError('File size is 0.')
|
||||||
|
|
||||||
|
download_status, info = False, 'File size is 0.'
|
||||||
|
|
||||||
|
else:
|
||||||
|
download_status, info = True, str(full_path)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# 删除下载出错文件
|
||||||
|
if not download_status and full_path.exists():
|
||||||
|
full_path.unlink()
|
||||||
|
|
||||||
|
r.close()
|
||||||
|
|
||||||
|
# -------------------显示并返回值-------------------
|
||||||
|
if msg:
|
||||||
|
print(info, '\n')
|
||||||
|
|
||||||
|
info = f'{goal}\\{full_name}' if download_status else info
|
||||||
|
return download_status, info
|
||||||
|
|
||||||
|
retry_times = retry or self.retry_times
|
||||||
|
retry_interval = interval or self.retry_interval
|
||||||
|
result = do(file_url, goal_path, rename, file_exists, post_data, show_msg, show_errmsg, **kwargs)
|
||||||
|
|
||||||
|
if not result[0] and not str(result[1]).startswith('Skipped'):
|
||||||
|
for i in range(retry_times):
|
||||||
|
sleep(retry_interval)
|
||||||
|
|
||||||
|
print(f'重试 {file_url}')
|
||||||
|
result = do(file_url, goal_path, rename, file_exists, post_data, show_msg, show_errmsg, **kwargs)
|
||||||
|
if result[0]:
|
||||||
|
break
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
def _make_response(self,
|
def _make_response(self,
|
||||||
url: str,
|
url: str,
|
||||||
@ -469,17 +517,17 @@ class SessionPage(object):
|
|||||||
:param data: post方式要提交的数据
|
:param data: post方式要提交的数据
|
||||||
:param show_errmsg: 是否显示和抛出异常
|
:param show_errmsg: 是否显示和抛出异常
|
||||||
:param kwargs: 其它参数
|
:param kwargs: 其它参数
|
||||||
:return: tuple,第一位为Response或None,第二位为出错信息或'Sussess'
|
:return: tuple,第一位为Response或None,第二位为出错信息或'Success'
|
||||||
"""
|
"""
|
||||||
if not url:
|
if not url:
|
||||||
if show_errmsg:
|
if show_errmsg:
|
||||||
raise ValueError('url is empty.')
|
raise ValueError('url is empty.')
|
||||||
return None, 'url is empty.'
|
return None, 'url is empty.'
|
||||||
|
|
||||||
if mode not in ['get', 'post']:
|
if mode not in ('get', 'post'):
|
||||||
raise ValueError("Argument mode can only be 'get' or 'post'.")
|
raise ValueError("Argument mode can only be 'get' or 'post'.")
|
||||||
|
|
||||||
url = quote(url, safe='/:&?=%;#@+')
|
url = quote(url, safe='/:&?=%;#@+!')
|
||||||
|
|
||||||
# 设置referer和host值
|
# 设置referer和host值
|
||||||
kwargs_set = set(x.lower() for x in kwargs)
|
kwargs_set = set(x.lower() for x in kwargs)
|
||||||
@ -520,14 +568,14 @@ class SessionPage(object):
|
|||||||
else:
|
else:
|
||||||
# ----------------获取并设置编码开始-----------------
|
# ----------------获取并设置编码开始-----------------
|
||||||
# 在headers中获取编码
|
# 在headers中获取编码
|
||||||
content_type = r.headers.get('content-type').lower()
|
content_type = r.headers.get('content-type', '').lower()
|
||||||
charset = re.search(r'charset[=: ]*(.*)?[;]', content_type)
|
charset = re.search(r'charset[=: ]*(.*)?[;]', content_type)
|
||||||
|
|
||||||
if charset:
|
if charset:
|
||||||
r.encoding = charset.group(1)
|
r.encoding = charset.group(1)
|
||||||
|
|
||||||
# 在headers中获取不到编码,且如果是网页
|
# 在headers中获取不到编码,且如果是网页
|
||||||
elif content_type.replace(' ', '').lower().startswith('text/html'):
|
elif content_type.replace(' ', '').startswith('text/html'):
|
||||||
re_result = re_SEARCH(b'<meta.*?charset=[ \\\'"]*([^"\\\' />]+).*?>', r.content)
|
re_result = re_SEARCH(b'<meta.*?charset=[ \\\'"]*([^"\\\' />]+).*?>', r.content)
|
||||||
|
|
||||||
if re_result:
|
if re_result:
|
||||||
|
@ -93,6 +93,12 @@ class ShadowRootElement(DrissionElement):
|
|||||||
ele.ele('some_text') - 返回第一个文本含有some_text的子元素(等价于上一行) \n
|
ele.ele('some_text') - 返回第一个文本含有some_text的子元素(等价于上一行) \n
|
||||||
ele.ele('text=some_text') - 返回第一个文本等于some_text的子元素 \n
|
ele.ele('text=some_text') - 返回第一个文本等于some_text的子元素 \n
|
||||||
ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n
|
ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n
|
||||||
|
- 查询字符串还有最精简模式,用c代替css、t代替tag、tx代替text: \n
|
||||||
|
ele.ele('c:div.ele_class') - 等同于 ele.ele('css:div.ele_class') \n
|
||||||
|
ele.ele('t:div') - 等同于 ele.ele('tag:div') \n
|
||||||
|
ele.ele('t:div@tx()=some_text') - 等同于 ele.ele('tag:div@text()=some_text') \n
|
||||||
|
ele.ele('tx:some_text') - 等同于 ele.ele('text:some_text') \n
|
||||||
|
ele.ele('tx=some_text') - 等同于 ele.ele('text=some_text')
|
||||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||||
:param mode: 'single' 或 'all',对应查找一个或全部
|
:param mode: 'single' 或 'all',对应查找一个或全部
|
||||||
:param timeout: 查找元素超时时间
|
:param timeout: 查找元素超时时间
|
||||||
@ -137,6 +143,12 @@ class ShadowRootElement(DrissionElement):
|
|||||||
ele.eles('some_text') - 返回所有文本含有some_text的子元素(等价于上一行) \n
|
ele.eles('some_text') - 返回所有文本含有some_text的子元素(等价于上一行) \n
|
||||||
ele.eles('text=some_text') - 返回所有文本等于some_text的子元素 \n
|
ele.eles('text=some_text') - 返回所有文本等于some_text的子元素 \n
|
||||||
ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n
|
ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n
|
||||||
|
- 查询字符串还有最精简模式,用c代替css、t代替tag、tx代替text: \n
|
||||||
|
ele.eles('c:div.ele_class') - 等同于 ele.eles('css:div.ele_class') \n
|
||||||
|
ele.eles('t:div') - 等同于 ele.eles('tag:div') \n
|
||||||
|
ele.eles('t:div@tx()=some_text') - 等同于 ele.eles('tag:div@text()=some_text') \n
|
||||||
|
ele.eles('tx:some_text') - 等同于 ele.eles('text:some_text') \n
|
||||||
|
ele.eles('tx=some_text') - 等同于 ele.eles('text=some_text')
|
||||||
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
:param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串
|
||||||
:param timeout: 查找元素超时时间
|
:param timeout: 查找元素超时时间
|
||||||
:return: DriverElement对象组成的列表
|
:return: DriverElement对象组成的列表
|
||||||
@ -235,12 +247,21 @@ def str_to_css_loc(loc: str) -> tuple:
|
|||||||
else:
|
else:
|
||||||
loc = loc.replace('.', '@class=', 1)
|
loc = loc.replace('.', '@class=', 1)
|
||||||
|
|
||||||
if loc.startswith('#'):
|
elif loc.startswith('#'):
|
||||||
if loc.startswith(('#=', '#:',)):
|
if loc.startswith(('#=', '#:',)):
|
||||||
loc = loc.replace('#', '@id', 1)
|
loc = loc.replace('#', '@id', 1)
|
||||||
else:
|
else:
|
||||||
loc = loc.replace('#', '@id=', 1)
|
loc = loc.replace('#', '@id=', 1)
|
||||||
|
|
||||||
|
elif loc.startswith(('t:', 't=')):
|
||||||
|
loc = f'tag:{loc[2:]}'
|
||||||
|
|
||||||
|
elif loc.startswith(('tx:', 'tx=')):
|
||||||
|
loc = f'text{loc[2:]}'
|
||||||
|
|
||||||
|
elif loc.startswith(('x:', 'x=', 'xpath:', 'xpath=')):
|
||||||
|
raise ValueError('不支持xpath')
|
||||||
|
|
||||||
# 根据属性查找
|
# 根据属性查找
|
||||||
if loc.startswith('@'):
|
if loc.startswith('@'):
|
||||||
r = re_SPLIT(r'([:=])', loc[1:], maxsplit=1)
|
r = re_SPLIT(r'([:=])', loc[1:], maxsplit=1)
|
||||||
@ -261,7 +282,7 @@ def str_to_css_loc(loc: str) -> tuple:
|
|||||||
r = re_SPLIT(r'([:=])', at_lst[1], maxsplit=1)
|
r = re_SPLIT(r'([:=])', at_lst[1], maxsplit=1)
|
||||||
|
|
||||||
if len(r) == 3:
|
if len(r) == 3:
|
||||||
if r[0] == 'text()':
|
if r[0] in ('text()', 'tx()'):
|
||||||
match = 'exact' if r[1] == '=' else 'fuzzy'
|
match = 'exact' if r[1] == '=' else 'fuzzy'
|
||||||
return 'text', r[2], at_lst[0], match
|
return 'text', r[2], at_lst[0], match
|
||||||
mode = '=' if r[1] == '=' else '*='
|
mode = '=' if r[1] == '=' else '*='
|
||||||
@ -273,10 +294,6 @@ def str_to_css_loc(loc: str) -> tuple:
|
|||||||
elif loc.startswith(('css=', 'css:')):
|
elif loc.startswith(('css=', 'css:')):
|
||||||
loc_str = loc[4:]
|
loc_str = loc[4:]
|
||||||
|
|
||||||
# 用xpath查找
|
|
||||||
elif loc.startswith(('xpath=', 'xpath:')):
|
|
||||||
raise ValueError('不支持xpath')
|
|
||||||
|
|
||||||
# 根据文本查找
|
# 根据文本查找
|
||||||
elif loc.startswith(('text=', 'text:')):
|
elif loc.startswith(('text=', 'text:')):
|
||||||
match = 'exact' if loc[4] == '=' else 'fuzzy'
|
match = 'exact' if loc[4] == '=' else 'fuzzy'
|
||||||
|
284
README.en.md
284
README.en.md
@ -318,26 +318,42 @@ The get_match_driver() method in the easy_set tool can automatically identify th
|
|||||||
from DrissionPage import MixPage
|
from DrissionPage import MixPage
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Initialization
|
## Initialization
|
||||||
|
|
||||||
If you only use session mode, you can skip this section.
|
If you only use session mode, you can skip this section.
|
||||||
|
|
||||||
Before using selenium, you must configure the path of chrome.exe and chromedriver.exe and ensure that their versions match.
|
Before using selenium, you must configure the path of chrome.exe and chromedriver.exe and ensure that their versions
|
||||||
|
match.
|
||||||
|
In the new version, if the program finds that their versions do not match when running, it will automatically download
|
||||||
|
the corresponding version and set the path. If there is no special need, no manual intervention is required.
|
||||||
|
|
||||||
There are four ways to configure the path:
|
There are four ways to configure the path:
|
||||||
-Use the get_match_driver() method of the easy_set tool (recommended)
|
|
||||||
-Write the path to the ini file of this library
|
|
||||||
-Write two paths to system variables
|
|
||||||
-Manually pass in the path when using
|
|
||||||
|
|
||||||
### Use get_match_driver() method
|
- Run directly, let the program automatically complete the settings (recommended)
|
||||||
|
|
||||||
If you choose the first method, please run the following code before using it for the first time. The program will automatically detect the Chrome version installed on your computer, download the corresponding driver, and record it in the ini file.
|
- Use the get_match_driver() method of the easy_set tool
|
||||||
|
|
||||||
|
- Write the path to the ini file of this library
|
||||||
|
|
||||||
|
- Write two paths to system variables
|
||||||
|
|
||||||
|
- Fill in the path in the code
|
||||||
|
|
||||||
|
**auto configuration**
|
||||||
|
|
||||||
|
In the new version, you don't need to do any configuration, just run the program directly, the program will get the path
|
||||||
|
of chrome.exe in the system, and automatically download the chromedriver.exe that matches the version, without any
|
||||||
|
user intervention. If you need to specify the chrome.exe to use yourself, you can use one of the following methods.
|
||||||
|
|
||||||
|
**Use the get_match_driver() method**
|
||||||
|
|
||||||
|
If you choose this method, please run the following code before using it for the first time. The program will
|
||||||
|
automatically detect the chrome version installed on your computer, download the corresponding driver, and record it in
|
||||||
|
the ini file.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from DrissionPage.easy_set import get_match_driver
|
from DrissionPage.easy_set import get_match_driver
|
||||||
|
|
||||||
get_match_driver()
|
get_match_driver()
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -460,8 +476,8 @@ session_options = {'headers': {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac O
|
|||||||
# Proxy settings, optional
|
# Proxy settings, optional
|
||||||
proxy = {'http': '127.0.0.1:1080','https': '127.0.0.1:1080'}
|
proxy = {'http': '127.0.0.1:1080','https': '127.0.0.1:1080'}
|
||||||
|
|
||||||
# Incoming configuration, driver_options and session_options are optional, you need to use the corresponding mode to pass in
|
# Pass in the configuration; driver_or_options and session_or_options are optional and are only needed for the corresponding mode
|
||||||
drission = Drission(driver_options, session_options, proxy=proxy)
|
drission = Drission(driver_or_options, session_or_options, proxy=proxy)
|
||||||
```
|
```
|
||||||
|
|
||||||
The usage of DriverOptions and SessionOptions is detailed below.
|
The usage of DriverOptions and SessionOptions is detailed below.
|
||||||
@ -548,11 +564,12 @@ When calling a method that only belongs to d mode, it will automatically switch
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
page.set_cookies() # set cookies
|
page.set_cookies() # set cookies
|
||||||
page.get_cookies() # Get cookies, which can be returned by list or dict
|
page.get_cookies() # Get cookies, which can be returned by list or dict
|
||||||
page.change_mode() # Switch mode, it will automatically copy cookies
|
page.change_mode() # Switch mode, it will automatically copy cookies
|
||||||
page.cookies_to_session() # Copy cookies from WebDriver object to Session object
|
page.cookies_to_session() # Copy cookies from WebDriver object to Session object
|
||||||
page.cookies_to_driver() # Copy cookies from Session object to WebDriver object
|
page.cookies_to_driver() # Copy cookies from Session object to WebDriver object
|
||||||
page.get(url, retry, interval, **kwargs) # Use get to access the web page, you can specify the number of retries and the interval
|
page.get(url, retry, interval,
|
||||||
|
**kwargs) # Use get to access the web page, you can specify the number of retries and the interval
|
||||||
page.ele(loc_or_ele, timeout) # Get the first element, node or attribute that meets the conditions
|
page.ele(loc_or_ele, timeout) # Get the first element, node or attribute that meets the conditions
|
||||||
page.eles(loc_or_ele, timeout) # Get all eligible elements, nodes or attributes
|
page.eles(loc_or_ele, timeout) # Get all eligible elements, nodes or attributes
|
||||||
page.download(url, save_path, rename, file_exists, **kwargs) # download file
|
page.download(url, save_path, rename, file_exists, **kwargs) # download file
|
||||||
@ -560,7 +577,8 @@ page.close_driver() # Close the WebDriver object
|
|||||||
page.close_session() # Close the Session object
|
page.close_session() # Close the Session object
|
||||||
|
|
||||||
# s mode unique:
|
# s mode unique:
|
||||||
page.post(url, data, retry, interval, **kwargs) # To access the webpage in post mode, you can specify the number of retries and the interval
|
page.post(url, data, retry, interval,
|
||||||
|
**kwargs) # To access the webpage in post mode, you can specify the number of retries and the interval
|
||||||
|
|
||||||
# d mode unique:
|
# d mode unique:
|
||||||
page.wait_ele(loc_or_ele, mode, timeout) # Wait for the element to be deleted, displayed, and hidden from the dom
|
page.wait_ele(loc_or_ele, mode, timeout) # Wait for the element to be deleted, displayed, and hidden from the dom
|
||||||
@ -569,10 +587,11 @@ page.create_tab(url) # Create and locate a tab page, which is at the end
|
|||||||
page.to_tab(num_or_handle) # Jump to tab page
|
page.to_tab(num_or_handle) # Jump to tab page
|
||||||
page.close_current_tab() # Close the current tab page
|
page.close_current_tab() # Close the current tab page
|
||||||
page.close_other_tabs(num_or_handles) # Close other tabs
|
page.close_other_tabs(num_or_handles) # Close other tabs
|
||||||
page.to_iframe(iframe) # cut into iframe
|
page.to_frame(iframe) # Switch into an iframe
|
||||||
page.screenshot(path) # Page screenshot
|
page.screenshot(path) # Page screenshot
|
||||||
page.scroll_to_see(element) # Scroll until an element is visible
|
page.scroll_to_see(element) # Scroll until an element is visible
|
||||||
page.scroll_to(mode, pixel) # Scroll the page as indicated by the parameter, and the scroll direction is optional:'top','bottom','rightmost','leftmost','up','down','left', ' right'
|
page.scroll_to(mode,
|
||||||
|
pixel) # Scroll the page as indicated by the parameter; the optional scroll directions are: 'top', 'bottom', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right', 'half'
|
||||||
page.refresh() # refresh the current page
|
page.refresh() # refresh the current page
|
||||||
page.back() # Browser back
|
page.back() # Browser back
|
||||||
page.set_window_size(x, y) # Set the browser window size, maximize by default
|
page.set_window_size(x, y) # Set the browser window size, maximize by default
|
||||||
@ -681,6 +700,7 @@ element.html # Return element outerHTML
|
|||||||
element.inner_html # Return element innerHTML
|
element.inner_html # Return element innerHTML
|
||||||
element.tag # Return element tag name
|
element.tag # Return element tag name
|
||||||
element.text # Return element innerText value
|
element.text # Return element innerText value
|
||||||
|
element.comments # Returns the list of comments within the element
|
||||||
element.link # Returns absolute href or src value of the element.
|
element.link # Returns absolute href or src value of the element.
|
||||||
element.texts() # Returns the text of all direct child nodes in the element, including elements and text nodes, you can specify to return only text nodes
|
element.texts() # Returns the text of all direct child nodes in the element, including elements and text nodes, you can specify to return only text nodes
|
||||||
element.attrs # Return a dictionary of all attributes of the element
|
element.attrs # Return a dictionary of all attributes of the element
|
||||||
@ -717,17 +737,35 @@ Element operation is unique to d mode. Calling the following method will automat
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
element.click(by_js) # Click the element, you can choose whether to click with js
|
element.click(by_js) # Click the element, you can choose whether to click with js
|
||||||
element.input(value) # input text
|
element.click_at(x, y, by_js) # Click this element with offset, relative to the upper left corner coordinate. Click the midpoint of the element when the x or y value is not passed in, and you can choose whether to click with js
|
||||||
element.run_script(js) # Run JavaScript script on the element
|
element.input(value, clear) # Input text or key combination, and input the key combination in tuple format. The clear parameter is whether to clear the element before input.
|
||||||
|
element.run_script(js, *args) # Run JavaScript script on the element
|
||||||
element.submit() # Submit
|
element.submit() # Submit
|
||||||
element.clear() # Clear the element
|
element.clear() # Clear the element
|
||||||
element.screenshot(path, filename) # Take a screenshot of the element
|
element.screenshot(path, filename) # Take a screenshot of the element
|
||||||
element.select(text) # Select the drop- down list based on the text
|
|
||||||
element.set_attr(attr, value) # Set element attribute value
|
element.set_attr(attr, value) # Set element attribute value
|
||||||
element.remove_attr(attr) # Remove an element attribute
|
element.remove_attr(attr) # Remove an element attribute
|
||||||
element.drag(x, y, speed, shake) # Drag the relative distance of the element, you can set the speed and whether to shake randomly
|
element.drag(x, y, speed, shake) # Drag the relative distance of the element, you can set the speed and whether to shake randomly
|
||||||
element.drag_to(ele_or_loc, speed, shake) # Drag the element to another element or a certain coordinate, you can set the speed and whether to shake randomly
|
element.drag_to(ele_or_loc, speed, shake) # Drag the element to another element or a certain coordinate, you can set the speed and whether to shake randomly
|
||||||
element.hover() # Hover the mouse over the element
|
element.hover() # Hover the mouse over the element
|
||||||
|
|
||||||
|
# select function:
|
||||||
|
element.select.is_multi # Whether it is a multi-select list
|
||||||
|
element.select.options # Return all list item objects
|
||||||
|
element.select.selected_option # Return the first selected option element
|
||||||
|
element.select.selected_options # Return a list of all selected option elements
|
||||||
|
|
||||||
|
element.select(text) # Select the drop-down list item based on the text
|
||||||
|
element.select(value,'value') # Select the drop-down list item according to the value
|
||||||
|
element.select(index,'index') # Select the drop-down list item according to the serial number
|
||||||
|
|
||||||
|
element.select.deselect(text) # Deselect drop-down list items based on the text (valid for multiple selection lists)
|
||||||
|
element.select.deselect(value,'value') # Deselect drop-down list items according to value (valid for multiple selection lists)
|
||||||
|
element.select.deselect(index,'index') # Deselect drop-down list items according to the serial number (valid for multiple selection lists)
|
||||||
|
# Note: When the list is a multi-selection list, the first parameter above can receive list or tuple, and select or deselect multiple items at the same time
|
||||||
|
|
||||||
|
element.select.clear() # Clear multiple selection list options
|
||||||
|
element.select.invert() # Invert multiple selection list options
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
@ -914,7 +952,7 @@ do.set_no_imgs(True) # Do not load pictures
|
|||||||
do.set_paths(driver_path='D:\\chromedriver.exe', chrome_path='D:\\chrome.exe') # set path
|
do.set_paths(driver_path='D:\\chromedriver.exe', chrome_path='D:\\chrome.exe') # set path
|
||||||
do.set_headless(False).set_no_imgs(True) # Support chain operation
|
do.set_headless(False).set_no_imgs(True) # Support chain operation
|
||||||
|
|
||||||
drission = Drission(driver_options=do) # Create Drission object with configuration object
|
drission = Drission(driver_or_options=do) # Create Drission object with configuration object
|
||||||
page = MixPage(driver_options=do) # Create MixPage object with configuration object
|
page = MixPage(driver_options=do) # Create MixPage object with configuration object
|
||||||
|
|
||||||
do.save() # save the currently opened ini file
|
do.save() # save the currently opened ini file
|
||||||
@ -949,7 +987,7 @@ so.cookies = ['key1=val1; domain=xxxx','key2=val2; domain=xxxx'] # set cookies
|
|||||||
so.headers = {'User-Agent':'xxxx','Accept-Charset':'xxxx'}
|
so.headers = {'User-Agent':'xxxx','Accept-Charset':'xxxx'}
|
||||||
so.set_a_header('Connection','keep-alive')
|
so.set_a_header('Connection','keep-alive')
|
||||||
|
|
||||||
drission = Drission(session_options=so) # Create Drission object with configuration object
|
drission = Drission(session_or_options=so) # Create Drission object with configuration object
|
||||||
page = MixPage(session_options=so) # Create MixPage object with configuration object
|
page = MixPage(session_options=so) # Create MixPage object with configuration object
|
||||||
|
|
||||||
so.save() # Save the currently opened ini file
|
so.save() # Save the currently opened ini file
|
||||||
@ -1211,15 +1249,23 @@ Return or set the driver configuration.
|
|||||||
|
|
||||||
Returns: dict
|
Returns: dict
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### session_options
|
### session_options
|
||||||
|
|
||||||
Return to session configuration.
|
Return session configuration.
|
||||||
|
|
||||||
Returns: dict
|
Returns: dict
|
||||||
|
|
||||||
|
### proxy
|
||||||
|
|
||||||
|
Return proxy configuration.
|
||||||
|
|
||||||
|
Returns: dict
|
||||||
|
|
||||||
|
### debugger_progress
|
||||||
|
|
||||||
|
Return debug browser process.
|
||||||
|
|
||||||
|
Returns: debug browser process
|
||||||
|
|
||||||
### session_options()
|
### session_options()
|
||||||
|
|
||||||
@ -1227,16 +1273,6 @@ Set the session configuration.
|
|||||||
|
|
||||||
Returns: None
|
Returns: None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### proxy
|
|
||||||
|
|
||||||
Return to proxy configuration.
|
|
||||||
|
|
||||||
Returns: dict
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### cookies_to_session()
|
### cookies_to_session()
|
||||||
|
|
||||||
Copy the cookies of the driver object to the session object.
|
Copy the cookies of the driver object to the session object.
|
||||||
@ -1304,23 +1340,25 @@ Close the session and set it to None.
|
|||||||
|
|
||||||
Returns: None
|
Returns: None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### close()
|
### close()
|
||||||
|
|
||||||
Close the driver and session.
|
Close the driver and session.
|
||||||
|
|
||||||
Returns: None
|
Returns: None
|
||||||
|
|
||||||
|
### kill_browser()
|
||||||
|
|
||||||
|
Close the browser process (if possible).
|
||||||
|
|
||||||
## MixPage Class
|
## MixPage Class
|
||||||
|
|
||||||
### class MixPage()
|
### class MixPage()
|
||||||
|
|
||||||
MixPage encapsulates the common functions of page operation and can seamlessly switch between driver and session modes. Cookies are automatically synchronized when switching.
|
MixPage encapsulates the common functions of page operation and can seamlessly switch between driver and session modes.
|
||||||
The function of obtaining information is shared by the two modes, and the function of operating page elements is only available in mode d. Calling a function unique to a certain mode will automatically switch to that mode.
|
Cookies are automatically synchronized when switching. The function of obtaining information is shared by the two modes,
|
||||||
It inherits from DriverPage and SessionPage classes, these functions are implemented by these two classes, and MixPage exists as a scheduling role.
|
and the function of operating page elements is only available in mode d. Calling a function unique to a certain mode
|
||||||
|
will automatically switch to that mode. It inherits from DriverPage and SessionPage classes, these functions are
|
||||||
|
implemented by these two classes, and MixPage exists as a scheduling role.
|
||||||
|
|
||||||
Parameter Description:
|
Parameter Description:
|
||||||
|
|
||||||
@ -1707,9 +1745,7 @@ Parameter Description:
|
|||||||
|
|
||||||
Returns: None
|
Returns: None
|
||||||
|
|
||||||
|
### to_frame()
|
||||||
|
|
||||||
### to_iframe()
|
|
||||||
|
|
||||||
Jump to iframe, jump to the highest level by default, compatible with selenium native parameters.
|
Jump to iframe, jump to the highest level by default, compatible with selenium native parameters.
|
||||||
|
|
||||||
@ -1718,13 +1754,14 @@ Parameter Description:
|
|||||||
- loc_or_ele:[int, str, tuple, WebElement, DriverElement] - Find the condition of iframe element, can receive iframe serial number (starting at 0), id or name, query string, loc parameter, WebElement object, DriverElement object, and pass in ' main' jump to the highest level, and pass in'parent' to jump to the upper level
|
- loc_or_ele: [int, str, tuple, WebElement, DriverElement] - The condition used to find the iframe element. It can be an iframe serial number (starting at 0), id or name, query string, loc parameter, WebElement object or DriverElement object. Pass in 'main' to jump to the highest level, or 'parent' to jump to the upper level
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
- to_iframe('tag:iframe')- locate by the query string passed in iframe
|
|
||||||
- to_iframe('iframe_id')- Positioning by the id attribute of the iframe
|
- to_frame('tag:iframe')- locate by the query string passed in iframe
|
||||||
- to_iframe('iframe_name')- locate by the name attribute of iframe
|
- to_frame('iframe_id')- Positioning by the id attribute of the iframe
|
||||||
- to_iframe(iframe_element)- locate by passing in the element object
|
- to_frame('iframe_name')- locate by the name attribute of iframe
|
||||||
- to_iframe(0)- locate by the serial number of the iframe
|
- to_frame(iframe_element)- locate by passing in the element object
|
||||||
- to_iframe('main')- jump to the top level
|
- to_frame(0)- locate by the serial number of the iframe
|
||||||
- to_iframe('parent')- jump to the previous level
|
- to_frame('main')- jump to the top level
|
||||||
|
- to_frame('parent')- jump to the previous level
|
||||||
|
|
||||||
Returns: None
|
Returns: None
|
||||||
|
|
||||||
@ -1889,15 +1926,17 @@ Return all attributes and values of the element in a dictionary.
|
|||||||
|
|
||||||
Returns: dict
|
Returns: dict
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### text
|
### text
|
||||||
|
|
||||||
Returns the text inside the element.
|
Returns the text inside the element.
|
||||||
|
|
||||||
Returns: str
|
Returns: str
|
||||||
|
|
||||||
|
### comments
|
||||||
|
|
||||||
|
Returns the list of comments within the element
|
||||||
|
|
||||||
|
Returns: list
|
||||||
|
|
||||||
### link
|
### link
|
||||||
|
|
||||||
@ -1905,8 +1944,6 @@ Returns absolute href or src value of the element.
|
|||||||
|
|
||||||
Returns: str
|
Returns: str
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### css_path
|
### css_path
|
||||||
|
|
||||||
Returns the absolute path of the element css selector.
|
Returns the absolute path of the element css selector.
|
||||||
@ -1977,15 +2014,17 @@ Returns the content of the ::before pseudo- element of the current element
|
|||||||
|
|
||||||
Returns: str
|
Returns: str
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### after
|
### after
|
||||||
|
|
||||||
Returns the content of the ::after pseudo element of the current element
|
Returns the content of the ::after pseudo element of the current element
|
||||||
|
|
||||||
Returns: str
|
Returns: str
|
||||||
|
|
||||||
|
### select
|
||||||
|
|
||||||
|
If it is a select element, it returns the Select object, otherwise it returns None.
|
||||||
|
|
||||||
|
Returns: Union[Select, None]
|
||||||
|
|
||||||
### texts()
|
### texts()
|
||||||
|
|
||||||
@ -1993,7 +2032,7 @@ Returns the text of all direct child nodes within the element, including element
|
|||||||
|
|
||||||
Parameter Description:
|
Parameter Description:
|
||||||
|
|
||||||
- text_node_only: bool - whether to return only text nodes
|
- text_node_only: bool - whether to return only text nodes
|
||||||
|
|
||||||
Returns: List[str]
|
Returns: List[str]
|
||||||
|
|
||||||
@ -2143,20 +2182,31 @@ Click on the element. If it is unsuccessful, click in js mode. You can specify w
|
|||||||
|
|
||||||
Parameter Description:
|
Parameter Description:
|
||||||
|
|
||||||
- by_js: bool - whether to click with js
|
- by_js: bool - whether to click with js
|
||||||
|
|
||||||
Returns: bool
|
Returns: bool
|
||||||
|
|
||||||
|
### click_at()
|
||||||
|
|
||||||
|
Click this element with offset, relative to the upper left corner coordinate. Click the midpoint of the element when the
|
||||||
### input()
|
x or y value is not passed in, and you can choose whether to click with js.
|
||||||
|
|
||||||
Enter text and return whether it is successful.
|
|
||||||
|
|
||||||
Parameter Description:
|
Parameter Description:
|
||||||
|
|
||||||
- value: str - text value
|
- x: int - The x-axis offset relative to the upper left corner of the element
|
||||||
- clear: bool - whether to clear the text box before typing
|
- y: int - The y-axis offset relative to the upper left corner of the element
|
||||||
|
- by_js: bool - whether to click with js
|
||||||
|
|
||||||
|
Returns: None
|
||||||
|
|
||||||
|
### input()
|
||||||
|
|
||||||
|
Enter text or key combination and return whether it is successful.
|
||||||
|
|
||||||
|
Parameter Description:
|
||||||
|
|
||||||
|
- value: Union[str, tuple] - Text value or key combination
|
||||||
|
- clear: bool - whether to clear the text box before typing
|
||||||
|
|
||||||
Returns: bool
|
Returns: bool
|
||||||
|
|
||||||
@ -2236,18 +2286,6 @@ Returns: str
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
### select()
|
|
||||||
|
|
||||||
Select from the drop- down list.
|
|
||||||
|
|
||||||
Parameter Description:
|
|
||||||
|
|
||||||
- text: str - option text
|
|
||||||
|
|
||||||
Returns: bool - success
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### set_attr()
|
### set_attr()
|
||||||
|
|
||||||
Set element attributes.
|
Set element attributes.
|
||||||
@ -2361,15 +2399,17 @@ Returns the names and values of all attributes of the element in dictionary form
|
|||||||
|
|
||||||
Returns: dict
|
Returns: dict
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### text
|
### text
|
||||||
|
|
||||||
Returns the text within the element, namely innerText.
|
Returns the text within the element, namely innerText.
|
||||||
|
|
||||||
Returns: str
|
Returns: str
|
||||||
|
|
||||||
|
### comments
|
||||||
|
|
||||||
|
Returns the list of comments within the element
|
||||||
|
|
||||||
|
Returns: list
|
||||||
|
|
||||||
### link
|
### link
|
||||||
|
|
||||||
@ -2377,8 +2417,6 @@ Returns absolute href or src value of the element.
|
|||||||
|
|
||||||
Returns: str
|
Returns: str
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### css_path
|
### css_path
|
||||||
|
|
||||||
Returns the absolute path of the element css selector.
|
Returns the absolute path of the element css selector.
|
||||||
@ -2534,7 +2572,8 @@ Returns: [SessionElement, str]
|
|||||||
|
|
||||||
### eles()
|
### eles()
|
||||||
|
|
||||||
Get the list of elements that meet the conditions according to the query parameters. The query parameter usage method is the same as the ele method.
|
Get the list of elements that meet the conditions according to the query parameters. The query parameter usage method is
|
||||||
|
the same as the ele method.
|
||||||
|
|
||||||
Parameter Description:
|
Parameter Description:
|
||||||
|
|
||||||
@ -2542,7 +2581,92 @@ Parameter Description:
|
|||||||
|
|
||||||
Returns: List[SessionElement or str]
|
Returns: List[SessionElement or str]
|
||||||
|
|
||||||
|
## Select class
|
||||||
|
|
||||||
|
### class Select()
|
||||||
|
|
||||||
|
The Select class is specifically used to process select tags in d mode.
|
||||||
|
|
||||||
|
Parameter Description:
|
||||||
|
|
||||||
|
- ele: DriverElement - select element object
|
||||||
|
|
||||||
|
### is_multi
|
||||||
|
|
||||||
|
Returns: bool - Whether it is a multi-select list
|
||||||
|
|
||||||
|
### options
|
||||||
|
|
||||||
|
Returns: List[DriverElement] - a list of all option elements
|
||||||
|
|
||||||
|
### selected_option
|
||||||
|
|
||||||
|
Returns: Union[DriverElement, None] - the first option element selected
|
||||||
|
|
||||||
|
### selected_options
|
||||||
|
|
||||||
|
Returns: List[DriverElement] - a list of all selected option elements
|
||||||
|
|
||||||
|
### clear()
|
||||||
|
|
||||||
|
Clear all selected items.
|
||||||
|
|
||||||
|
### select()
|
||||||
|
|
||||||
|
Select or deselect child elements in the drop-down list.
|
||||||
|
|
||||||
|
Parameter Description:
|
||||||
|
|
||||||
|
- text_value_index: Union[str, int, list, tuple] - select options based on text, value or serial number. If multiple
|
||||||
|
selections are allowed, multiple selections can be made by inputting a list or tuple
|
||||||
|
|
||||||
|
- para_type: str - parameter type, optional'text','value','index'
|
||||||
|
|
||||||
|
- deselect: bool - Whether to cancel the selection
|
||||||
|
|
||||||
|
Returns: bool - Whether the selection is successful
|
||||||
|
|
||||||
|
### select_multi()
|
||||||
|
|
||||||
|
Select or deselect multiple child elements in the drop-down list.
|
||||||
|
|
||||||
|
Parameter Description:
|
||||||
|
|
||||||
|
- text_value_index: Union[list, tuple] - Select multiple items based on text, value or serial number
|
||||||
|
|
||||||
|
- para_type: str - parameter type, optional'text','value','index'
|
||||||
|
|
||||||
|
- deselect: bool - Whether to cancel the selection
|
||||||
|
|
||||||
|
Returns: Whether the selection is successful
|
||||||
|
|
||||||
|
### deselect()
|
||||||
|
|
||||||
|
Deselect child elements in the drop-down list.
|
||||||
|
|
||||||
|
Parameter Description:
|
||||||
|
|
||||||
|
- text_value_index: Union[str, int, list, tuple] - deselect options based on text, value, or serial number.
|
||||||
|
If multiple selections are allowed, multiple selections can be made by inputting a list or tuple
|
||||||
|
|
||||||
|
- para_type: str - parameter type, optional'text','value','index'
|
||||||
|
|
||||||
|
Returns: Whether the selection is successful
|
||||||
|
|
||||||
|
### deselect_multi()
|
||||||
|
|
||||||
|
Deselect multiple child elements in the drop-down list.
|
||||||
|
|
||||||
|
Parameter Description:
|
||||||
|
|
||||||
|
- text_value_index: Union[list, tuple] - Deselect multiple items based on text, value or serial number
|
||||||
|
- para_type: str - parameter type, optional'text','value','index'
|
||||||
|
|
||||||
|
Returns: Whether the selection is successful
|
||||||
|
|
||||||
|
### invert()
|
||||||
|
|
||||||
|
Invert the selection.
|
||||||
|
|
||||||
## ShadowRootElement class
|
## ShadowRootElement class
|
||||||
|
|
||||||
|
288
README.md
Normal file
288
README.md
Normal file
@ -0,0 +1,288 @@
|
|||||||
|
# 简介
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
DrissionPage,即 driver 和 session 组合而成的 page。
|
||||||
|
是个基于 python 的 Web 自动化操作集成工具。
|
||||||
|
它实现了 selenium 和 requests 之间的无缝切换。
|
||||||
|
可以兼顾 selenium 的便利性和 requests 的高效率。
|
||||||
|
它集成了页面常用功能,两种模式系统一致的 API,使用便捷。
|
||||||
|
它用 POM 模式封装了页面元素常用的方法,适合自动化操作功能扩展。
|
||||||
|
更棒的是,它的使用方式非常简洁和人性化,代码量少,对新手友好。
|
||||||
|
|
||||||
|
**项目地址:**
|
||||||
|
|
||||||
|
- https://github.com/g1879/DrissionPage
|
||||||
|
- https://gitee.com/g1879/DrissionPage
|
||||||
|
|
||||||
|
**示例地址:** [使用DrissionPage的网页自动化及爬虫示例](https://gitee.com/g1879/DrissionPage-demos)
|
||||||
|
|
||||||
|
**联系邮箱:** g1879@qq.com
|
||||||
|
|
||||||
|
**交流QQ群:** 897838127
|
||||||
|
|
||||||
|
**理念**
|
||||||
|
|
||||||
|
**简洁、易用 、可扩展**
|
||||||
|
|
||||||
|
**背景**
|
||||||
|
|
||||||
|
requests 爬虫面对要登录的网站时,要分析数据包、JS 源码,构造复杂的请求,往往还要应付验证码、JS 混淆、签名参数等反爬手段,门槛较高。若数据是由 JS 计算生成的,还须重现计算过程,体验不好,开发效率不高。
|
||||||
|
使用 selenium,可以很大程度上绕过这些坑,但 selenium 效率不高。因此,这个库将 selenium 和 requests 合而为一,在不同需要时切换相应模式,并提供一种人性化的使用方法,提高开发和运行效率。
|
||||||
|
除了合并两者,本库还以网页为单位封装了常用功能,简化了 selenium 的操作和语句,在用于网页自动化操作时,减少考虑细节,专注功能实现,使用更方便。
|
||||||
|
一切从简,尽量提供简单直接的使用方法,对新手更友好。
|
||||||
|
|
||||||
|
# 特性
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
- 以简洁的代码为第一追求。
|
||||||
|
- 允许在 selenium 和 requests 间无缝切换,共享 session。
|
||||||
|
- 两种模式提供一致的 API,使用体验一致。
|
||||||
|
- 人性化的页面元素操作方式,减轻页面分析工作量和编码量。
|
||||||
|
- 对常用功能作了整合和优化,更符合实际使用需要。
|
||||||
|
- 兼容 selenium 代码,便于项目迁移。
|
||||||
|
- 使用 POM 模式封装,便于扩展。
|
||||||
|
- 统一的文件下载方法,弥补浏览器下载的不足。
|
||||||
|
- 简易的配置方法,摆脱繁琐的浏览器配置。
|
||||||
|
|
||||||
|
# 项目结构
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
**结构图**
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
**Drission 类**
|
||||||
|
|
||||||
|
管理负责与网页通讯的 WebDriver 对象和 Session 对象,相当于驱动器的角色。
|
||||||
|
|
||||||
|
**MixPage 类**
|
||||||
|
|
||||||
|
MixPage 封装了页面操作的常用功能,它调用 Drission 类中管理的驱动器,对页面进行访问、操作。可在 driver 和 session 模式间切换。切换的时候会自动同步登录状态。
|
||||||
|
|
||||||
|
**DriverElement 类**
|
||||||
|
|
||||||
|
driver 模式下的页面元素类,可对元素进行点击、输入文本、修改属性、运行 js 等操作,也可在其下级搜索后代元素。
|
||||||
|
|
||||||
|
**SessionElement 类**
|
||||||
|
|
||||||
|
session 模式下的页面元素类,可获取元素属性值,也可在其下级搜索后代元素。
|
||||||
|
|
||||||
|
# 简单演示
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
**与 selenium 代码对比**
|
||||||
|
|
||||||
|
以下代码实现一模一样的功能,对比两者的代码量:
|
||||||
|
|
||||||
|
- 用显性等待方式查找第一个文本包含 some text 的元素
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 使用 selenium:
|
||||||
|
element = WebDriverWait(driver).until(ec.presence_of_element_located((By.XPATH, '//*[contains(text(), "some text")]')))
|
||||||
|
|
||||||
|
# 使用 DrissionPage:
|
||||||
|
element = page('some text')
|
||||||
|
```
|
||||||
|
|
||||||
|
- 跳转到第一个标签页
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 使用 selenium:
|
||||||
|
driver.switch_to.window(driver.window_handles[0])
|
||||||
|
|
||||||
|
# 使用 DrissionPage:
|
||||||
|
page.to_tab(0)
|
||||||
|
```
|
||||||
|
|
||||||
|
- 按文本选择下拉列表
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 使用 selenium:
|
||||||
|
from selenium.webdriver.support.select import Select
|
||||||
|
select_element = Select(element)
|
||||||
|
select_element.select_by_visible_text('text')
|
||||||
|
|
||||||
|
# 使用 DrissionPage:
|
||||||
|
element.select('text')
|
||||||
|
```
|
||||||
|
|
||||||
|
- 拖拽一个元素
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 使用 selenium:
|
||||||
|
ActionChains(driver).drag_and_drop(ele1, ele2).perform()
|
||||||
|
|
||||||
|
# 使用 DrissionPage:
|
||||||
|
ele1.drag_to(ele2)
|
||||||
|
```
|
||||||
|
|
||||||
|
- 滚动窗口到底部(保持水平滚动条不变)
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 使用 selenium:
|
||||||
|
driver.execute_script("window.scrollTo(document.documentElement.scrollLeft, document.body.scrollHeight);")
|
||||||
|
|
||||||
|
# 使用 DrissionPage:
|
||||||
|
page.scroll_to('bottom')
|
||||||
|
```
|
||||||
|
|
||||||
|
- 设置 headless 模式
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 使用 selenium:
|
||||||
|
options = webdriver.ChromeOptions()
|
||||||
|
options.add_argument("--headless")
|
||||||
|
|
||||||
|
# 使用 DrissionPage:
|
||||||
|
set_headless()
|
||||||
|
```
|
||||||
|
|
||||||
|
- 获取伪元素内容
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 使用 selenium:
|
||||||
|
text = driver.execute_script('return window.getComputedStyle(arguments[0], "::after").getPropertyValue("content");', element)
|
||||||
|
|
||||||
|
# 使用 DrissionPage:
|
||||||
|
text = element.after
|
||||||
|
```
|
||||||
|
|
||||||
|
- 获取 shadow-root
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 使用 selenium:
|
||||||
|
shadow_element = driver.execute_script('return arguments[0].shadowRoot', element)
|
||||||
|
|
||||||
|
# 使用 DrissionPage:
|
||||||
|
shadow_element = element.shadow_root
|
||||||
|
```
|
||||||
|
|
||||||
|
- 用 xpath 直接获取属性或文本节点(返回文本)
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 使用 selenium:
|
||||||
|
# 相当复杂
|
||||||
|
|
||||||
|
# 使用 DrissionPage:
|
||||||
|
class_name = element('xpath://div[@id="div_id"]/@class')
|
||||||
|
text = element('xpath://div[@id="div_id"]/text()[2]')
|
||||||
|
```
|
||||||
|
|
||||||
|
**与 requests 代码对比**
|
||||||
|
|
||||||
|
以下代码实现一模一样的功能,对比两者的代码量:
|
||||||
|
|
||||||
|
- 获取元素内容
|
||||||
|
|
||||||
|
```python
|
||||||
|
url = 'https://baike.baidu.com/item/python'
|
||||||
|
|
||||||
|
# 使用 requests:
|
||||||
|
from lxml import etree
|
||||||
|
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36'}
|
||||||
|
response = requests.get(url, headers = headers)
|
||||||
|
html = etree.HTML(response.text)
|
||||||
|
element = html.xpath('//h1')[0]
|
||||||
|
title = element.text
|
||||||
|
|
||||||
|
# 使用 DrissionPage:
|
||||||
|
page = MixPage('s')
|
||||||
|
page.get(url)
|
||||||
|
title = page('tag:h1').text
|
||||||
|
```
|
||||||
|
|
||||||
|
Tips: DrissionPage 自带默认 headers
|
||||||
|
|
||||||
|
- 下载文件
|
||||||
|
|
||||||
|
```python
|
||||||
|
url = 'https://www.baidu.com/img/flexible/logo/pc/result.png'
|
||||||
|
save_path = r'C:\download'
|
||||||
|
|
||||||
|
# 使用 requests:
|
||||||
|
r = requests.get(url)
|
||||||
|
with open(f'{save_path}\\img.png', 'wb') as fd:
|
||||||
|
    for chunk in r.iter_content():
|
||||||
|
        fd.write(chunk)
|
||||||
|
|
||||||
|
# 使用 DrissionPage:
|
||||||
|
page.download(url, save_path, 'img') # 支持重命名,处理文件名冲突
|
||||||
|
```
|
||||||
|
|
||||||
|
**模式切换**
|
||||||
|
|
||||||
|
用 selenium 登录网站,然后切换到 requests 读取网页。两者会共享登录信息。
|
||||||
|
|
||||||
|
```python
|
||||||
|
page = MixPage() # 创建页面对象,默认 driver 模式
|
||||||
|
page.get('https://gitee.com/profile') # 访问个人中心页面(未登录,重定向到登录页面)
|
||||||
|
|
||||||
|
page.ele('@id:user_login').input('your_user_name') # 使用 selenium 输入账号密码登录
|
||||||
|
page.ele('@id:user_password').input('your_password\n')
|
||||||
|
sleep(1)
|
||||||
|
|
||||||
|
page.change_mode() # 切换到 session 模式
|
||||||
|
print('登录后title:', page.title, '\n') # 登录后 session 模式的输出
|
||||||
|
```
|
||||||
|
|
||||||
|
输出:
|
||||||
|
|
||||||
|
```
|
||||||
|
登录后title: 个人资料 - 码云 Gitee.com
|
||||||
|
```
|
||||||
|
|
||||||
|
**获取并打印元素属性**
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 接上段代码
|
||||||
|
foot = page.ele('@id:footer-left') # 用 id 查找元素
|
||||||
|
first_col = foot.ele('css:>div') # 使用 css selector 在元素的下级中查找元素(第一个)
|
||||||
|
lnk = first_col.ele('text:命令学') # 使用文本内容查找元素
|
||||||
|
text = lnk.text # 获取元素文本
|
||||||
|
href = lnk.attr('href') # 获取元素属性值
|
||||||
|
|
||||||
|
print(text, href, '\n')
|
||||||
|
|
||||||
|
# 简洁模式串联查找
|
||||||
|
text = page('@id:footer-left')('css:>div')('text:命令学').text
|
||||||
|
print(text)
|
||||||
|
```
|
||||||
|
|
||||||
|
输出:
|
||||||
|
|
||||||
|
```
|
||||||
|
Git 命令学习 https://oschina.gitee.io/learn-git-branching/
|
||||||
|
|
||||||
|
Git 命令学习
|
||||||
|
```
|
||||||
|
|
||||||
|
**下载文件**
|
||||||
|
|
||||||
|
```python
|
||||||
|
url = 'https://www.baidu.com/img/flexible/logo/pc/result.png'
|
||||||
|
save_path = r'C:\download'
|
||||||
|
page.download(url, save_path)
|
||||||
|
```
|
||||||
|
|
||||||
|
# 使用方法
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
请在 Wiki 中查看:[点击跳转到 Wiki](https://gitee.com/g1879/DrissionPage/wikis/%E5%AE%89%E8%A3%85%E4%B8%8E%E5%AF%BC%E5%85%A5?sort_id=3201408)
|
||||||
|
|
||||||
|
# 版本历史
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
请在 Wiki 中查看:[点击查看版本历史](https://gitee.com/g1879/DrissionPage/wikis/%E7%89%88%E6%9C%AC%E5%8E%86%E5%8F%B2?sort_id=3201403)
|
||||||
|
|
||||||
|
# APIs
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
|
请在 Wiki 中查看:[点击查看 APIs](https://gitee.com/g1879/DrissionPage/wikis/Drission%20%E7%B1%BB?sort_id=3159323)
|
||||||
|
|
1144
README.zh-cn.md
1144
README.zh-cn.md
File diff suppressed because it is too large
Load Diff
2
setup.py
2
setup.py
@ -8,7 +8,7 @@ with open("README.md", "r", encoding='utf-8') as fh:
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="DrissionPage",
|
name="DrissionPage",
|
||||||
version="1.5.0",
|
version="1.9.3",
|
||||||
author="g1879",
|
author="g1879",
|
||||||
author_email="g1879@qq.com",
|
author_email="g1879@qq.com",
|
||||||
description="A module that integrates selenium and requests session, encapsulates common page operations.",
|
description="A module that integrates selenium and requests session, encapsulates common page operations.",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user