From 09db94209085605de9295cd7cf58604586ee45f2 Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 24 Dec 2020 17:07:17 +0800 Subject: [PATCH 01/94] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E4=B8=8D=E8=83=BD?= =?UTF-8?q?=E8=8E=B7=E5=8F=96content-type=E6=97=B6=E4=BC=9A=E5=87=BA?= =?UTF-8?q?=E7=8E=B0=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/session_page.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 060e50d..7fc2223 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -520,14 +520,14 @@ class SessionPage(object): else: # ----------------获取并设置编码开始----------------- # 在headers中获取编码 - content_type = r.headers.get('content-type').lower() + content_type = r.headers.get('content-type', '').lower() charset = re.search(r'charset[=: ]*(.*)?[;]', content_type) if charset: r.encoding = charset.group(1) # 在headers中获取不到编码,且如果是网页 - elif content_type.replace(' ', '').lower().startswith('text/html'): + elif content_type.replace(' ', '').startswith('text/html'): re_result = re_SEARCH(b']+).*?>', r.content) if re_result: From f9ba0625e3efefedd5a692d32c71c9c38aa15453 Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 24 Dec 2020 17:07:28 +0800 Subject: [PATCH 02/94] 1.7.7 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4fa437f..458c8d0 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="1.5.0", + version="1.7.7", author="g1879", author_email="g1879@qq.com", description="A module that integrates selenium and requests session, encapsulates common page operations.", From d10da8d71890bbfe4f11ab95f401dbdd5d63bedd Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 24 Dec 2020 17:08:38 +0800 Subject: [PATCH 03/94] =?UTF-8?q?=E5=88=9B=E5=BB=BAdriver=E6=97=B6?= =?UTF-8?q?=E5=8F=AF=E8=87=AA=E5=8A=A8=E4=B8=8B=E8=BD=BDchromedriver.exe?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/config.py | 2 +- DrissionPage/drission.py | 29 ++++++++++++++------ DrissionPage/easy_set.py | 59 ++++++++++++++++++++++++++-------------- 3 files changed, 60 insertions(+), 30 deletions(-) diff --git a/DrissionPage/config.py b/DrissionPage/config.py index 17fccd0..a3e5253 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -22,7 +22,7 @@ class OptionsManager(object): """初始化,读取配置文件,如没有设置临时文件夹,则设置并新建 \n :param path: ini文件的路径,默认读取模块文件夹下的 """ - self.ini_path = path or str(Path(__file__).parent / 'configs.ini') + self.ini_path = str(Path(__file__).parent / 'configs.ini') if path == 'default' or path is None else path self._conf = RawConfigParser() self._conf.read(self.ini_path, encoding='utf-8') diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 6930023..88dde3e 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -92,19 +92,32 @@ class Drission(object): options.add_argument(f'--proxy-server={self._proxy["http"]}') driver_path = self._driver_options.get('driver_path', None) or 'chromedriver' + chrome_path = self._driver_options.get('binary_location', None) or 'chrome.exe' + + if options.debugger_address and _check_port(options.debugger_address) is False: + from subprocess import Popen + port = options.debugger_address.split(':')[-1] + + Popen(f'{chrome_path} --remote-debugging-port={port}', shell=False) try: - if options.debugger_address and _check_port(options.debugger_address) is False: - from subprocess import Popen - port = options.debugger_address.split(':')[-1] - chrome_path = self._driver_options.get('binary_location', None) or 'chrome.exe' - Popen(f'{chrome_path} --remote-debugging-port={port}', shell=False) - self._driver = webdriver.Chrome(driver_path, options=options) except (WebDriverException, SessionNotCreatedException): - print('未指定chromedriver路径或版本与Chrome不匹配,可执行easy_set.get_match_driver()自动下载匹配的版本。') - exit(0) + from .easy_set import get_match_driver + + chrome_path = self._driver_options.get('binary_location', None) or None + driver_path = get_match_driver(chrome_path=chrome_path, check_version=False, show_msg=False) + + if driver_path: + try: + self._driver = webdriver.Chrome(driver_path, options=options) + except: + print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。') + exit(0) + else: + print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。') + exit(0) # 反爬设置,似乎没用 self._driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", { diff --git a/DrissionPage/easy_set.py b/DrissionPage/easy_set.py index 59e5c52..a64fadb 100644 --- a/DrissionPage/easy_set.py +++ b/DrissionPage/easy_set.py @@ -190,27 +190,33 @@ def check_driver_version(driver_path: str = None, chrome_path: str = None) -> bo # -------------------------自动识别chrome版本号并下载对应driver------------------------ -def get_match_driver(ini_path: str = None, +def get_match_driver(ini_path: Union[str, None] = 'default', save_path: str = None, - chrome_path: str = None) -> None: + chrome_path: str = None, + show_msg: bool = True, + check_version: bool = True) -> Union[str, None]: """自动识别chrome版本并下载匹配的driver \n :param ini_path: 要读取和修改的ini文件路径 :param save_path: chromedriver保存路径 :param chrome_path: 指定chrome.exe位置 + :param show_msg: 是否打印信息 + :param check_version: 是否检查版本匹配 :return: None """ save_path = save_path or str(Path(__file__).parent) - chrome_path = chrome_path or _get_chrome_path(ini_path) + chrome_path = chrome_path or _get_chrome_path(ini_path, show_msg) chrome_path = Path(chrome_path).absolute() if chrome_path else None - print('chrome.exe路径', chrome_path, '\n') + if show_msg: + print('chrome.exe路径', chrome_path, '\n') ver = _get_chrome_version(chrome_path) - print('version', ver, '\n') + if show_msg: + print('version', ver, '\n') - zip_path = _download_driver(ver, save_path) + zip_path = _download_driver(ver, save_path, show_msg=show_msg) - if not zip_path: + if not zip_path and show_msg: print('没有找到对应版本的driver。') try: @@ -218,28 +224,37 @@ def get_match_driver(ini_path: str = None, except TypeError: driver_path = None - print('\n解压路径', driver_path, '\n') + if show_msg: + print('\n解压路径', driver_path, '\n') if driver_path: Path(zip_path).unlink() - set_paths(driver_path=driver_path, chrome_path=str(chrome_path), ini_path=ini_path, check_version=False) + if ini_path: + set_paths(driver_path=driver_path, chrome_path=str(chrome_path), ini_path=ini_path, check_version=False) - if not check_driver_version(driver_path, chrome_path): - print('获取失败,请手动配置。') + if check_version: + if not check_driver_version(driver_path, chrome_path) and show_msg: + print('获取失败,请手动配置。') else: - print('获取失败,请手动配置。') + if show_msg: + print('获取失败,请手动配置。') + + return driver_path -def _get_chrome_path(ini_path: str = None) -> Union[str, None]: +def _get_chrome_path(ini_path: str = None, show_msg: bool = True) -> Union[str, None]: """从ini文件或系统变量中获取chrome.exe的路径 \n :param ini_path: ini文件路径 :return: chrome.exe路径 """ # -----------从ini文件中获取-------------- - try: - path = OptionsManager(ini_path).chrome_options['binary_location'] - except KeyError: - return None + if ini_path: + try: + path = OptionsManager(ini_path).chrome_options['binary_location'] + except KeyError: + path = None + else: + path = None if path and Path(path).is_file(): print('ini文件中', end='') @@ -253,7 +268,8 @@ def _get_chrome_path(ini_path: str = None) -> Union[str, None]: path = Path(r.group(0)) if 'chrome.exe' in r.group(0) else Path(r.group(0)) / 'chrome.exe' if path.exists(): - print('系统中', end='') + if show_msg: + print('系统中', end='') return str(path) paths = paths.split(';') @@ -262,7 +278,8 @@ def _get_chrome_path(ini_path: str = None) -> Union[str, None]: path = Path(path) / 'chrome.exe' if path.exists(): - print('系统中', end='') + if show_msg: + print('系统中', end='') return str(path) @@ -283,7 +300,7 @@ def _get_chrome_version(path: str) -> Union[str, None]: return None -def _download_driver(version: str, save_path: str = None) -> Union[str, None]: +def _download_driver(version: str, save_path: str = None, show_msg: bool = True) -> Union[str, None]: """根据传入的版本号到镜像网站查找,下载最相近的 \n :param version: 本地版本号 :return: 保存地址 @@ -317,7 +334,7 @@ def _download_driver(version: str, save_path: str = None) -> Union[str, None]: if remote_ver: url = f'https://cdn.npm.taobao.org/dist/chromedriver/{remote_ver}chromedriver_win32.zip' save_path = save_path or Path(__file__).parent - result = page.download(url, save_path, file_exists='overwrite', show_msg=True) + result = page.download(url, save_path, file_exists='overwrite', show_msg=show_msg) if result[0]: return result[1] From ba3943da9792dffdfe39f3e4014b506cfd7b91e9 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sat, 26 Dec 2020 17:46:59 +0800 Subject: [PATCH 04/94] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BB=8E=E6=B3=A8?= =?UTF-8?q?=E5=86=8C=E8=A1=A8=E8=8E=B7=E5=8F=96chrome.exe=E8=B7=AF?= =?UTF-8?q?=E5=BE=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/easy_set.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/DrissionPage/easy_set.py b/DrissionPage/easy_set.py index a64fadb..cb93160 100644 --- a/DrissionPage/easy_set.py +++ b/DrissionPage/easy_set.py @@ -277,10 +277,30 @@ def _get_chrome_path(ini_path: str = None, show_msg: bool = True) -> Union[str, for path in paths: path = Path(path) / 'chrome.exe' - if path.exists(): - if show_msg: - print('系统中', end='') - return str(path) + try: + if path.exists(): + if show_msg: + print('系统变量中', end='') + return str(path) + except OSError: + pass + + # -----------从注册表中获取-------------- + import winreg + try: + key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, + r'SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe', + reserved=0, access=winreg.KEY_READ) + k = winreg.EnumValue(key, 0) + winreg.CloseKey(key) + + if show_msg: + print('注册表中', end='') + + return k[1] + + except FileNotFoundError: + return def _get_chrome_version(path: str) -> Union[str, None]: From 9e923c8f5596f603e9798997597654d69ff0f19a Mon Sep 17 00:00:00 2001 From: g1879 Date: Sun, 27 Dec 2020 01:28:15 +0800 Subject: [PATCH 05/94] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dset=5Fcookies()?= =?UTF-8?q?=E6=97=B6=E4=B8=8D=E8=83=BD=E6=AD=A3=E7=A1=AE=E8=A6=86=E7=9B=96?= =?UTF-8?q?=E4=B8=80=E7=BA=A7=E5=9F=9F=E5=90=8D=E5=89=8D=E9=9D=A2=E6=B2=A1?= =?UTF-8?q?=E6=9C=89.=E7=9A=84cookie=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 88dde3e..01eb81d 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -227,6 +227,11 @@ class Drission(object): self.driver.get(cookie_domain if cookie_domain.startswith('http://') else f'http://{cookie_domain}') + # 避免selenium自动添加.后无法正确覆盖已有cookie + c = self.driver.get_cookie(cookie['name']) + if c and c['domain'] == cookie['domain']: + self.driver.delete_cookie(cookie['name']) + self.driver.add_cookie(cookie) def _set_session(self, data: dict) -> None: From 775e9d5ca24c91e1449cc1b6831f4ccd9deaa5ce Mon Sep 17 00:00:00 2001 From: g1879 Date: Sun, 27 Dec 2020 10:47:45 +0800 Subject: [PATCH 06/94] 1.7.8 --- README.en.md | 34 +++++++++++++++++++++++++--------- README.zh-cn.md | 11 +++++++++-- setup.py | 2 +- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/README.en.md b/README.en.md index 78aa983..83d9e4d 100644 --- a/README.en.md +++ b/README.en.md @@ -318,26 +318,42 @@ The get_match_driver() method in the easy_set tool can automatically identify th from DrissionPage import MixPage ``` - - ## Initialization If you only use session mode, you can skip this section. -Before using selenium, you must configure the path of chrome.exe and chromedriver.exe and ensure that their versions match. +Before using selenium, you must configure the path of chrome.exe and chromedriver.exe and ensure that their versions +match. +In the new version, if the program finds that their versions do not match when running, it will automatically download +the corresponding version and set the path. If there is no special need, no manual intervention is required. There are four ways to configure the path: --Use the get_match_driver() method of the easy_set tool (recommended) --Write the path to the ini file of this library --Write two paths to system variables --Manually pass in the path when using -### Use get_match_driver() method +- Run directly, let the program automatically complete the settings (recommended) -If you choose the first method, please run the following code before using it for the first time. The program will automatically detect the Chrome version installed on your computer, download the corresponding driver, and record it in the ini file. +- Use the get_match_driver() method of the easy_set tool + +- Write the path to the ini file of this library + +- Write two paths to system variables + +- Fill in the path in the code + +**auto configuration** + +In the new version, you don't need to do any configuration, just run the program directly, the program will get the path +of chrome.exe in the system, and automatically download the chromedriver.exe that matches the version. No feeling at +all. If you need to set the chrome.exe used by yourself, you can use the following method. + +**Use the get_match_driver() method** + +If you choose this method, please run the following code before using it for the first time. The program will +automatically detect the chrome version installed on your computer, download the corresponding driver, and record it in +the ini file. ```python from DrissionPage.easy_set import get_match_driver + get_match_driver() ``` diff --git a/README.zh-cn.md b/README.zh-cn.md index e229543..5959311 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -315,16 +315,23 @@ from DrissionPage import MixPage > 如果你只使用 session 模式,可跳过本节。 使用 selenium 前,必须配置 chrome.exe 和 chromedriver.exe 的路径,并确保它们版本匹配。 +新版本中,运行时若程序发现它们版本不匹配,会自动下载对应版本并设置路径。如无特殊须要,无须手动干预。 配置路径有四种方法: -- 使用 easy_set 工具的 get_match_driver() 方法(推荐) + +- 直接运行,让程序自动完成设置(推荐) +- 使用 easy_set 工具的 get_match_driver() 方法 - 将路径写入本库的 ini 文件 - 将两个路径写入系统变量 - 在代码中填写路径 +**自动设置** + +新版本中,您无须做任何配置,只要直接运行程序,程序会获取系统中 chrome.exe 路径,并自动下载版本匹配的 chromedriver.exe。全程无感。如须自行设置所使用的 chrome.exe,可用下面的方法。 + **使用 get_match_driver() 方法** -若你选择第一种方式,请在第一次使用前,运行以下代码,程序会自动检测电脑安装的 chrome 版本,下载对应 driver,并记录到 ini 文件。 +若你选择这种方式,请在第一次使用前,运行以下代码,程序会自动检测电脑安装的 chrome 版本,下载对应 driver,并记录到 ini 文件。 ```python from DrissionPage.easy_set import get_match_driver diff --git a/setup.py b/setup.py index 458c8d0..779358a 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="1.7.7", + version="1.7.8", author="g1879", author_email="g1879@qq.com", description="A module that integrates selenium and requests session, encapsulates common page operations.", From b0444fddd9cc79065d25e4b662e15fe21bab511c Mon Sep 17 00:00:00 2001 From: g1879 Date: Sun, 27 Dec 2020 10:50:34 +0800 Subject: [PATCH 07/94] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dset=5Fcookies()?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0cookie=E6=97=B6=EF=BC=8Cselenium=E8=87=AA?= =?UTF-8?q?=E5=8A=A8=E5=9C=A8=E4=BA=8C=E7=BA=A7=E5=9F=9F=E5=90=8D=E5=89=8D?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0.=E5=AF=BC=E8=87=B4=E6=97=A0=E6=B3=95?= =?UTF-8?q?=E6=AD=A3=E7=A1=AE=E8=A6=86=E7=9B=96cookie=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 01eb81d..65bbb30 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -228,9 +228,10 @@ class Drission(object): else f'http://{cookie_domain}') # 避免selenium自动添加.后无法正确覆盖已有cookie - c = self.driver.get_cookie(cookie['name']) - if c and c['domain'] == cookie['domain']: - self.driver.delete_cookie(cookie['name']) + if cookie['domain'][0] != '.': + c = self.driver.get_cookie(cookie['name']) + if c and c['domain'] == cookie['domain']: + self.driver.delete_cookie(cookie['name']) self.driver.add_cookie(cookie) From 48049e4b03690c8fc522e532dfac224ec4a034be Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 28 Dec 2020 11:44:59 +0800 Subject: [PATCH 08/94] =?UTF-8?q?=E8=B0=83=E6=95=B4=E8=8E=B7=E5=8F=96chrom?= =?UTF-8?q?e.exe=E6=96=B9=E6=B3=95=E6=AC=A1=E5=BA=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/easy_set.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/DrissionPage/easy_set.py b/DrissionPage/easy_set.py index cb93160..2d67807 100644 --- a/DrissionPage/easy_set.py +++ b/DrissionPage/easy_set.py @@ -260,6 +260,23 @@ def _get_chrome_path(ini_path: str = None, show_msg: bool = True) -> Union[str, print('ini文件中', end='') return str(path) + # -----------从注册表中获取-------------- + import winreg + try: + key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, + r'SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe', + reserved=0, access=winreg.KEY_READ) + k = winreg.EnumValue(key, 0) + winreg.CloseKey(key) + + if show_msg: + print('注册表中', end='') + + return k[1] + + except FileNotFoundError: + pass + # -----------从系统路径中获取-------------- paths = popen('set path').read().lower() r = RE_SEARCH(r'[^;]*chrome[^;]*', paths) @@ -285,23 +302,6 @@ def _get_chrome_path(ini_path: str = None, show_msg: bool = True) -> Union[str, except OSError: pass - # -----------从注册表中获取-------------- - import winreg - try: - key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, - r'SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe', - reserved=0, access=winreg.KEY_READ) - k = winreg.EnumValue(key, 0) - winreg.CloseKey(key) - - if show_msg: - print('注册表中', end='') - - return k[1] - - except FileNotFoundError: - return - def _get_chrome_version(path: str) -> Union[str, None]: """根据文件路径获取版本号 \n From 51cef7fa7f60957bcdf39e307333a3643b49a6d8 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 28 Dec 2020 15:08:43 +0800 Subject: [PATCH 09/94] =?UTF-8?q?=E9=A1=B5=E9=9D=A2=E7=B1=BB=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0retry=5Ftimes=E5=92=8Cretry=5Finterval=E5=B1=9E?= =?UTF-8?q?=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_page.py | 16 +++++++++++++--- DrissionPage/mix_page.py | 11 +++++++---- DrissionPage/session_page.py | 15 +++++++++++---- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index 924c778..874179f 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -30,6 +30,9 @@ class DriverPage(object): self._url_available = None self._wait = None + self.retry_times = 3 + self.retry_interval = 2 + @property def driver(self) -> WebDriver: return self._driver @@ -99,22 +102,24 @@ class DriverPage(object): """ self.driver.get(to_url) is_ok = self.check_page() + while times and is_ok is False: sleep(interval) self.driver.get(to_url) is_ok = self.check_page() times -= 1 + if is_ok is False and show_errmsg: raise ConnectionError('Connect error.') + return is_ok def get(self, url: str, go_anyway: bool = False, show_errmsg: bool = False, - retry: int = 0, - interval: float = 1, - ) -> Union[None, bool]: + retry: int = None, + interval: float = None) -> Union[None, bool]: """访问url \n :param url: 目标url :param go_anyway: 若目标url与当前url一致,是否强制跳转 @@ -124,10 +129,15 @@ class DriverPage(object): :return: 目标url是否可用 """ to_url = quote(url, safe='/:&?=%;#@') + retry = int(retry) if retry is not None else int(self.retry_times) + interval = int(interval) if interval is not None else int(self.retry_interval) + if not url or (not go_anyway and self.url == to_url): return + self._url = to_url self._url_available = self._try_to_connect(to_url, times=retry, interval=interval, show_errmsg=show_errmsg) + return self._url_available def ele(self, diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index 4e4e0a7..9961281 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -58,6 +58,9 @@ class MixPage(Null, SessionPage, DriverPage): self._url_available = None self._mode = mode + self.retry_times = 3 + self.retry_interval = 2 + if mode == 's': self._driver = None self._session = True @@ -236,8 +239,8 @@ class MixPage(Null, SessionPage, DriverPage): data: dict = None, go_anyway: bool = False, show_errmsg: bool = False, - retry: int = 2, - interval: float = 1, + retry: int = None, + interval: float = None, **kwargs) -> Union[bool, None]: """用post方式跳转到url,会切换到s模式 \n :param url: 目标url @@ -319,8 +322,8 @@ class MixPage(Null, SessionPage, DriverPage): url: str, go_anyway=False, show_errmsg: bool = False, - retry: int = 2, - interval: float = 1, + retry: int = None, + interval: float = None, **kwargs) -> Union[bool, None]: """跳转到一个url \n 跳转前先同步cookies,跳转后判断目标url是否可用 diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 7fc2223..372d391 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -33,6 +33,9 @@ class SessionPage(object): self._url_available = None self._response = None + self.retry_times = 3 + self.retry_interval = 2 + @property def session(self) -> Session: """返回session对象""" @@ -212,8 +215,8 @@ class SessionPage(object): url: str, go_anyway: bool = False, show_errmsg: bool = False, - retry: int = 0, - interval: float = 1, + retry: int = None, + interval: float = None, **kwargs) -> Union[bool, None]: """用get方式跳转到url \n :param url: 目标url @@ -225,6 +228,8 @@ class SessionPage(object): :return: url是否可用 """ to_url = quote(url, safe='/:&?=%;#@+') + retry = int(retry) if retry is not None else int(self.retry_times) + interval = int(interval) if interval is not None else int(self.retry_interval) if not url or (not go_anyway and self.url == to_url): return @@ -252,8 +257,8 @@ class SessionPage(object): data: dict = None, go_anyway: bool = True, show_errmsg: bool = False, - retry: int = 0, - interval: float = 1, + retry: int = None, + interval: float = None, **kwargs) -> Union[bool, None]: """用post方式跳转到url \n :param url: 目标url @@ -266,6 +271,8 @@ class SessionPage(object): :return: url是否可用 """ to_url = quote(url, safe='/:&?=%;#@') + retry = int(retry) if retry is not None else int(self.retry_times) + interval = int(interval) if interval is not None else int(self.retry_interval) if not url or (not go_anyway and self._url == to_url): return From 88f22bad842df74ba6652c141aa3f116d98d08be Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 28 Dec 2020 15:08:56 +0800 Subject: [PATCH 10/94] 1.7.10 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 779358a..082aa22 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="1.7.8", + version="1.7.10", author="g1879", author_email="g1879@qq.com", description="A module that integrates selenium and requests session, encapsulates common page operations.", From 7f6285f3cd238689f4045e3b57533cda786d7431 Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 29 Dec 2020 23:07:29 +0800 Subject: [PATCH 11/94] =?UTF-8?q?=E5=AE=8C=E5=96=84=E5=AF=B9xpath=E8=8E=B7?= =?UTF-8?q?=E5=8F=96=E6=95=B0=E5=AD=97=E7=BB=93=E6=9E=9C=E7=9A=84=E6=94=AF?= =?UTF-8?q?=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 68f5a9c..79c3e2b 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -627,7 +627,7 @@ class ElementsByXpath(object): """用js通过xpath获取元素、节点或属性 :param node: 'document' 或 元素对象 :param xpath_txt: xpath语句 - :param type_txt: resultType,参考https://developer.mozilla.org/zh-CN/docs/Web/API/Document/evaluate + :param type_txt: resultType,参考 https://developer.mozilla.org/zh-CN/docs/Web/API/Document/evaluate :return: 元素对象或属性、文本字符串 """ node_txt = 'document' if not node or node == 'document' else 'arguments[0]' @@ -685,8 +685,14 @@ class ElementsByXpath(object): return e # 找不到目标时 - except JavascriptException: - return None + except JavascriptException as err: + if 'The result is not a node set' in err.msg: + try: + return get_nodes(the_node, xpath_txt=self.xpath, type_txt='1') + except JavascriptException: + return None + else: + return None elif self.mode == 'all': return ([DriverElement(x, self.page) if isinstance(x, WebElement) From 67eb48fd972970aa6be65c367b4bfeef5855cae9 Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 29 Dec 2020 23:07:43 +0800 Subject: [PATCH 12/94] 1.7.11 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 082aa22..de43b26 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="1.7.10", + version="1.7.11", author="g1879", author_email="g1879@qq.com", description="A module that integrates selenium and requests session, encapsulates common page operations.", From b33b14c3a08503aa74afd6d3b1507f2facd35218 Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 29 Dec 2020 23:29:52 +0800 Subject: [PATCH 13/94] 1.7.12 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index de43b26..ac7b8d7 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="1.7.11", + version="1.7.12", author="g1879", author_email="g1879@qq.com", description="A module that integrates selenium and requests session, encapsulates common page operations.", From 1badd08827ac1bb638e3125d7a6deea247401802 Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 29 Dec 2020 23:30:21 +0800 Subject: [PATCH 14/94] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dxpath=E8=BF=94=E5=9B=9E?= =?UTF-8?q?=E6=95=B0=E5=AD=97=E6=97=B6=E5=87=BA=E9=94=99=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/session_element.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index 47b3fe5..fe01d14 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -368,6 +368,9 @@ def execute_session_find(page_or_ele, else: ele = page_or_ele.cssselect(loc[1]) + if not isinstance(ele, list): + return ele + # 把lxml元素对象包装成SessionElement对象并按需要返回第一个或全部 if mode == 'single': ele = ele[0] if ele else None From 5187a6b5b12ee0166e34f67e9ee8760df4a84269 Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 31 Dec 2020 00:33:59 +0800 Subject: [PATCH 15/94] =?UTF-8?q?=E4=BC=98=E5=8C=96s=E6=A8=A1=E5=BC=8Fhtml?= =?UTF-8?q?=E5=92=8Ctext=EF=BC=8C=E5=8E=BB=E9=99=A4=E7=A9=BA=E6=A0=BC?= =?UTF-8?q?=E5=92=8C=E5=9B=9E=E8=BD=A6=EF=BC=8C=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/common.py | 7 +++++-- DrissionPage/session_element.py | 31 +++++++++++++++++++++++-------- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/DrissionPage/common.py b/DrissionPage/common.py index c062606..da2d22c 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -201,9 +201,12 @@ def _make_search_str(search_str: str) -> str: return search_str -def format_html(text: str) -> str: +def format_html(text: str, replace_space: bool = True) -> str: """处理html编码字符""" - return unescape(text).replace('\xa0', ' ') if text else text + if text: + return unescape(text).replace('\xa0', ' ') if replace_space else unescape(text) + else: + return text def translate_loc(loc: tuple) -> tuple: diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index fe01d14..bfb3f95 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -37,8 +37,15 @@ class SessionElement(DrissionElement): def html(self) -> str: """返回元素outerHTML文本""" # tostring()会把跟紧元素的文本节点也带上,因此要去掉 + # print(tostring(self._inner_ele, method="html").decode()) html = format_html(tostring(self._inner_ele, method="html").decode()) + # print(html) return html[:html.rfind('>') + 1] + # return format_html(html[:html.rfind('>') + 1],False) + + # def _html(self) -> str: + # html = tostring(self._inner_ele, method="html").decode() + # return html[:html.rfind('>') + 1] @property def inner_html(self) -> str: @@ -59,7 +66,19 @@ class SessionElement(DrissionElement): @property def text(self) -> str: """返回元素内所有文本""" - return str(self._inner_ele.text_content()) + html = format_html(tostring(self._inner_ele, method="html").decode(), False) + html = html[:html.rfind('>') + 1] + + txt = re.sub(r'<.*?>', '', html).replace('\n', ' ') + txt = re.sub(r' {2,}', ' ', txt).strip() + # return format_html(txt) + return txt + + # return t + # return str(self._inner_ele.text_content()) + # return self._inner_ele.text_content() + + # txt = str(self._inner_ele.text_content()).replace('\n', ' ') @property def link(self) -> str: @@ -284,12 +303,6 @@ class SessionElement(DrissionElement): ele = self while ele: - # ele_id = ele.attr('id') - - # if ele_id: - # return f'#{ele_id}{path_str}' if mode == 'css' else f'//{ele.tag}[@id="{ele_id}"]{path_str}' - # else: - if mode == 'css': brothers = len(ele.eles(f'xpath:./preceding-sibling::*')) path_str = f'>:nth-child({brothers + 1}){path_str}' @@ -357,7 +370,8 @@ def execute_session_find(page_or_ele, page_or_ele = page_or_ele.inner_ele else: # 传入的是SessionPage对象 page = page_or_ele - page_or_ele = fromstring(page_or_ele.html) + # page_or_ele = fromstring(page_or_ele.html) + page_or_ele = fromstring(page_or_ele.response.text, False) try: # 用lxml内置方法获取lxml的元素对象列表 @@ -368,6 +382,7 @@ def execute_session_find(page_or_ele, else: ele = page_or_ele.cssselect(loc[1]) + # 结果不是列表,如数字 if not isinstance(ele, list): return ele From eec2a510c407f655724f9770d817f40ef8ba8a57 Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 31 Dec 2020 18:16:26 +0800 Subject: [PATCH 16/94] =?UTF-8?q?=E7=BB=A7=E7=BB=AD=E7=A0=94=E7=A9=B6text?= =?UTF-8?q?=E7=9A=84=E6=98=BE=E7=A4=BA=EF=BC=8C=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/session_element.py | 76 +++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 27 deletions(-) diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index bfb3f95..bde0973 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -37,15 +37,8 @@ class SessionElement(DrissionElement): def html(self) -> str: """返回元素outerHTML文本""" # tostring()会把跟紧元素的文本节点也带上,因此要去掉 - # print(tostring(self._inner_ele, method="html").decode()) html = format_html(tostring(self._inner_ele, method="html").decode()) - # print(html) return html[:html.rfind('>') + 1] - # return format_html(html[:html.rfind('>') + 1],False) - - # def _html(self) -> str: - # html = tostring(self._inner_ele, method="html").decode() - # return html[:html.rfind('>') + 1] @property def inner_html(self) -> str: @@ -53,6 +46,53 @@ class SessionElement(DrissionElement): r = re.match(r'<.*?>(.*)', self.html, flags=re.DOTALL) return '' if not r else r.group(1) + @property + def text(self) -> str: + """返回元素内所有文本""" + + # html = tostring(self._inner_ele, method="html").decode() + # html = html[:html.rfind('>') + 1] + # html = re.sub(r'<.*?>', '', html).strip('\n ') + # html = format_html(re.sub(r' {2,}', ' ', html)) + # html = format_html(re.sub(r'\n{2,}', '\n', html)) + # html = format_html(re.sub(r'( \n){2,}', '\n', html)) + # html = format_html(re.sub(r'(\n ){2,}', '\n', html)) + # return html + + # return format_html(str(self._inner_ele.text_content())) + # return format_html(str(self._inner_ele.text_content()))#.replace('\n','') + + def get_node(ele): + l = [] + for el in ele.eles('xpath:./node()'): + if isinstance(el, str): + s = el.replace(' ', '').replace('\n', '') + # print('字符串', [s]) + if s != '': + l.append(s.strip(' \n')) + else: + l.append('\n') + else: + # print('元素', el) + l.extend(get_node(el)) + return l + + # for i in self.eles('xpath:./*'): + # print([i]) + + # l = [] + # for el in get_node(self): + # if isinstance(el,str): + # print('字符串') + # print(el) + # l.append(el) + # else: + # print('元素') + # print(el._inner_ele.text) + # l.append(el._inner_ele.text) + s = ''.join(get_node(self)) + return re.sub(r'\n{2,}', '\n', s) + @property def tag(self) -> str: """返回元素类型""" @@ -63,23 +103,6 @@ class SessionElement(DrissionElement): """返回元素所有属性及值""" return {attr: self.attr(attr) for attr, val in self.inner_ele.items()} - @property - def text(self) -> str: - """返回元素内所有文本""" - html = format_html(tostring(self._inner_ele, method="html").decode(), False) - html = html[:html.rfind('>') + 1] - - txt = re.sub(r'<.*?>', '', html).replace('\n', ' ') - txt = re.sub(r' {2,}', ' ', txt).strip() - # return format_html(txt) - return txt - - # return t - # return str(self._inner_ele.text_content()) - # return self._inner_ele.text_content() - - # txt = str(self._inner_ele.text_content()).replace('\n', ' ') - @property def link(self) -> str: """返回href或src绝对url""" @@ -315,7 +338,7 @@ class SessionElement(DrissionElement): return path_str[1:] if mode == 'css' else path_str def _get_brother(self, num: int = 1, mode: str = 'ele', direction: str = 'next'): - """返回前面第num个兄弟元素或节点 \n + """返回前面或后面第num个兄弟元素或节点 \n :param num: 前面第几个兄弟元素或节点 :param mode: 'ele', 'node' 或 'text',匹配元素、节点、或文本节点 :param direction: 'next' 或 'prev',查找的方向 @@ -370,8 +393,7 @@ def execute_session_find(page_or_ele, page_or_ele = page_or_ele.inner_ele else: # 传入的是SessionPage对象 page = page_or_ele - # page_or_ele = fromstring(page_or_ele.html) - page_or_ele = fromstring(page_or_ele.response.text, False) + page_or_ele = fromstring(format_html(page_or_ele.response.text, False)) try: # 用lxml内置方法获取lxml的元素对象列表 From 4ff724e6d492146956ac54b8924680cb931eb41b Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 31 Dec 2020 18:16:34 +0800 Subject: [PATCH 17/94] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E9=94=99=E5=88=AB?= =?UTF-8?q?=E5=AD=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/session_page.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 372d391..640173c 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -476,7 +476,7 @@ class SessionPage(object): :param data: post方式要提交的数据 :param show_errmsg: 是否显示和抛出异常 :param kwargs: 其它参数 - :return: tuple,第一位为Response或None,第二位为出错信息或'Sussess' + :return: tuple,第一位为Response或None,第二位为出错信息或'Success' """ if not url: if show_errmsg: From c7a8af6fe4aacfad720096021037b601d5020ffa Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 1 Jan 2021 00:33:45 +0800 Subject: [PATCH 18/94] =?UTF-8?q?s=E6=A8=A1=E5=BC=8F=E5=85=83=E7=B4=A0text?= =?UTF-8?q?=E5=B0=BD=E9=87=8F=E4=B8=8Ed=E6=A8=A1=E5=BC=8F=E4=BF=9D?= =?UTF-8?q?=E6=8C=81=E4=B8=80=E8=87=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/common.py | 7 ++--- DrissionPage/session_element.py | 54 +++++++++++---------------------- 2 files changed, 19 insertions(+), 42 deletions(-) diff --git a/DrissionPage/common.py b/DrissionPage/common.py index da2d22c..f7a4d06 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -201,12 +201,9 @@ def _make_search_str(search_str: str) -> str: return search_str -def format_html(text: str, replace_space: bool = True) -> str: +def format_html(text: str) -> str: """处理html编码字符""" - if text: - return unescape(text).replace('\xa0', ' ') if replace_space else unescape(text) - else: - return text + return unescape(text) if text else text def translate_loc(loc: tuple) -> tuple: diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index bde0973..7d402ff 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -38,7 +38,7 @@ class SessionElement(DrissionElement): """返回元素outerHTML文本""" # tostring()会把跟紧元素的文本节点也带上,因此要去掉 html = format_html(tostring(self._inner_ele, method="html").decode()) - return html[:html.rfind('>') + 1] + return format_html(html[:html.rfind('>') + 1]) @property def inner_html(self) -> str: @@ -50,48 +50,27 @@ class SessionElement(DrissionElement): def text(self) -> str: """返回元素内所有文本""" - # html = tostring(self._inner_ele, method="html").decode() - # html = html[:html.rfind('>') + 1] - # html = re.sub(r'<.*?>', '', html).strip('\n ') - # html = format_html(re.sub(r' {2,}', ' ', html)) - # html = format_html(re.sub(r'\n{2,}', '\n', html)) - # html = format_html(re.sub(r'( \n){2,}', '\n', html)) - # html = format_html(re.sub(r'(\n ){2,}', '\n', html)) - # return html - - # return format_html(str(self._inner_ele.text_content())) - # return format_html(str(self._inner_ele.text_content()))#.replace('\n','') - + # 为尽量保证与浏览器结果一致,弄得比较复杂 def get_node(ele): - l = [] + str_list = [] for el in ele.eles('xpath:./node()'): if isinstance(el, str): - s = el.replace(' ', '').replace('\n', '') - # print('字符串', [s]) - if s != '': - l.append(s.strip(' \n')) + if el.replace(' ', '').replace('\n', '') != '': + str_list.append(el.replace('\xa0', ' ').replace('\n', ' ').strip()) + elif '\n' in el: + str_list.append('\n') else: - l.append('\n') + str_list.append(' ') else: - # print('元素', el) - l.extend(get_node(el)) - return l + str_list.extend(get_node(el)) - # for i in self.eles('xpath:./*'): - # print([i]) + return str_list - # l = [] - # for el in get_node(self): - # if isinstance(el,str): - # print('字符串') - # print(el) - # l.append(el) - # else: - # print('元素') - # print(el._inner_ele.text) - # l.append(el._inner_ele.text) - s = ''.join(get_node(self)) - return re.sub(r'\n{2,}', '\n', s) + re_str = ''.join(get_node(self)) + re_str = re.sub(r'\n{2,}', '\n', re_str) + re_str = re.sub(r' {2,}', ' ', re_str) + + return format_html(re_str.strip('\n ')) @property def tag(self) -> str: @@ -393,7 +372,8 @@ def execute_session_find(page_or_ele, page_or_ele = page_or_ele.inner_ele else: # 传入的是SessionPage对象 page = page_or_ele - page_or_ele = fromstring(format_html(page_or_ele.response.text, False)) + # page_or_ele = fromstring(format_html(page_or_ele.response.text, False)) + page_or_ele = fromstring(page_or_ele.response.text) try: # 用lxml内置方法获取lxml的元素对象列表 From 5a21481c33e5561d8421aa2c88f78656b71ade16 Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 1 Jan 2021 01:01:50 +0800 Subject: [PATCH 19/94] =?UTF-8?q?d=E6=A8=A1=E5=BC=8F=E6=9F=A5=E6=89=BE?= =?UTF-8?q?=E5=85=83=E7=B4=A0timeout=E6=94=AF=E6=8C=810=E7=A7=92?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 79c3e2b..8622bfe 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -155,7 +155,7 @@ class DriverElement(DrissionElement): :return: DriverElement对象 """ loc = 'xpath', f'.{"/.." * num}' - return self.ele(loc, timeout=0.1) + return self.ele(loc, timeout=0) def nexts(self, num: int = 1, mode: str = 'ele'): """返回后面第num个兄弟元素或节点文本 \n @@ -548,13 +548,15 @@ class DriverElement(DrissionElement): else: raise ValueError(f"Argument direction can only be 'next' or 'prev', not '{direction}'.") + timeout = 0 if direction == 'prev' else .5 + # 获取节点 - ele_or_node = self.ele(f'xpath:./{direction_txt}-sibling::{node_txt}[{num}]', timeout=0.1) + ele_or_node = self.ele(f'xpath:./{direction_txt}-sibling::{node_txt}[{num}]', timeout=timeout) # 跳过元素间的换行符 while ele_or_node == '\n': num += 1 - ele_or_node = self.ele(f'xpath:./{direction_txt}-sibling::{node_txt}[{num}]', timeout=0.1) + ele_or_node = self.ele(f'xpath:./{direction_txt}-sibling::{node_txt}[{num}]', timeout=timeout) return ele_or_node @@ -590,13 +592,30 @@ def execute_driver_find(page_or_ele, wait = page.wait if loc[0] == 'xpath': - return wait.until(ElementsByXpath(page, loc[1], mode, timeout)) + if timeout: + return wait.until(ElementsByXpath(page, loc[1], mode, timeout)) + else: + return ElementsByXpath(page, loc[1], mode, timeout)(driver) else: if mode == 'single': - return DriverElement(wait.until(ec.presence_of_element_located(loc)), page) + if timeout: + return DriverElement(wait.until(ec.presence_of_element_located(loc)), page) + else: + try: + return DriverElement(driver.find_element_by_css_selector(loc[1]), page) + except: + return None elif mode == 'all': - eles = wait.until(ec.presence_of_all_elements_located(loc)) - return [DriverElement(ele, page) for ele in eles] + if timeout: + eles = wait.until(ec.presence_of_all_elements_located(loc)) + return [DriverElement(ele, page) for ele in eles] + else: + try: + eles = driver.find_elements_by_css_selector(loc[1]) + return [DriverElement(ele, page) for ele in eles] + except: + return [] + except TimeoutException: return [] if mode == 'all' else None From adb1952d9fc847b087542e565c22d0d7068b7df5 Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 1 Jan 2021 01:03:27 +0800 Subject: [PATCH 20/94] =?UTF-8?q?d=E6=A8=A1=E5=BC=8F=E6=9F=A5=E6=89=BE?= =?UTF-8?q?=E5=85=83=E7=B4=A0timeout=E6=94=AF=E6=8C=810=E7=A7=92?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 8622bfe..e4999ed 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -596,7 +596,7 @@ def execute_driver_find(page_or_ele, return wait.until(ElementsByXpath(page, loc[1], mode, timeout)) else: return ElementsByXpath(page, loc[1], mode, timeout)(driver) - else: + else: # 用css获取 if mode == 'single': if timeout: return DriverElement(wait.until(ec.presence_of_element_located(loc)), page) @@ -605,6 +605,7 @@ def execute_driver_find(page_or_ele, return DriverElement(driver.find_element_by_css_selector(loc[1]), page) except: return None + elif mode == 'all': if timeout: eles = wait.until(ec.presence_of_all_elements_located(loc)) @@ -616,7 +617,6 @@ def execute_driver_find(page_or_ele, except: return [] - except TimeoutException: return [] if mode == 'all' else None From 6421b43aa93d7062b3e167c9393888c5ecaf0573 Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 1 Jan 2021 01:20:18 +0800 Subject: [PATCH 21/94] =?UTF-8?q?d=E6=A8=A1=E5=BC=8F=E6=9F=A5=E6=89=BE?= =?UTF-8?q?=E5=85=83=E7=B4=A0timeout=E6=94=AF=E6=8C=810=E7=A7=92?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 2 +- DrissionPage/driver_page.py | 2 +- DrissionPage/mix_page.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index e4999ed..46d1443 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -585,7 +585,7 @@ def execute_driver_find(page_or_ele, driver = page_or_ele.driver try: - if timeout and timeout != page.timeout: + if timeout is not None and timeout != page.timeout: wait = WebDriverWait(driver, timeout=timeout) else: page.wait._driver = driver diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index 874179f..871a900 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -199,7 +199,7 @@ class DriverPage(object): else: raise ValueError('Argument loc_or_str can only be tuple, str, DriverElement, DriverElement.') - timeout = timeout or self.timeout + timeout = timeout if timeout is not None else self.timeout return execute_driver_find(self, loc_or_ele, mode, timeout) def eles(self, diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index 9961281..0336f21 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -379,7 +379,7 @@ class MixPage(Null, SessionPage, DriverPage): if self._mode == 's': return super().ele(loc_or_ele, mode=mode) elif self._mode == 'd': - timeout = timeout or self.timeout + timeout = timeout if timeout is not None else self.timeout return super(SessionPage, self).ele(loc_or_ele, mode=mode, timeout=timeout) def eles(self, From c262a978f7f3589cf68917e70a0ff4fb53e68533 Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 1 Jan 2021 01:21:29 +0800 Subject: [PATCH 22/94] =?UTF-8?q?=E6=9F=A5=E6=89=BE=E5=85=83=E7=B4=A0?= =?UTF-8?q?=E8=AF=AD=E5=8F=A5=E5=A2=9E=E5=8A=A0=E8=B6=85=E7=AE=80=E6=B4=81?= =?UTF-8?q?=E6=96=B9=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/common.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/DrissionPage/common.py b/DrissionPage/common.py index f7a4d06..ce90222 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -113,6 +113,18 @@ def str_to_loc(loc: str) -> tuple: else: loc = loc.replace('#', '@id=', 1) + if loc.startswith(('x:', 'x=')): + loc = f'xpath:{loc[2:]}' + + if loc.startswith(('c:', 'c=')): + loc = f'css:{loc[2:]}' + + if loc.startswith(('t:', 't=')): + loc = f'tag:{loc[2:]}' + + if loc.startswith(('tx:', 'tx=')): + loc = f'text{loc[2:]}' + # 根据属性查找 if loc.startswith('@'): r = re_SPLIT(r'([:=])', loc[1:], maxsplit=1) From c0bfdfa1e90757ffe0f13b6f2e2af26a9cf578d7 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sat, 2 Jan 2021 16:41:08 +0800 Subject: [PATCH 23/94] =?UTF-8?q?=E4=BC=98=E5=8C=96=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/common.py | 25 +++++++++++----------- DrissionPage/driver_element.py | 39 ++++++++++------------------------ DrissionPage/driver_page.py | 1 - DrissionPage/mix_page.py | 3 +-- 4 files changed, 24 insertions(+), 44 deletions(-) diff --git a/DrissionPage/common.py b/DrissionPage/common.py index ce90222..3c24128 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -107,22 +107,16 @@ def str_to_loc(loc: str) -> tuple: else: loc = loc.replace('.', '@class=', 1) - if loc.startswith('#'): + elif loc.startswith('#'): if loc.startswith(('#=', '#:',)): loc = loc.replace('#', '@id', 1) else: loc = loc.replace('#', '@id=', 1) - if loc.startswith(('x:', 'x=')): - loc = f'xpath:{loc[2:]}' - - if loc.startswith(('c:', 'c=')): - loc = f'css:{loc[2:]}' - - if loc.startswith(('t:', 't=')): + elif loc.startswith(('t:', 't=')): loc = f'tag:{loc[2:]}' - if loc.startswith(('tx:', 'tx=')): + elif loc.startswith(('tx:', 'tx=')): loc = f'text{loc[2:]}' # 根据属性查找 @@ -135,7 +129,7 @@ def str_to_loc(loc: str) -> tuple: loc_str = f'//*[@{loc[1:]}]' # 根据tag name查找 - elif loc.startswith(('tag=', 'tag:')): + elif loc.startswith(('tag:', 'tag=')): if '@' not in loc[4:]: loc_str = f'//*[name()="{loc[4:]}"]' else: @@ -149,7 +143,7 @@ def str_to_loc(loc: str) -> tuple: loc_str = f'//*[name()="{at_lst[0]}" and @{r[0]}]' # 根据文本查找 - elif loc.startswith(('text=', 'text:')): + elif loc.startswith(('text:', 'text=')): if len(loc) > 5: mode = 'exact' if loc[4] == '=' else 'fuzzy' loc_str = _make_xpath_str('*', 'text()', loc[5:], mode) @@ -157,13 +151,18 @@ def str_to_loc(loc: str) -> tuple: loc_str = '//*[not(text())]' # 用xpath查找 - elif loc.startswith(('xpath=', 'xpath:')): + elif loc.startswith(('xpath:', 'xpath=')): loc_str = loc[6:] + elif loc.startswith(('x:', 'x=')): + loc_str = loc[2:] # 用css selector查找 - elif loc.startswith(('css=', 'css:')): + elif loc.startswith(('css:', 'css=')): loc_by = 'css selector' loc_str = loc[4:] + elif loc.startswith(('c:', 'c=')): + loc_by = 'css selector' + loc_str = loc[2:] # 根据文本模糊查找 else: diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 46d1443..88e6544 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -584,38 +584,21 @@ def execute_driver_find(page_or_ele, page = page_or_ele driver = page_or_ele.driver + if timeout is not None and timeout != page.timeout: + wait = WebDriverWait(driver, timeout=timeout) + else: + page.wait._driver = driver + wait = page.wait + try: - if timeout is not None and timeout != page.timeout: - wait = WebDriverWait(driver, timeout=timeout) - else: - page.wait._driver = driver - wait = page.wait - if loc[0] == 'xpath': - if timeout: - return wait.until(ElementsByXpath(page, loc[1], mode, timeout)) - else: - return ElementsByXpath(page, loc[1], mode, timeout)(driver) - else: # 用css获取 + return wait.until(ElementsByXpath(page, loc[1], mode, timeout)) + else: # 使用css selector查找 if mode == 'single': - if timeout: - return DriverElement(wait.until(ec.presence_of_element_located(loc)), page) - else: - try: - return DriverElement(driver.find_element_by_css_selector(loc[1]), page) - except: - return None - + return DriverElement(wait.until(ec.presence_of_element_located(loc)), page) elif mode == 'all': - if timeout: - eles = wait.until(ec.presence_of_all_elements_located(loc)) - return [DriverElement(ele, page) for ele in eles] - else: - try: - eles = driver.find_elements_by_css_selector(loc[1]) - return [DriverElement(ele, page) for ele in eles] - except: - return [] + eles = wait.until(ec.presence_of_all_elements_located(loc)) + return [DriverElement(ele, page) for ele in eles] except TimeoutException: return [] if mode == 'all' else None diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index 871a900..b37dde9 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -199,7 +199,6 @@ class DriverPage(object): else: raise ValueError('Argument loc_or_str can only be tuple, str, DriverElement, DriverElement.') - timeout = timeout if timeout is not None else self.timeout return execute_driver_find(self, loc_or_ele, mode, timeout) def eles(self, diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index 0336f21..9b84723 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -74,7 +74,7 @@ class MixPage(Null, SessionPage, DriverPage): loc_or_str: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement], mode: str = 'single', timeout: float = None): - return self.ele(loc_or_str, mode, timeout or self.timeout) + return self.ele(loc_or_str, mode, timeout) @property def url(self) -> Union[str, None]: @@ -379,7 +379,6 @@ class MixPage(Null, SessionPage, DriverPage): if self._mode == 's': return super().ele(loc_or_ele, mode=mode) elif self._mode == 'd': - timeout = timeout if timeout is not None else self.timeout return super(SessionPage, self).ele(loc_or_ele, mode=mode, timeout=timeout) def eles(self, From 89769f3caa5ad84d8fe221af09ab456d7b303b27 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 4 Jan 2021 00:35:55 +0800 Subject: [PATCH 24/94] =?UTF-8?q?=E7=BB=A7=E7=BB=AD=E7=A0=94=E7=A9=B6text?= =?UTF-8?q?=E5=B1=9E=E6=80=A7=E7=9A=84=E6=A0=BC=E5=BC=8F=EF=BC=8C=E6=9C=AA?= =?UTF-8?q?=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/common.py | 26 +++++++++++++++++----- DrissionPage/driver_element.py | 34 +++++++++++++++++++++++----- DrissionPage/session_element.py | 39 ++++++++++++++++++++++++--------- 3 files changed, 78 insertions(+), 21 deletions(-) diff --git a/DrissionPage/common.py b/DrissionPage/common.py index 3c24128..d4a7044 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -5,7 +5,7 @@ @File : common.py """ from abc import abstractmethod -from html import unescape +# from html import unescape from pathlib import Path from re import split as re_SPLIT from shutil import rmtree @@ -96,7 +96,12 @@ def str_to_loc(loc: str) -> tuple: text:search_text - 文本含有search_text的元素 \n text=search_text - 文本等于search_text的元素 \n xpath://div[@class="ele_class"] - 用xpath查找 \n - css:div.ele_class - 用css selector查找 + css:div.ele_class - 用css selector查找 \n + xpath://div[@class="ele_class"] - 等同于 x://div[@class="ele_class"] \n + css:div.ele_class - 等同于 c:div.ele_class \n + tag:div - 等同于 t:div \n + text:search_text - 等同于 tx:search_text \n + text=search_text - 等同于 tx=search_text \n """ loc_by = 'xpath' @@ -195,7 +200,7 @@ def _make_xpath_str(tag: str, arg: str, val: str, mode: str = 'fuzzy') -> str: def _make_search_str(search_str: str) -> str: - """将"转义,不知何故不能直接用\来转义 \n + """将"转义,不知何故不能直接用 \ 来转义 \n :param search_str: 查询字符串 :return: 把"转义后的字符串 """ @@ -212,9 +217,20 @@ def _make_search_str(search_str: str) -> str: return search_str -def format_html(text: str) -> str: +def format_html(text: str, replace_space: bool = True) -> str: """处理html编码字符""" - return unescape(text) if text else text + if not text: + return text + + # text = unescape(text) + + # if '&' in text: + # html = unescape(text) + + if replace_space: + text = text.replace('\xa0', ' ') + + return text def translate_loc(loc: tuple) -> tuple: diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 88e6544..65a3b37 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -4,6 +4,7 @@ @Contact : g1879@qq.com @File : driver_element.py """ +import re from pathlib import Path from time import sleep from typing import Union, List, Any, Tuple @@ -78,7 +79,13 @@ class DriverElement(DrissionElement): @property def text(self) -> str: """返回元素内所有文本""" - return self.attr('innerText') + # return self.inner_ele.get_attribute('innerText') + re_str = self.inner_ele.get_attribute('innerText') + re_str = re.sub(r'\n{2,}', '\n', re_str) + re_str = re.sub(r' {2,}', ' ', re_str) + + return format_html(re_str.strip('\n ')) + # return re_str.strip('\n ') @property def link(self) -> str: @@ -178,7 +185,10 @@ class DriverElement(DrissionElement): :param attr: 属性名 :return: 属性值文本 """ - attr = 'innerText' if attr == 'text' else attr + # attr = 'innerText' if attr == 'text' else attr + if attr in ('text', 'innerText'): + return self.text + return format_html(self.inner_ele.get_attribute(attr)) def ele(self, @@ -188,7 +198,7 @@ class DriverElement(DrissionElement): """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 \n 示例: \n - 用loc元组查找: \n - ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n + ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n - 用查询字符串查找: \n 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n @@ -209,6 +219,12 @@ class DriverElement(DrissionElement): ele.ele('text=some_text') - 返回第一个文本等于some_text的子元素 \n ele.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的子元素 \n ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n + - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n + ele.ele('xpath://div[@class="ele_class"]') - 等同于 ele.ele('x://div[@class="ele_class"]') \n + ele.ele('css:div.ele_class') - 等同于 ele.ele('c:div.ele_class') \n + ele.ele('tag:div') - 等同于 ele.ele('t:div') \n + ele.ele('text:some_text') - 等同于 ele.ele('tx:some_text') \n + ele.ele('text=some_text') - 等同于 ele.ele('tx=some_text') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param mode: 'single' 或 'all',对应查找一个或全部 :param timeout: 查找元素超时时间 @@ -244,7 +260,7 @@ class DriverElement(DrissionElement): """返回当前元素下级所有符合条件的子元素、属性或节点文本 \n 示例: \n - 用loc元组查找: \n - ele.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n + ele.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n - 用查询字符串查找: \n 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n @@ -265,6 +281,12 @@ class DriverElement(DrissionElement): ele.eles('text=some_text') - 返回所有文本等于some_text的子元素 \n ele.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的子元素 \n ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n + - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n + ele.eles('xpath://div[@class="ele_class"]') - 等同于 ele.eles('x://div[@class="ele_class"]') \n + ele.eles('css:div.ele_class') - 等同于 ele.eles('c:div.ele_class') \n + ele.eles('tag:div') - 等同于 ele.eles('t:div') \n + ele.eles('text:some_text') - 等同于 ele.eles('tx:some_text') \n + ele.eles('text=some_text') - 等同于 ele.eles('tx=some_text') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param timeout: 查找元素超时时间 :return: DriverElement对象组成的列表 @@ -574,7 +596,7 @@ def execute_driver_find(page_or_ele, :return: 返回DriverElement元素或它们组成的列表 """ mode = mode or 'single' - if mode not in ['single', 'all']: + if mode not in ('single', 'all'): raise ValueError(f"Argument mode can only be 'single' or 'all', not '{mode}'.") if isinstance(page_or_ele, DrissionElement): @@ -674,7 +696,7 @@ class ElementsByXpath(object): else: driver, the_node = ele_or_driver.parent, ele_or_driver - # 把lxml元素对象包装成DriverElement对象并按需要返回第一个或全部 + # 把lxml元素对象包装成DriverElement对象并按需要返回第一个或全部 if self.mode == 'single': try: e = get_nodes(the_node, xpath_txt=self.xpath, type_txt='9') diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index 7d402ff..04ec7ef 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -36,9 +36,8 @@ class SessionElement(DrissionElement): @property def html(self) -> str: """返回元素outerHTML文本""" - # tostring()会把跟紧元素的文本节点也带上,因此要去掉 html = format_html(tostring(self._inner_ele, method="html").decode()) - return format_html(html[:html.rfind('>') + 1]) + return html[:html.rfind('>') + 1] # tostring()会把跟紧元素的文本节点也带上,因此要去掉 @property def inner_html(self) -> str: @@ -50,26 +49,35 @@ class SessionElement(DrissionElement): def text(self) -> str: """返回元素内所有文本""" + # re_str = str(self._inner_ele.text_content()) + # # re_str = re.sub(r'
', '\n', re_str) + # re_str = re.sub(r'\n{2,}', '\n', re_str) + # re_str = re.sub(r' {2,}', ' ', re_str) + # return format_html(re_str.strip('\n ')) + # # return format_html(re_str) + # 为尽量保证与浏览器结果一致,弄得比较复杂 def get_node(ele): str_list = [] for el in ele.eles('xpath:./node()'): if isinstance(el, str): if el.replace(' ', '').replace('\n', '') != '': - str_list.append(el.replace('\xa0', ' ').replace('\n', ' ').strip()) + # str_list.append(el.replace('\xa0', ' ').replace('\n', ' ').strip()) + str_list.append(el.replace('\n', ' ').strip(' ')) elif '\n' in el: str_list.append('\n') else: str_list.append(' ') else: str_list.extend(get_node(el)) + if el.tag in ('br', 'p',): + str_list.append('\n') return str_list re_str = ''.join(get_node(self)) re_str = re.sub(r'\n{2,}', '\n', re_str) re_str = re.sub(r' {2,}', ' ', re_str) - return format_html(re_str.strip('\n ')) @property @@ -176,7 +184,7 @@ class SessionElement(DrissionElement): elif attr == 'src': return self._make_absolute(self.inner_ele.get('src')) - elif attr in ['text', 'innerText']: + elif attr in ('text', 'innerText'): return self.text elif attr == 'outerHTML': @@ -189,10 +197,10 @@ class SessionElement(DrissionElement): return self.inner_ele.get(attr) def ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None): - """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 \n + """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 \n 示例: \n - 用loc元组查找: \n - ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n + ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n - 用查询字符串查找: \n 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n @@ -213,6 +221,12 @@ class SessionElement(DrissionElement): ele.ele('text=some_text') - 返回第一个文本等于some_text的子元素 \n ele.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的子元素 \n ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n + - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n + ele.ele('xpath://div[@class="ele_class"]') - 等同于 ele.ele('x://div[@class="ele_class"]') \n + ele.ele('css:div.ele_class') - 等同于 ele.ele('c:div.ele_class') \n + ele.ele('tag:div') - 等同于 ele.ele('t:div') \n + ele.ele('text:some_text') - 等同于 ele.ele('tx:some_text') \n + ele.ele('text=some_text') - 等同于 ele.ele('tx=some_text') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param mode: 'single' 或 'all‘,对应查找一个或全部 :return: SessionElement对象 @@ -246,7 +260,7 @@ class SessionElement(DrissionElement): """返回当前元素下级所有符合条件的子元素、属性或节点文本 \n 示例: \n - 用loc元组查找: \n - ele.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n + ele.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n - 用查询字符串查找: \n 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n @@ -267,6 +281,12 @@ class SessionElement(DrissionElement): ele.eles('text=some_text') - 返回所有文本等于some_text的子元素 \n ele.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的子元素 \n ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n + - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n + ele.eles('xpath://div[@class="ele_class"]') - 等同于 ele.eles('x://div[@class="ele_class"]') \n + ele.eles('css:div.ele_class') - 等同于 ele.eles('c:div.ele_class') \n + ele.eles('tag:div') - 等同于 ele.eles('t:div') \n + ele.eles('text:some_text') - 等同于 ele.eles('tx:some_text') \n + ele.eles('text=some_text') - 等同于 ele.eles('tx=some_text') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :return: SessionElement对象组成的列表 """ @@ -363,7 +383,7 @@ def execute_session_find(page_or_ele, :return: 返回SessionElement元素或列表 """ mode = mode or 'single' - if mode not in ['single', 'all']: + if mode not in ('single', 'all'): raise ValueError(f"Argument mode can only be 'single' or 'all', not '{mode}'.") # 根据传入对象类型获取页面对象和lxml元素对象 @@ -372,7 +392,6 @@ def execute_session_find(page_or_ele, page_or_ele = page_or_ele.inner_ele else: # 传入的是SessionPage对象 page = page_or_ele - # page_or_ele = fromstring(format_html(page_or_ele.response.text, False)) page_or_ele = fromstring(page_or_ele.response.text) try: From 11e36d5b0cabd689dc449c6f8de57a6666a08a25 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 4 Jan 2021 00:36:30 +0800 Subject: [PATCH 25/94] =?UTF-8?q?=E5=BE=AE=E8=B0=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_page.py | 2 +- DrissionPage/mix_page.py | 18 +++++++++++++++--- DrissionPage/session_page.py | 2 +- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index b37dde9..b370545 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -248,7 +248,7 @@ class DriverPage(object): :param timeout: 等待超时时间 :return: 等待是否成功 """ - if mode.lower() not in ['del', 'display', 'hidden']: + if mode.lower() not in ('del', 'display', 'hidden'): raise ValueError('Argument mode can only be "del", "display", "hidden"') from selenium.webdriver.support.wait import WebDriverWait diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index 9b84723..488f386 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -47,7 +47,7 @@ class MixPage(Null, SessionPage, DriverPage): :param session_options: requests设置,没有传入drission参数时会用这个设置新建Drission对象 """ super().__init__() - if drission in ['s', 'd', 'S', 'D']: + if drission in ('s', 'd', 'S', 'D'): mode = drission.lower() drission = None @@ -350,7 +350,7 @@ class MixPage(Null, SessionPage, DriverPage): - 接收到元素对象时: \n 返回元素对象对象 \n - 用loc元组查找: \n - ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n + ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n - 用查询字符串查找: \n 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n @@ -371,6 +371,12 @@ class MixPage(Null, SessionPage, DriverPage): page.ele('text=some_text') - 返回第一个文本等于some_text的元素 \n page.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的元素 \n page.ele('css:div.ele_class') - 返回第一个符合css selector的元素 \n + - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n + ele.ele('xpath://div[@class="ele_class"]') - 等同于 ele.ele('x://div[@class="ele_class"]') \n + ele.ele('css:div.ele_class') - 等同于 ele.ele('c:div.ele_class') \n + ele.ele('tag:div') - 等同于 ele.ele('t:div') \n + ele.ele('text:some_text') - 等同于 ele.ele('tx:some_text') \n + ele.ele('text=some_text') - 等同于 ele.ele('tx=some_text') :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 :param mode: 'single' 或 'all‘,对应查找一个或全部 :param timeout: 查找元素超时时间,d模式专用 @@ -387,7 +393,7 @@ class MixPage(Null, SessionPage, DriverPage): """返回页面中所有符合条件的元素、属性或节点文本 \n 示例: \n - 用loc元组查找: \n - page.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的元素 \n + page.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的元素 \n - 用查询字符串查找: \n 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n @@ -408,6 +414,12 @@ class MixPage(Null, SessionPage, DriverPage): page.eles('text=some_text') - 返回所有文本等于some_text的元素 \n page.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的元素 \n page.eles('css:div.ele_class') - 返回所有符合css selector的元素 \n + - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n + ele.eles('xpath://div[@class="ele_class"]') - 等同于 ele.eles('x://div[@class="ele_class"]') \n + ele.eles('css:div.ele_class') - 等同于 ele.eles('c:div.ele_class') \n + ele.eles('tag:div') - 等同于 ele.eles('t:div') \n + ele.eles('text:some_text') - 等同于 ele.eles('tx:some_text') \n + ele.eles('text=some_text') - 等同于 ele.eles('tx=some_text') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param timeout: 查找元素超时时间,d模式专用 :return: 元素对象或属性、文本节点文本组成的列表 diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 640173c..d6ed958 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -483,7 +483,7 @@ class SessionPage(object): raise ValueError('url is empty.') return None, 'url is empty.' - if mode not in ['get', 'post']: + if mode not in ('get', 'post'): raise ValueError("Argument mode can only be 'get' or 'post'.") url = quote(url, safe='/:&?=%;#@+') From dfc557b2df8b48b276f10d36ee669dfe44053cbd Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 4 Jan 2021 17:19:50 +0800 Subject: [PATCH 26/94] =?UTF-8?q?=E5=AE=8C=E5=96=84format=5Fhtml()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/common.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/DrissionPage/common.py b/DrissionPage/common.py index d4a7044..b70b8e2 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -5,7 +5,7 @@ @File : common.py """ from abc import abstractmethod -# from html import unescape +from html import unescape from pathlib import Path from re import split as re_SPLIT from shutil import rmtree @@ -217,20 +217,15 @@ def _make_search_str(search_str: str) -> str: return search_str -def format_html(text: str, replace_space: bool = True) -> str: +def format_html(text: str, trans: bool = True) -> str: """处理html编码字符""" if not text: return text - # text = unescape(text) + if trans: + text = unescape(text) - # if '&' in text: - # html = unescape(text) - - if replace_space: - text = text.replace('\xa0', ' ') - - return text + return text.replace('\xa0', ' ') def translate_loc(loc: tuple) -> tuple: From 744e09c6492ffc9db43ae735acbe570b86919139 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 4 Jan 2021 17:23:13 +0800 Subject: [PATCH 27/94] =?UTF-8?q?=E7=BB=A7=E7=BB=AD=E5=AE=8C=E5=96=84text?= =?UTF-8?q?=E5=B1=9E=E6=80=A7=E8=BF=94=E5=9B=9E=E5=86=85=E5=AE=B9=EF=BC=9B?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0comments=E5=B1=9E=E6=80=A7=EF=BC=9B=E5=AE=8C?= =?UTF-8?q?=E5=96=84=E5=AF=B9=E5=85=83=E7=B4=A0=E5=86=85=E6=9C=89=E6=B3=A8?= =?UTF-8?q?=E9=87=8A=E6=97=B6=E8=8A=82=E7=82=B9=E7=9A=84=E8=8E=B7=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 25 ++++++----- DrissionPage/session_element.py | 73 ++++++++++++++++----------------- 2 files changed, 51 insertions(+), 47 deletions(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 65a3b37..01cc60e 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -4,7 +4,6 @@ @Contact : g1879@qq.com @File : driver_element.py """ -import re from pathlib import Path from time import sleep from typing import Union, List, Any, Tuple @@ -79,13 +78,13 @@ class DriverElement(DrissionElement): @property def text(self) -> str: """返回元素内所有文本""" + return format_html(self.inner_ele.get_attribute('innerText'), False) # return self.inner_ele.get_attribute('innerText') - re_str = self.inner_ele.get_attribute('innerText') - re_str = re.sub(r'\n{2,}', '\n', re_str) - re_str = re.sub(r' {2,}', ' ', re_str) - - return format_html(re_str.strip('\n ')) - # return re_str.strip('\n ') + # re_str = self.inner_ele.get_attribute('innerText') + # re_str = re.sub(r'\n{2,}', '\n', re_str) + # re_str = re.sub(r' {2,}', ' ', re_str) + # + # return format_html(re_str.strip('\n ')) @property def link(self) -> str: @@ -116,6 +115,10 @@ class DriverElement(DrissionElement): """返回前一个兄弟元素""" return self._get_brother(1, 'ele', 'prev') + @property + def comments(self): + return self.eles('xpath:.//comment()') + # -----------------driver独占属性------------------- @property def size(self) -> dict: @@ -152,9 +155,9 @@ class DriverElement(DrissionElement): :return: 文本列表 """ if text_node_only: - return self.eles('xpath:./text()') + return self.eles('xpath:/text()') else: - return [x if isinstance(x, str) else x.text for x in self.eles('xpath:./node()')] + return [x if isinstance(x, str) else x.text for x in self.eles('xpath:./text() | *')] def parents(self, num: int = 1): """返回上面第num级父元素 \n @@ -576,7 +579,7 @@ class DriverElement(DrissionElement): ele_or_node = self.ele(f'xpath:./{direction_txt}-sibling::{node_txt}[{num}]', timeout=timeout) # 跳过元素间的换行符 - while ele_or_node == '\n': + while isinstance(ele_or_node, str) and ele_or_node.replace('\n', '').replace('\t', '').replace(' ', '') == '': num += 1 ele_or_node = self.ele(f'xpath:./{direction_txt}-sibling::{node_txt}[{num}]', timeout=timeout) @@ -662,6 +665,7 @@ class ElementsByXpath(object): return_txt = ''' if(e.singleNodeValue.constructor.name=="Text"){return e.singleNodeValue.data;} else if(e.singleNodeValue.constructor.name=="Attr"){return e.singleNodeValue.nodeValue;} + else if(e.singleNodeValue.constructor.name=="Comment"){return e.singleNodeValue.nodeValue;} else{return e.singleNodeValue;} ''' @@ -672,6 +676,7 @@ class ElementsByXpath(object): for(var i = 0; i str: """返回元素内所有文本""" + # 为尽量保证与浏览器结果一致,弄得比较复杂 + def get_node(ele, pre: bool = False): + str_list = [] + if ele.tag == 'pre': + pre = True + for el in ele.eles('xpath:./text() | *'): + if isinstance(el, str): + if el.replace(' ', '').replace('\n', '') != '': + if pre: + str_list.append(el) + else: + str_list.append(el.replace('\n', ' ').strip(' \t')) + + elif '\n' in el and str_list and str_list[-1] != '\n': + str_list.append('\n') + else: + str_list.append(' ') + else: + str_list.extend(get_node(el, pre)) + if el.tag in ('br', 'p',) and str_list and str_list[-1] != '\n': + str_list.append('\n') + + return str_list + + re_str = ''.join(get_node(self)) + re_str = re.sub(r' {2,}', ' ', re_str) + return format_html(re_str, False) + # re_str = str(self._inner_ele.text_content()) # # re_str = re.sub(r'
', '\n', re_str) # re_str = re.sub(r'\n{2,}', '\n', re_str) @@ -56,30 +84,6 @@ class SessionElement(DrissionElement): # return format_html(re_str.strip('\n ')) # # return format_html(re_str) - # 为尽量保证与浏览器结果一致,弄得比较复杂 - def get_node(ele): - str_list = [] - for el in ele.eles('xpath:./node()'): - if isinstance(el, str): - if el.replace(' ', '').replace('\n', '') != '': - # str_list.append(el.replace('\xa0', ' ').replace('\n', ' ').strip()) - str_list.append(el.replace('\n', ' ').strip(' ')) - elif '\n' in el: - str_list.append('\n') - else: - str_list.append(' ') - else: - str_list.extend(get_node(el)) - if el.tag in ('br', 'p',): - str_list.append('\n') - - return str_list - - re_str = ''.join(get_node(self)) - re_str = re.sub(r'\n{2,}', '\n', re_str) - re_str = re.sub(r' {2,}', ' ', re_str) - return format_html(re_str.strip('\n ')) - @property def tag(self) -> str: """返回元素类型""" @@ -120,26 +124,21 @@ class SessionElement(DrissionElement): """返回前一个兄弟元素""" return self._get_brother(1, 'ele', 'prev') + @property + def comments(self): + return self.eles('xpath:.//comment()') + def texts(self, text_node_only: bool = False) -> list: """返回元素内所有直接子节点的文本,包括元素和文本节点 \n :param text_node_only: 是否只返回文本节点 :return: 文本列表 """ if text_node_only: - return self.eles('xpath:/text()') + texts = self.eles('xpath:/text()') else: - texts = [] + texts = [x if isinstance(x, str) else x.text for x in self.eles('xpath:./text() | *')] - for node in self.eles('xpath:/node()'): - if isinstance(node, str): - text = node - else: - text = node.text - - if text: - texts.append(text) - - return texts + return [format_html(x) for x in texts if x and x.replace('\n', '').replace('\t', '').replace(' ', '') != ''] def parents(self, num: int = 1): """返回上面第num级父元素 \n @@ -392,7 +391,7 @@ def execute_session_find(page_or_ele, page_or_ele = page_or_ele.inner_ele else: # 传入的是SessionPage对象 page = page_or_ele - page_or_ele = fromstring(page_or_ele.response.text) + page_or_ele = fromstring(re.sub(r' ?', ' ', page_or_ele.response.text)) try: # 用lxml内置方法获取lxml的元素对象列表 From 0f9f52b1f6b5311aa3de4aabf000a812efdddf49 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 4 Jan 2021 17:23:31 +0800 Subject: [PATCH 28/94] =?UTF-8?q?=E5=A2=9E=E5=8A=A0comments=E5=B1=9E?= =?UTF-8?q?=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.en.md | 17 +++++++++-------- README.zh-cn.md | 1 + 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/README.en.md b/README.en.md index 83d9e4d..88c52ae 100644 --- a/README.en.md +++ b/README.en.md @@ -697,6 +697,7 @@ element.html # Return element outerHTML element.inner_html # Return element innerHTML element.tag # Return element tag name element.text # Return element innerText value +element.comments # Returns the list of comments within the element element.link # Returns absolute href or src value of the element. element.texts() # Returns the text of all direct child nodes in the element, including elements and text nodes, you can specify to return only text nodes element.attrs # Return a dictionary of all attributes of the element @@ -1905,15 +1906,17 @@ Return all attributes and values ​​of the element in a dictionary. Returns: dict - - ### text Returns the text inside the element. Returns: str +### comments +Returns the list of comments within the element + +Returns: list ### link @@ -1921,8 +1924,6 @@ Returns absolute href or src value of the element. Returns: str - - ### css_path Returns the absolute path of the element css selector. @@ -2377,15 +2378,17 @@ Returns the names and values of all attributes of the element in dictionary form Returns: dict - - ### text Returns the text within the element, namely innerText. Returns: str +### comments +Returns the list of comments within the element + +Returns: list ### link @@ -2393,8 +2396,6 @@ Returns absolute href or src value of the element. Returns: str - - ### css_path Returns the absolute path of the element css selector. diff --git a/README.zh-cn.md b/README.zh-cn.md index 5959311..0d8af07 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -663,6 +663,7 @@ element.html # 返回元素 outerHTML element.inner_html # 返回元素 innerHTML element.tag # 返回元素 tag name element.text # 返回元素 innerText 值 +element.comments # 返回元素内注释列表 element.link # 返回元素 href 或 src 绝对 url element.texts() # 返回元素内所有直接子节点的文本,包括元素和文本节点,可指定只返回文本节点 element.attrs # 返回元素所有属性的字典 From e03a67c3a5dc0429c82ea2d1ad5d523c599bb529 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 4 Jan 2021 17:42:45 +0800 Subject: [PATCH 29/94] =?UTF-8?q?=E5=AE=8C=E5=96=84texts()=E6=96=B9?= =?UTF-8?q?=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 6 ++++-- DrissionPage/session_element.py | 12 +++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 01cc60e..e8376c0 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -155,9 +155,11 @@ class DriverElement(DrissionElement): :return: 文本列表 """ if text_node_only: - return self.eles('xpath:/text()') + texts = self.eles('xpath:/text()') else: - return [x if isinstance(x, str) else x.text for x in self.eles('xpath:./text() | *')] + texts = [x if isinstance(x, str) else x.text for x in self.eles('xpath:./text() | *')] + + return [x.strip(' ') for x in texts if x and x.replace('\n', '').replace('\t', '').replace(' ', '') != ''] def parents(self, num: int = 1): """返回上面第num级父元素 \n diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index 498c7d3..c76d9e3 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -54,7 +54,12 @@ class SessionElement(DrissionElement): str_list = [] if ele.tag == 'pre': pre = True + + current_tag = None for el in ele.eles('xpath:./text() | *'): + if current_tag in ('br', 'p') and str_list and str_list[-1] != '\n': + str_list.append('\n') + if isinstance(el, str): if el.replace(' ', '').replace('\n', '') != '': if pre: @@ -66,10 +71,10 @@ class SessionElement(DrissionElement): str_list.append('\n') else: str_list.append(' ') + current_tag = None else: str_list.extend(get_node(el, pre)) - if el.tag in ('br', 'p',) and str_list and str_list[-1] != '\n': - str_list.append('\n') + current_tag = el.tag return str_list @@ -138,7 +143,8 @@ class SessionElement(DrissionElement): else: texts = [x if isinstance(x, str) else x.text for x in self.eles('xpath:./text() | *')] - return [format_html(x) for x in texts if x and x.replace('\n', '').replace('\t', '').replace(' ', '') != ''] + return [format_html(x.strip(' ')) for x in texts if + x and x.replace('\n', '').replace('\t', '').replace(' ', '') != ''] def parents(self, num: int = 1): """返回上面第num级父元素 \n From f2ca4fad464283356a7302bc714023c2be214f3f Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 5 Jan 2021 21:29:00 +0800 Subject: [PATCH 30/94] =?UTF-8?q?=E4=BC=98=E5=8C=96download()=EF=BC=8C?= =?UTF-8?q?=E6=88=90=E5=8A=9F=E6=97=B6=E8=BF=94=E5=9B=9E=E5=80=BC=E7=AC=AC?= =?UTF-8?q?=E4=BA=8C=E4=BD=8D=E6=98=AF=E5=B7=B2=E4=B8=8B=E8=BD=BD=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E7=9A=84=E7=BB=9D=E5=AF=B9=E8=B7=AF=E5=BE=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/session_page.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index d6ed958..47904f5 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -315,6 +315,12 @@ class SessionPage(object): :return: 下载是否成功(bool)和状态信息(成功时信息为文件路径)的元组 """ # 生成的response不写入self._response,是临时的 + if file_exists == 'skip' and Path(f'{goal_path}\\{rename}').exists(): + if show_msg: + print(f'{file_url}\n{goal_path}\\{rename}\nSkipped.\n') + + return False, 'Skipped because a file with the same name already exists.' + kwargs['stream'] = True if 'timeout' not in kwargs: @@ -383,9 +389,8 @@ class SessionPage(object): goal_path += goal_Path.drive if key == 0 and goal_Path.drive else re_SUB(r'[*:|<>?"]', '', i).strip() goal_path += '\\' if i != '\\' and key < len(goal_Path.parts) - 1 else '' - goal_Path = Path(goal_path) + goal_Path = Path(goal_path).absolute() goal_Path.mkdir(parents=True, exist_ok=True) - goal_path = goal_Path.absolute() full_path = Path(f'{goal_path}\\{full_name}') if full_path.exists(): @@ -448,7 +453,7 @@ class SessionPage(object): download_status, info = False, 'File size is 0.' else: - download_status, info = True, 'Success.' + download_status, info = True, str(full_path) finally: # 删除下载出错文件 From 25c59cf950473318f7e3444495d8910e65b1bdaf Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 6 Jan 2021 17:45:10 +0800 Subject: [PATCH 31/94] =?UTF-8?q?=E9=81=BF=E5=85=8Ds=E6=A8=A1=E5=BC=8F?= =?UTF-8?q?=E8=AE=BE=E7=BD=AEcookie=E6=97=B6sameSite=E5=AD=97=E6=AE=B5?= =?UTF-8?q?=E5=AF=BC=E8=87=B4=E7=9A=84=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 65bbb30..df28659 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -4,7 +4,6 @@ @Contact : g1879@qq.com @File : drission.py """ -from sys import exit from typing import Union from requests import Session @@ -13,6 +12,7 @@ from selenium import webdriver from selenium.common.exceptions import SessionNotCreatedException, WebDriverException from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.webdriver import WebDriver +from sys import exit from tldextract import extract from .config import (_dict_to_chrome_options, _session_options_to_dict, @@ -194,7 +194,8 @@ class Drission(object): # 添加cookie到session if set_session: - kwargs = {x: cookie[x] for x in cookie if x not in ('name', 'value', 'httpOnly', 'expiry')} + kwargs = {x: cookie[x] for x in cookie + if x.lower() not in ('name', 'value', 'httponly', 'expiry', 'samesite')} if 'expiry' in cookie: kwargs['expires'] = cookie['expiry'] From b8a82d6bfc9d1b7359ae9885ed316ce0382d8d13 Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 6 Jan 2021 17:47:07 +0800 Subject: [PATCH 32/94] =?UTF-8?q?=E9=81=BF=E5=85=8Dcookie=E4=B8=AD?= =?UTF-8?q?=E6=9F=90=E4=B8=AA=E5=80=BC=E6=98=AF=E7=A9=BA=E6=97=B6=E5=87=BA?= =?UTF-8?q?=E7=8E=B0=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/DrissionPage/config.py b/DrissionPage/config.py index a3e5253..d158d45 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -739,10 +739,11 @@ def _cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict: attr_val = attr.lstrip().split('=') if key == 0: + # TODO: 检查 cookie_dict['name'] = attr_val[0] - cookie_dict['value'] = attr_val[1] + cookie_dict['value'] = attr_val[1] if len(attr_val) == 2 else '' else: - cookie_dict[attr_val[0]] = attr_val[1] + cookie_dict[attr_val[0]] = attr_val[1] if len(attr_val) == 2 else '' return cookie_dict From 1649d8a3b61c92a8e04837bd3c662f8c95b867f5 Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 7 Jan 2021 09:42:03 +0800 Subject: [PATCH 33/94] =?UTF-8?q?=E4=BC=98=E5=8C=96=5Ftry=5Fto=5Fconnect()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_page.py | 23 ++++++++++++++++------- DrissionPage/session_page.py | 10 ++++------ 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index b370545..bb26380 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -6,7 +6,6 @@ """ from glob import glob from pathlib import Path -from time import time, sleep from typing import Union, List, Any, Tuple from urllib.parse import quote @@ -14,6 +13,7 @@ from selenium.common.exceptions import NoAlertPresentException from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.support.wait import WebDriverWait +from time import time, sleep from .common import str_to_loc, get_available_file_name, translate_loc, format_html from .driver_element import DriverElement, execute_driver_find @@ -100,14 +100,23 @@ class DriverPage(object): :param show_errmsg: 是否抛出异常 :return: 是否成功 """ - self.driver.get(to_url) - is_ok = self.check_page() - while times and is_ok is False: + def goto() -> bool: + try: + self.driver.get(to_url) + return True + except: + return False + + is_ok = self.check_page() if goto() else False + + for _ in range(times): + if is_ok is not False: + break + sleep(interval) - self.driver.get(to_url) - is_ok = self.check_page() - times -= 1 + print(f'重试 {to_url}') + is_ok = self.check_page() if goto() else False if is_ok is False and show_errmsg: raise ConnectionError('Connect error.') diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 47904f5..c8b83ea 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -10,11 +10,11 @@ from pathlib import Path from random import randint from re import search as re_SEARCH from re import sub as re_SUB -from time import time, sleep from typing import Union, List, Tuple from urllib.parse import urlparse, quote, unquote from requests import Session, Response +from time import time, sleep from tldextract import extract from .common import str_to_loc, translate_loc, get_available_file_name, format_html @@ -199,15 +199,13 @@ class SessionPage(object): """ r = self._make_response(to_url, mode=mode, show_errmsg=show_errmsg, **kwargs)[0] - while times and (not r or r.content == b''): - if r is not None and r.status_code in (403, 404): + for _ in range(times): + if (r and r.content != b'') or (r is not None and r.status_code in (403, 404)): break - print('重试', to_url) + print(f'重试 {to_url}') sleep(interval) - r = self._make_response(to_url, mode=mode, show_errmsg=show_errmsg, **kwargs)[0] - times -= 1 return r From 4d1b8e5109377782a1488ef69ccca3107de18a3f Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 8 Jan 2021 08:54:03 +0800 Subject: [PATCH 34/94] =?UTF-8?q?=E8=A7=A3=E5=86=B3d=E6=A8=A1=E5=BC=8F?= =?UTF-8?q?=E4=B8=8Bxpath=E4=B8=AD=E5=B8=A6=E6=9C=89=E5=9B=9E=E8=BD=A6?= =?UTF-8?q?=E7=AD=89=E7=89=B9=E6=AE=8A=E5=AD=97=E7=AC=A6=E6=97=B6=E5=87=BA?= =?UTF-8?q?=E9=94=99=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index e8376c0..266b635 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -5,7 +5,6 @@ @File : driver_element.py """ from pathlib import Path -from time import sleep from typing import Union, List, Any, Tuple from selenium.common.exceptions import TimeoutException, JavascriptException, InvalidElementStateException @@ -13,6 +12,7 @@ from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.support import expected_conditions as ec from selenium.webdriver.support.wait import WebDriverWait +from time import sleep from .common import DrissionElement, str_to_loc, get_available_file_name, translate_loc, format_html @@ -541,7 +541,6 @@ class DriverElement(DrissionElement): sib = sib.previousSibling; } ''' + txt4 + ''' - el = el.parentNode; } ''' + txt5 + ''' @@ -611,6 +610,7 @@ def execute_driver_find(page_or_ele, page = page_or_ele driver = page_or_ele.driver + # 设置等待对象 if timeout is not None and timeout != page.timeout: wait = WebDriverWait(driver, timeout=timeout) else: @@ -618,9 +618,12 @@ def execute_driver_find(page_or_ele, wait = page.wait try: + # 使用xpath查找 if loc[0] == 'xpath': return wait.until(ElementsByXpath(page, loc[1], mode, timeout)) - else: # 使用css selector查找 + + # 使用css selector查找 + else: if mode == 'single': return DriverElement(wait.until(ec.presence_of_element_located(loc)), page) elif mode == 'all': @@ -692,11 +695,11 @@ class ElementsByXpath(object): return_txt = 'return e.singleNodeValue;' js = """ - var e=document.evaluate('""" + xpath_txt + """', """ + node_txt + """, null, """ + type_txt + """,null); + var e=document.evaluate(arguments[1], """ + node_txt + """, null, """ + type_txt + """,null); """ + for_txt + """ """ + return_txt + """ """ - return driver.execute_script(js, node) + return driver.execute_script(js, node, xpath_txt) if isinstance(ele_or_driver, WebDriver): driver, the_node = ele_or_driver, 'document' From f4e80fdd9230f1da5275d7cd8678167393f697a2 Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 8 Jan 2021 10:55:53 +0800 Subject: [PATCH 35/94] =?UTF-8?q?=E5=AE=8C=E5=96=84=5Ftry=5Fto=5Fconnect()?= =?UTF-8?q?=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_page.py | 21 +++++++++++++-------- DrissionPage/session_page.py | 25 +++++++++++++++++++------ 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index bb26380..f5832a6 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -100,26 +100,31 @@ class DriverPage(object): :param show_errmsg: 是否抛出异常 :return: 是否成功 """ + err = None - def goto() -> bool: + def go() -> bool: + nonlocal err try: self.driver.get(to_url) return True - except: + except Exception as e: + err = e return False - is_ok = self.check_page() if goto() else False + is_ok = False + + for _ in range(times + 1): + is_ok = self.check_page() if go() else False - for _ in range(times): if is_ok is not False: break - sleep(interval) - print(f'重试 {to_url}') - is_ok = self.check_page() if goto() else False + if _ < times: + sleep(interval) + print(f'重试 {to_url}') if is_ok is False and show_errmsg: - raise ConnectionError('Connect error.') + raise err if err is not None else ConnectionError('Connect error.') return is_ok diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index c8b83ea..bee8db4 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -197,15 +197,28 @@ class SessionPage(object): :param kwargs: 连接参数 :return: HTMLResponse对象 """ - r = self._make_response(to_url, mode=mode, show_errmsg=show_errmsg, **kwargs)[0] + err = None + r = None - for _ in range(times): - if (r and r.content != b'') or (r is not None and r.status_code in (403, 404)): + def go() -> Union[Response, None]: + nonlocal err + try: + return self._make_response(to_url, mode=mode, show_errmsg=True, **kwargs)[0] + except Exception as e: + err = e + return None + + for _ in range(times + 1): + r = go() + if r and (r.content != b'' or r.status_code in (403, 404)): break - print(f'重试 {to_url}') - sleep(interval) - r = self._make_response(to_url, mode=mode, show_errmsg=show_errmsg, **kwargs)[0] + if _ < times: + sleep(interval) + print(f'重试 {to_url}') + + if not r and show_errmsg: + raise err if err is not None else ConnectionError('Connect error.') return r From 94ab5975a990f38cb01a6f79deaf30563b00d582 Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 8 Jan 2021 11:17:39 +0800 Subject: [PATCH 36/94] =?UTF-8?q?=E5=AE=8C=E5=96=84=5Ftry=5Fto=5Fconnect()?= =?UTF-8?q?=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_page.py | 19 ++++++++----------- DrissionPage/session_page.py | 9 +++------ 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index f5832a6..cb64f10 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -101,20 +101,17 @@ class DriverPage(object): :return: 是否成功 """ err = None - - def go() -> bool: - nonlocal err - try: - self.driver.get(to_url) - return True - except Exception as e: - err = e - return False - is_ok = False for _ in range(times + 1): - is_ok = self.check_page() if go() else False + try: + self.driver.get(to_url) + go_ok = True + except Exception as e: + err = e + go_ok = False + + is_ok = self.check_page() if go_ok else False if is_ok is not False: break diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index bee8db4..920c237 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -200,16 +200,13 @@ class SessionPage(object): err = None r = None - def go() -> Union[Response, None]: - nonlocal err + for _ in range(times + 1): try: - return self._make_response(to_url, mode=mode, show_errmsg=True, **kwargs)[0] + r = self._make_response(to_url, mode=mode, show_errmsg=True, **kwargs)[0] except Exception as e: err = e - return None + r = None - for _ in range(times + 1): - r = go() if r and (r.content != b'' or r.status_code in (403, 404)): break From 78554f8145f4338325279e46b97258d383f99db3 Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 8 Jan 2021 12:48:38 +0800 Subject: [PATCH 37/94] =?UTF-8?q?=E5=85=83=E7=B4=A0=E5=AF=B9=E8=B1=A1?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0row=5Ftext=E5=B1=9E=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 19 ++++++++++++------- DrissionPage/session_element.py | 10 ++++------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 266b635..9a66a5c 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -4,6 +4,7 @@ @Contact : g1879@qq.com @File : driver_element.py """ +import re from pathlib import Path from typing import Union, List, Any, Tuple @@ -78,13 +79,17 @@ class DriverElement(DrissionElement): @property def text(self) -> str: """返回元素内所有文本""" - return format_html(self.inner_ele.get_attribute('innerText'), False) - # return self.inner_ele.get_attribute('innerText') - # re_str = self.inner_ele.get_attribute('innerText') - # re_str = re.sub(r'\n{2,}', '\n', re_str) - # re_str = re.sub(r' {2,}', ' ', re_str) - # - # return format_html(re_str.strip('\n ')) + # return format_html(self.inner_ele.get_attribute('innerText'), False) + re_str = self.inner_ele.get_attribute('innerText') + re_str = re.sub(r'\n{2,}', '\n', re_str) + re_str = re.sub(r' {2,}', ' ', re_str) + + return format_html(re_str.strip('\n '), False) + + @property + def row_text(self) -> str: + """返回未格式化处理的元素内文本""" + return self.inner_ele.get_attribute('innerText') @property def link(self) -> str: diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index c76d9e3..5df2769 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -82,12 +82,10 @@ class SessionElement(DrissionElement): re_str = re.sub(r' {2,}', ' ', re_str) return format_html(re_str, False) - # re_str = str(self._inner_ele.text_content()) - # # re_str = re.sub(r'
', '\n', re_str) - # re_str = re.sub(r'\n{2,}', '\n', re_str) - # re_str = re.sub(r' {2,}', ' ', re_str) - # return format_html(re_str.strip('\n ')) - # # return format_html(re_str) + @property + def row_text(self) -> str: + """返回未格式化处理的元素内文本""" + return str(self._inner_ele.text_content()) @property def tag(self) -> str: From d2df6b041edd7e707cc93bffb011bee816907d58 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sat, 9 Jan 2021 09:38:15 +0800 Subject: [PATCH 38/94] =?UTF-8?q?=E6=94=B9=E9=94=99=E5=88=AB=E5=AD=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 2 +- DrissionPage/session_element.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 9a66a5c..be5e2a2 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -87,7 +87,7 @@ class DriverElement(DrissionElement): return format_html(re_str.strip('\n '), False) @property - def row_text(self) -> str: + def raw_text(self) -> str: """返回未格式化处理的元素内文本""" return self.inner_ele.get_attribute('innerText') diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index 5df2769..beb6019 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -83,7 +83,7 @@ class SessionElement(DrissionElement): return format_html(re_str, False) @property - def row_text(self) -> str: + def raw_text(self) -> str: """返回未格式化处理的元素内文本""" return str(self._inner_ele.text_content()) From db70acfe28a8d6909bacd28debc1ec738f5ed047 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sat, 9 Jan 2021 16:21:45 +0800 Subject: [PATCH 39/94] =?UTF-8?q?=E8=87=AA=E5=8A=A8=E4=B8=8B=E8=BD=BDchrom?= =?UTF-8?q?edriver=E6=97=B6=E6=98=BE=E7=A4=BA=E6=8F=90=E7=A4=BA=E4=BF=A1?= =?UTF-8?q?=E6=81=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index df28659..f6c6c57 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -107,11 +107,13 @@ class Drission(object): from .easy_set import get_match_driver chrome_path = self._driver_options.get('binary_location', None) or None + print('自动下载chromedriver...') driver_path = get_match_driver(chrome_path=chrome_path, check_version=False, show_msg=False) if driver_path: try: self._driver = webdriver.Chrome(driver_path, options=options) + print('下载完成。') except: print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。') exit(0) From 2b5a3ca73d194950b4cb591fde41f95ee3923922 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sat, 9 Jan 2021 16:23:56 +0800 Subject: [PATCH 40/94] =?UTF-8?q?=E7=94=A8=E6=96=87=E6=9C=AC=E8=8E=B7?= =?UTF-8?q?=E5=8F=96=E5=85=83=E7=B4=A0=E6=97=B6=E6=94=AF=E6=8C=81=E4=BB=BB?= =?UTF-8?q?=E6=84=8F=E6=96=87=E6=9C=AC=E8=8A=82=E7=82=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/common.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/DrissionPage/common.py b/DrissionPage/common.py index b70b8e2..9037640 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -142,7 +142,7 @@ def str_to_loc(loc: str) -> tuple: r = re_SPLIT(r'([:=])', at_lst[1], maxsplit=1) if len(r) == 3: mode = 'exact' if r[1] == '=' else 'fuzzy' - arg_str = r[0] if r[0] == 'text()' else f'@{r[0]}' + arg_str = 'text()' if r[0] in ('text', 'tx') else f'@{r[0]}' loc_str = _make_xpath_str(at_lst[0], arg_str, r[2], mode) else: loc_str = f'//*[name()="{at_lst[0]}" and @{r[0]}]' @@ -193,7 +193,11 @@ def _make_xpath_str(tag: str, arg: str, val: str, mode: str = 'fuzzy') -> str: return f'//*[{tag_name}{arg}={_make_search_str(val)}]' elif mode == 'fuzzy': - return f"//*[{tag_name}contains({arg},{_make_search_str(val)})]" + if arg == 'text()': + tag_name = '' if tag == '*' else f'{tag}/' + return f'//{tag_name}text()[contains(., {_make_search_str(val)})]/..' + else: + return f"//*[{tag_name}contains({arg},{_make_search_str(val)})]" else: raise ValueError("Argument mode can only be 'exact' or 'fuzzy'.") From cb8a7e5c113097d9fcd43e48ae9487596a3b8e22 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sat, 9 Jan 2021 22:07:19 +0800 Subject: [PATCH 41/94] =?UTF-8?q?=E5=AE=9A=E4=BD=8D=E8=AF=AD=E5=8F=A5?= =?UTF-8?q?=E4=B8=AD=EF=BC=8C=E5=9C=A8tag:=E6=A8=A1=E5=BC=8F=E4=B8=8B?= =?UTF-8?q?=E6=9F=A5=E8=AF=A2=E6=96=87=E6=9C=AC=E6=97=B6=EF=BC=8C=E7=94=A8?= =?UTF-8?q?text()=E5=92=8Ctx()=E4=BB=A3=E6=9B=BFtext=E5=92=8Ctx=EF=BC=8C?= =?UTF-8?q?=E9=81=BF=E5=85=8D=E5=85=83=E7=B4=A0=E4=B8=AD=E6=9C=89text?= =?UTF-8?q?=E6=88=96tx=E5=B1=9E=E6=80=A7=E6=97=B6=E6=97=A0=E6=B3=95?= =?UTF-8?q?=E5=AE=9A=E4=BD=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DrissionPage/common.py b/DrissionPage/common.py index 9037640..11e6d9d 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -142,7 +142,7 @@ def str_to_loc(loc: str) -> tuple: r = re_SPLIT(r'([:=])', at_lst[1], maxsplit=1) if len(r) == 3: mode = 'exact' if r[1] == '=' else 'fuzzy' - arg_str = 'text()' if r[0] in ('text', 'tx') else f'@{r[0]}' + arg_str = 'text()' if r[0] in ('text()', 'tx()') else f'@{r[0]}' loc_str = _make_xpath_str(at_lst[0], arg_str, r[2], mode) else: loc_str = f'//*[name()="{at_lst[0]}" and @{r[0]}]' From ab372b0322209ebd046b2ae3c2bbfa131f6e772c Mon Sep 17 00:00:00 2001 From: g1879 Date: Sun, 10 Jan 2021 17:48:05 +0800 Subject: [PATCH 42/94] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 106 ++++++++++++++------------- DrissionPage/mix_page.py | 126 ++++++++++++++++---------------- DrissionPage/session_element.py | 108 +++++++++++++-------------- 3 files changed, 173 insertions(+), 167 deletions(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index be5e2a2..1f8f712 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -210,31 +210,32 @@ class DriverElement(DrissionElement): - 用loc元组查找: \n ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n - 用查询字符串查找: \n - 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n - @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n + 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n + @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n ele.ele('.ele_class') - 返回第一个 class 为 ele_class 的子元素 \n ele.ele('.:ele_class') - 返回第一个 class 中含有 ele_class 的子元素 \n - ele.ele('#ele_id') - 返回第一个 id 为 ele_id 的子元素 \n + ele.ele('#ele_id') - 返回第一个 id 为 ele_id 的子元素 \n ele.ele('#:ele_id') - 返回第一个 id 中含有 ele_id 的子元素 \n - ele.ele('@class:ele_class') - 返回第一个class含有ele_class的子元素 \n - ele.ele('@name=ele_name') - 返回第一个name等于ele_name的子元素 \n - ele.ele('@placeholder') - 返回第一个带placeholder属性的子元素 \n - ele.ele('tag:p') - 返回第一个

子元素 \n - ele.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div子元素 \n - ele.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div子元素 \n - ele.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div子元素 \n - ele.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div子元素 \n - ele.ele('text:some_text') - 返回第一个文本含有some_text的子元素 \n - ele.ele('some_text') - 返回第一个文本含有some_text的子元素(等价于上一行) \n - ele.ele('text=some_text') - 返回第一个文本等于some_text的子元素 \n - ele.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的子元素 \n - ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n - - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n - ele.ele('xpath://div[@class="ele_class"]') - 等同于 ele.ele('x://div[@class="ele_class"]') \n - ele.ele('css:div.ele_class') - 等同于 ele.ele('c:div.ele_class') \n - ele.ele('tag:div') - 等同于 ele.ele('t:div') \n - ele.ele('text:some_text') - 等同于 ele.ele('tx:some_text') \n - ele.ele('text=some_text') - 等同于 ele.ele('tx=some_text') + ele.ele('@class:ele_class') - 返回第一个class含有ele_class的子元素 \n + ele.ele('@name=ele_name') - 返回第一个name等于ele_name的子元素 \n + ele.ele('@placeholder') - 返回第一个带placeholder属性的子元素 \n + ele.ele('tag:p') - 返回第一个

子元素 \n + ele.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div子元素 \n + ele.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div子元素 \n + ele.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div子元素 \n + ele.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div子元素 \n + ele.ele('text:some_text') - 返回第一个文本含有some_text的子元素 \n + ele.ele('some_text') - 返回第一个文本含有some_text的子元素(等价于上一行) \n + ele.ele('text=some_text') - 返回第一个文本等于some_text的子元素 \n + ele.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的子元素 \n + ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n + - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n + ele.ele('x://div[@class="ele_class"]') - 等同于 ele.ele('xpath://div[@class="ele_class"]') \n + ele.ele('c:div.ele_class') - 等同于 ele.ele('css:div.ele_class') \n + ele.ele('t:div') - 等同于 ele.ele('tag:div') \n + ele.ele('t:div@tx()=some_text') - 等同于 ele.ele('tag:div@text()=some_text') \n + ele.ele('tx:some_text') - 等同于 ele.ele('text:some_text') \n + ele.ele('tx=some_text') - 等同于 ele.ele('text=some_text') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param mode: 'single' 或 'all',对应查找一个或全部 :param timeout: 查找元素超时时间 @@ -267,36 +268,37 @@ class DriverElement(DrissionElement): def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None): - """返回当前元素下级所有符合条件的子元素、属性或节点文本 \n - 示例: \n - - 用loc元组查找: \n - ele.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n - - 用查询字符串查找: \n - 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n - @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n - ele.eles('.ele_class') - 返回所有 class 为 ele_class 的子元素 \n - ele.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的子元素 \n - ele.eles('#ele_id') - 返回所有 id 为 ele_id 的子元素 \n - ele.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的子元素 \n - ele.eles('@class:ele_class') - 返回所有class含有ele_class的子元素 \n - ele.eles('@name=ele_name') - 返回所有name等于ele_name的子元素 \n - ele.eles('@placeholder') - 返回所有带placeholder属性的子元素 \n - ele.eles('tag:p') - 返回所有

子元素 \n - ele.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div子元素 \n - ele.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div子元素 \n - ele.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div子元素 \n - ele.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div子元素 \n - ele.eles('text:some_text') - 返回所有文本含有some_text的子元素 \n - ele.eles('some_text') - 返回所有文本含有some_text的子元素(等价于上一行) \n - ele.eles('text=some_text') - 返回所有文本等于some_text的子元素 \n - ele.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的子元素 \n - ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n - - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n - ele.eles('xpath://div[@class="ele_class"]') - 等同于 ele.eles('x://div[@class="ele_class"]') \n - ele.eles('css:div.ele_class') - 等同于 ele.eles('c:div.ele_class') \n - ele.eles('tag:div') - 等同于 ele.eles('t:div') \n - ele.eles('text:some_text') - 等同于 ele.eles('tx:some_text') \n - ele.eles('text=some_text') - 等同于 ele.eles('tx=some_text') + """返回当前元素下级所有符合条件的子元素、属性或节点文本 \n + 示例: \n + - 用loc元组查找: \n + ele.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n + - 用查询字符串查找: \n + 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n + @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n + ele.eles('.ele_class') - 返回所有 class 为 ele_class 的子元素 \n + ele.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的子元素 \n + ele.eles('#ele_id') - 返回所有 id 为 ele_id 的子元素 \n + ele.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的子元素 \n + ele.eles('@class:ele_class') - 返回所有class含有ele_class的子元素 \n + ele.eles('@name=ele_name') - 返回所有name等于ele_name的子元素 \n + ele.eles('@placeholder') - 返回所有带placeholder属性的子元素 \n + ele.eles('tag:p') - 返回所有

子元素 \n + ele.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div子元素 \n + ele.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div子元素 \n + ele.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div子元素 \n + ele.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div子元素 \n + ele.eles('text:some_text') - 返回所有文本含有some_text的子元素 \n + ele.eles('some_text') - 返回所有文本含有some_text的子元素(等价于上一行) \n + ele.eles('text=some_text') - 返回所有文本等于some_text的子元素 \n + ele.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的子元素 \n + ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n + - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n + ele.eles('x://div[@class="ele_class"]') - 等同于 ele.eles('xpath://div[@class="ele_class"]') \n + ele.eles('c:div.ele_class') - 等同于 ele.eles('css:div.ele_class') \n + ele.eles('t:div') - 等同于 ele.eles('tag:div') \n + ele.eles('t:div@tx()=some_text') - 等同于 ele.eles('tag:div@text()=some_text') \n + ele.eles('tx:some_text') - 等同于 ele.eles('text:some_text') \n + ele.eles('tx=some_text') - 等同于 ele.eles('text=some_text') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param timeout: 查找元素超时时间 :return: DriverElement对象组成的列表 diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index 488f386..1d653f4 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -345,38 +345,39 @@ class MixPage(Null, SessionPage, DriverPage): mode: str = None, timeout: float = None) \ -> Union[DriverElement, SessionElement, str, List[SessionElement], List[DriverElement]]: - """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n - 示例: \n - - 接收到元素对象时: \n - 返回元素对象对象 \n - - 用loc元组查找: \n - ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n - - 用查询字符串查找: \n - 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n - @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n - page.ele('.ele_class') - 返回第一个 class 为 ele_class 的元素 \n - page.ele('.:ele_class') - 返回第一个 class 中含有 ele_class 的元素 \n - page.ele('#ele_id') - 返回第一个 id 为 ele_id 的元素 \n - page.ele('#:ele_id') - 返回第一个 id 中含有 ele_id 的元素 \n - page.ele('@class:ele_class') - 返回第一个class含有ele_class的元素 \n - page.ele('@name=ele_name') - 返回第一个name等于ele_name的元素 \n - page.ele('@placeholder') - 返回第一个带placeholder属性的元素 \n - page.ele('tag:p') - 返回第一个

元素 \n - page.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div元素 \n - page.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div元素 \n - page.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div元素 \n - page.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div元素 \n - page.ele('text:some_text') - 返回第一个文本含有some_text的元素 \n - page.ele('some_text') - 返回第一个文本含有some_text的元素(等价于上一行) \n - page.ele('text=some_text') - 返回第一个文本等于some_text的元素 \n - page.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的元素 \n - page.ele('css:div.ele_class') - 返回第一个符合css selector的元素 \n - - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n - ele.ele('xpath://div[@class="ele_class"]') - 等同于 ele.ele('x://div[@class="ele_class"]') \n - ele.ele('css:div.ele_class') - 等同于 ele.ele('c:div.ele_class') \n - ele.ele('tag:div') - 等同于 ele.ele('t:div') \n - ele.ele('text:some_text') - 等同于 ele.ele('tx:some_text') \n - ele.ele('text=some_text') - 等同于 ele.ele('tx=some_text') + """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n + 示例: \n + - 接收到元素对象时: \n + 返回元素对象对象 \n + - 用loc元组查找: \n + ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n + - 用查询字符串查找: \n + 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n + @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n + page.ele('.ele_class') - 返回第一个 class 为 ele_class 的元素 \n + page.ele('.:ele_class') - 返回第一个 class 中含有 ele_class 的元素 \n + page.ele('#ele_id') - 返回第一个 id 为 ele_id 的元素 \n + page.ele('#:ele_id') - 返回第一个 id 中含有 ele_id 的元素 \n + page.ele('@class:ele_class') - 返回第一个class含有ele_class的元素 \n + page.ele('@name=ele_name') - 返回第一个name等于ele_name的元素 \n + page.ele('@placeholder') - 返回第一个带placeholder属性的元素 \n + page.ele('tag:p') - 返回第一个

元素 \n + page.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div元素 \n + page.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div元素 \n + page.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div元素 \n + page.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div元素 \n + page.ele('text:some_text') - 返回第一个文本含有some_text的元素 \n + page.ele('some_text') - 返回第一个文本含有some_text的元素(等价于上一行) \n + page.ele('text=some_text') - 返回第一个文本等于some_text的元素 \n + page.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的元素 \n + page.ele('css:div.ele_class') - 返回第一个符合css selector的元素 \n + - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n + page.ele('x://div[@class="ele_class"]') - 等同于 page.ele('xpath://div[@class="ele_class"]') \n + page.ele('c:div.ele_class') - 等同于 page.ele('css:div.ele_class') \n + page.ele('t:div') - 等同于 page.ele('tag:div') \n + page.ele('t:div@tx()=some_text') - 等同于 page.ele('tag:div@text()=some_text') \n + page.ele('tx:some_text') - 等同于 page.ele('text:some_text') \n + page.ele('tx=some_text') - 等同于 page.ele('text=some_text') :param loc_or_ele: 元素的定位信息,可以是元素对象,loc元组,或查询字符串 :param mode: 'single' 或 'all‘,对应查找一个或全部 :param timeout: 查找元素超时时间,d模式专用 @@ -390,36 +391,37 @@ class MixPage(Null, SessionPage, DriverPage): def eles(self, loc_or_str: Union[Tuple[str, str], str], timeout: float = None) -> Union[List[DriverElement], List[SessionElement]]: - """返回页面中所有符合条件的元素、属性或节点文本 \n - 示例: \n - - 用loc元组查找: \n - page.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的元素 \n - - 用查询字符串查找: \n - 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n - @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n - page.eles('.ele_class') - 返回所有 class 为 ele_class 的元素 \n - page.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的元素 \n - page.eles('#ele_id') - 返回所有 id 为 ele_id 的元素 \n - page.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的元素 \n - page.eles('@class:ele_class') - 返回所有class含有ele_class的元素 \n - page.eles('@name=ele_name') - 返回所有name等于ele_name的元素 \n - page.eles('@placeholder') - 返回所有带placeholder属性的元素 \n - page.eles('tag:p') - 返回所有

元素 \n - page.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div元素 \n - page.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div元素 \n - page.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div元素 \n - page.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div元素 \n - page.eles('text:some_text') - 返回所有文本含有some_text的元素 \n - page.eles('some_text') - 返回所有文本含有some_text的元素(等价于上一行) \n - page.eles('text=some_text') - 返回所有文本等于some_text的元素 \n - page.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的元素 \n - page.eles('css:div.ele_class') - 返回所有符合css selector的元素 \n - - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n - ele.eles('xpath://div[@class="ele_class"]') - 等同于 ele.eles('x://div[@class="ele_class"]') \n - ele.eles('css:div.ele_class') - 等同于 ele.eles('c:div.ele_class') \n - ele.eles('tag:div') - 等同于 ele.eles('t:div') \n - ele.eles('text:some_text') - 等同于 ele.eles('tx:some_text') \n - ele.eles('text=some_text') - 等同于 ele.eles('tx=some_text') + """返回页面中所有符合条件的元素、属性或节点文本 \n + 示例: \n + - 用loc元组查找: \n + page.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的元素 \n + - 用查询字符串查找: \n + 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n + @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n + page.eles('.ele_class') - 返回所有 class 为 ele_class 的元素 \n + page.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的元素 \n + page.eles('#ele_id') - 返回所有 id 为 ele_id 的元素 \n + page.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的元素 \n + page.eles('@class:ele_class') - 返回所有class含有ele_class的元素 \n + page.eles('@name=ele_name') - 返回所有name等于ele_name的元素 \n + page.eles('@placeholder') - 返回所有带placeholder属性的元素 \n + page.eles('tag:p') - 返回所有

元素 \n + page.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div元素 \n + page.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div元素 \n + page.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div元素 \n + page.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div元素 \n + page.eles('text:some_text') - 返回所有文本含有some_text的元素 \n + page.eles('some_text') - 返回所有文本含有some_text的元素(等价于上一行) \n + page.eles('text=some_text') - 返回所有文本等于some_text的元素 \n + page.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的元素 \n + page.eles('css:div.ele_class') - 返回所有符合css selector的元素 \n + - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n + page.eles('x://div[@class="ele_class"]') - 等同于 page.eles('xpath://div[@class="ele_class"]') \n + page.eles('c:div.ele_class') - 等同于 page.eles('css:div.ele_class') \n + page.eles('t:div') - 等同于 page.eles('tag:div') \n + page.eles('t:div@tx()=some_text') - 等同于 page.eles('tag:div@text()=some_text') \n + page.eles('tx:some_text') - 等同于 page.eles('text:some_text') \n + page.eles('tx=some_text') - 等同于 page.eles('text=some_text') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param timeout: 查找元素超时时间,d模式专用 :return: 元素对象或属性、文本节点文本组成的列表 diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index beb6019..0bcf345 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -201,35 +201,36 @@ class SessionElement(DrissionElement): def ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None): """返回当前元素下级符合条件的子元素、属性或节点文本,默认返回第一个 \n - 示例: \n + 示例: \n - 用loc元组查找: \n ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n - 用查询字符串查找: \n - 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n - @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n + 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n + @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n ele.ele('.ele_class') - 返回第一个 class 为 ele_class 的子元素 \n ele.ele('.:ele_class') - 返回第一个 class 中含有 ele_class 的子元素 \n - ele.ele('#ele_id') - 返回第一个 id 为 ele_id 的子元素 \n + ele.ele('#ele_id') - 返回第一个 id 为 ele_id 的子元素 \n ele.ele('#:ele_id') - 返回第一个 id 中含有 ele_id 的子元素 \n - ele.ele('@class:ele_class') - 返回第一个class含有ele_class的子元素 \n - ele.ele('@name=ele_name') - 返回第一个name等于ele_name的子元素 \n - ele.ele('@placeholder') - 返回第一个带placeholder属性的子元素 \n - ele.ele('tag:p') - 返回第一个

子元素 \n - ele.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div子元素 \n - ele.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div子元素 \n - ele.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div子元素 \n - ele.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div子元素 \n - ele.ele('text:some_text') - 返回第一个文本含有some_text的子元素 \n - ele.ele('some_text') - 返回第一个文本含有some_text的子元素(等价于上一行) \n - ele.ele('text=some_text') - 返回第一个文本等于some_text的子元素 \n - ele.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的子元素 \n - ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n - - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n - ele.ele('xpath://div[@class="ele_class"]') - 等同于 ele.ele('x://div[@class="ele_class"]') \n - ele.ele('css:div.ele_class') - 等同于 ele.ele('c:div.ele_class') \n - ele.ele('tag:div') - 等同于 ele.ele('t:div') \n - ele.ele('text:some_text') - 等同于 ele.ele('tx:some_text') \n - ele.ele('text=some_text') - 等同于 ele.ele('tx=some_text') + ele.ele('@class:ele_class') - 返回第一个class含有ele_class的子元素 \n + ele.ele('@name=ele_name') - 返回第一个name等于ele_name的子元素 \n + ele.ele('@placeholder') - 返回第一个带placeholder属性的子元素 \n + ele.ele('tag:p') - 返回第一个

子元素 \n + ele.ele('tag:div@class:ele_class') - 返回第一个class含有ele_class的div子元素 \n + ele.ele('tag:div@class=ele_class') - 返回第一个class等于ele_class的div子元素 \n + ele.ele('tag:div@text():some_text') - 返回第一个文本含有some_text的div子元素 \n + ele.ele('tag:div@text()=some_text') - 返回第一个文本等于some_text的div子元素 \n + ele.ele('text:some_text') - 返回第一个文本含有some_text的子元素 \n + ele.ele('some_text') - 返回第一个文本含有some_text的子元素(等价于上一行) \n + ele.ele('text=some_text') - 返回第一个文本等于some_text的子元素 \n + ele.ele('xpath://div[@class="ele_class"]') - 返回第一个符合xpath的子元素 \n + ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n + - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n + ele.ele('x://div[@class="ele_class"]') - 等同于 ele.ele('xpath://div[@class="ele_class"]') \n + ele.ele('c:div.ele_class') - 等同于 ele.ele('css:div.ele_class') \n + ele.ele('t:div') - 等同于 ele.ele('tag:div') \n + ele.ele('t:div@tx()=some_text') - 等同于 ele.ele('tag:div@text()=some_text') \n + ele.ele('tx:some_text') - 等同于 ele.ele('text:some_text') \n + ele.ele('tx=some_text') - 等同于 ele.ele('text=some_text') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param mode: 'single' 或 'all‘,对应查找一个或全部 :return: SessionElement对象 @@ -260,36 +261,37 @@ class SessionElement(DrissionElement): return execute_session_find(element, loc_or_str, mode) def eles(self, loc_or_str: Union[Tuple[str, str], str]): - """返回当前元素下级所有符合条件的子元素、属性或节点文本 \n - 示例: \n - - 用loc元组查找: \n - ele.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n - - 用查询字符串查找: \n - 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n - @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n - ele.eles('.ele_class') - 返回所有 class 为 ele_class 的子元素 \n - ele.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的子元素 \n - ele.eles('#ele_id') - 返回所有 id 为 ele_id 的子元素 \n - ele.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的子元素 \n - ele.eles('@class:ele_class') - 返回所有class含有ele_class的子元素 \n - ele.eles('@name=ele_name') - 返回所有name等于ele_name的子元素 \n - ele.eles('@placeholder') - 返回所有带placeholder属性的子元素 \n - ele.eles('tag:p') - 返回所有

子元素 \n - ele.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div子元素 \n - ele.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div子元素 \n - ele.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div子元素 \n - ele.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div子元素 \n - ele.eles('text:some_text') - 返回所有文本含有some_text的子元素 \n - ele.eles('some_text') - 返回所有文本含有some_text的子元素(等价于上一行) \n - ele.eles('text=some_text') - 返回所有文本等于some_text的子元素 \n - ele.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的子元素 \n - ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n - - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n - ele.eles('xpath://div[@class="ele_class"]') - 等同于 ele.eles('x://div[@class="ele_class"]') \n - ele.eles('css:div.ele_class') - 等同于 ele.eles('c:div.ele_class') \n - ele.eles('tag:div') - 等同于 ele.eles('t:div') \n - ele.eles('text:some_text') - 等同于 ele.eles('tx:some_text') \n - ele.eles('text=some_text') - 等同于 ele.eles('tx=some_text') + """返回当前元素下级所有符合条件的子元素、属性或节点文本 \n + 示例: \n + - 用loc元组查找: \n + ele.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n + - 用查询字符串查找: \n + 查找方式:属性、tag name和属性、文本、xpath、css selector、id、class \n + @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n + ele.eles('.ele_class') - 返回所有 class 为 ele_class 的子元素 \n + ele.eles('.:ele_class') - 返回所有 class 中含有 ele_class 的子元素 \n + ele.eles('#ele_id') - 返回所有 id 为 ele_id 的子元素 \n + ele.eles('#:ele_id') - 返回所有 id 中含有 ele_id 的子元素 \n + ele.eles('@class:ele_class') - 返回所有class含有ele_class的子元素 \n + ele.eles('@name=ele_name') - 返回所有name等于ele_name的子元素 \n + ele.eles('@placeholder') - 返回所有带placeholder属性的子元素 \n + ele.eles('tag:p') - 返回所有

子元素 \n + ele.eles('tag:div@class:ele_class') - 返回所有class含有ele_class的div子元素 \n + ele.eles('tag:div@class=ele_class') - 返回所有class等于ele_class的div子元素 \n + ele.eles('tag:div@text():some_text') - 返回所有文本含有some_text的div子元素 \n + ele.eles('tag:div@text()=some_text') - 返回所有文本等于some_text的div子元素 \n + ele.eles('text:some_text') - 返回所有文本含有some_text的子元素 \n + ele.eles('some_text') - 返回所有文本含有some_text的子元素(等价于上一行) \n + ele.eles('text=some_text') - 返回所有文本等于some_text的子元素 \n + ele.eles('xpath://div[@class="ele_class"]') - 返回所有符合xpath的子元素 \n + ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n + - 查询字符串还有最精简模式,用x代替xpath、c代替css、t代替tag、tx代替text: \n + ele.eles('x://div[@class="ele_class"]') - 等同于 ele.eles('xpath://div[@class="ele_class"]') \n + ele.eles('c:div.ele_class') - 等同于 ele.eles('css:div.ele_class') \n + ele.eles('t:div') - 等同于 ele.eles('tag:div') \n + ele.eles('t:div@tx()=some_text') - 等同于 ele.eles('tag:div@text()=some_text') \n + ele.eles('tx:some_text') - 等同于 ele.eles('text:some_text') \n + ele.eles('tx=some_text') - 等同于 ele.eles('text=some_text') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :return: SessionElement对象组成的列表 """ From 671a8f3d5a97f406520814334b26a23363c73af3 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sun, 10 Jan 2021 17:48:53 +0800 Subject: [PATCH 43/94] =?UTF-8?q?shadow=5Froot=5Felement=E7=9A=84ele()?= =?UTF-8?q?=E5=92=8Celes()=E6=94=AF=E6=8C=81=E6=9C=80=E7=AE=80=E6=9F=A5?= =?UTF-8?q?=E8=AF=A2=E5=AD=97=E7=AC=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/shadow_root_element.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/DrissionPage/shadow_root_element.py b/DrissionPage/shadow_root_element.py index 92e5ac0..f60cc84 100644 --- a/DrissionPage/shadow_root_element.py +++ b/DrissionPage/shadow_root_element.py @@ -93,6 +93,12 @@ class ShadowRootElement(DrissionElement): ele.ele('some_text') - 返回第一个文本含有some_text的子元素(等价于上一行) \n ele.ele('text=some_text') - 返回第一个文本等于some_text的子元素 \n ele.ele('css:div.ele_class') - 返回第一个符合css selector的子元素 \n + - 查询字符串还有最精简模式,用c代替css、t代替tag、tx代替text: \n + ele.ele('c:div.ele_class') - 等同于 ele.ele('css:div.ele_class') \n + ele.ele('t:div') - 等同于 ele.ele('tag:div') \n + ele.ele('t:div@tx()=some_text') - 等同于 ele.ele('tag:div@txet()=some_text') \n + ele.ele('tx:some_text') - 等同于 ele.ele('text:some_text') \n + ele.ele('tx=some_text') - 等同于 ele.ele('text=some_text') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param mode: 'single' 或 'all',对应查找一个或全部 :param timeout: 查找元素超时时间 @@ -137,6 +143,12 @@ class ShadowRootElement(DrissionElement): ele.eles('some_text') - 返回所有文本含有some_text的子元素(等价于上一行) \n ele.eles('text=some_text') - 返回所有文本等于some_text的子元素 \n ele.eles('css:div.ele_class') - 返回所有符合css selector的子元素 \n + - 查询字符串还有最精简模式,用c代替css、t代替tag、tx代替text: \n + ele.eles('c:div.ele_class') - 等同于 ele.eles('css:div.ele_class') \n + ele.eles('t:div') - 等同于 ele.eles('tag:div') \n + ele.eles('t:div@tx()=some_text') - 等同于 ele.eles('tag:div@txet()=some_text') \n + ele.eles('tx:some_text') - 等同于 ele.eles('text:some_text') \n + ele.eles('tx=some_text') - 等同于 ele.eles('text=some_text') :param loc_or_str: 元素的定位信息,可以是loc元组,或查询字符串 :param timeout: 查找元素超时时间 :return: DriverElement对象组成的列表 @@ -235,12 +247,21 @@ def str_to_css_loc(loc: str) -> tuple: else: loc = loc.replace('.', '@class=', 1) - if loc.startswith('#'): + elif loc.startswith('#'): if loc.startswith(('#=', '#:',)): loc = loc.replace('#', '@id', 1) else: loc = loc.replace('#', '@id=', 1) + elif loc.startswith(('t:', 't=')): + loc = f'tag:{loc[2:]}' + + elif loc.startswith(('tx:', 'tx=')): + loc = f'text{loc[2:]}' + + elif loc.startswith(('x:', 'x=', 'xpath:', 'xpath=')): + raise ValueError('不支持xpath') + # 根据属性查找 if loc.startswith('@'): r = re_SPLIT(r'([:=])', loc[1:], maxsplit=1) @@ -261,7 +282,7 @@ def str_to_css_loc(loc: str) -> tuple: r = re_SPLIT(r'([:=])', at_lst[1], maxsplit=1) if len(r) == 3: - if r[0] == 'text()': + if r[0] in ('text()', 'tx()'): match = 'exact' if r[1] == '=' else 'fuzzy' return 'text', r[2], at_lst[0], match mode = '=' if r[1] == '=' else '*=' @@ -273,10 +294,6 @@ def str_to_css_loc(loc: str) -> tuple: elif loc.startswith(('css=', 'css:')): loc_str = loc[4:] - # 用xpath查找 - elif loc.startswith(('xpath=', 'xpath:')): - raise ValueError('不支持xpath') - # 根据文本查找 elif loc.startswith(('text=', 'text:')): match = 'exact' if loc[4] == '=' else 'fuzzy' From e785275cb01ed50fb2f026b988d92be0dfdd6bef Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 11 Jan 2021 21:30:52 +0800 Subject: [PATCH 44/94] =?UTF-8?q?DriverElement=E5=A2=9E=E5=8A=A0sr?= =?UTF-8?q?=E5=B1=9E=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 1f8f712..06fb5db 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -143,6 +143,11 @@ class DriverElement(DrissionElement): from .shadow_root_element import ShadowRootElement return ShadowRootElement(shadow, self) + @property + def sr(self): + """返回当前元素的shadow_root元素对象""" + return self.shadow_root + @property def before(self) -> str: """返回当前元素的::before伪元素内容""" From d6ed90f04e91fa0cd7101353a6e025f3d4fa946f Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 14 Jan 2021 15:37:46 +0800 Subject: [PATCH 45/94] 1.8.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ac7b8d7..6cbe69b 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="1.7.12", + version="1.8.0", author="g1879", author_email="g1879@qq.com", description="A module that integrates selenium and requests session, encapsulates common page operations.", From 8f457c71baca9d5c80c1c7380a2aeba26d3c14fa Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 25 Jan 2021 20:44:48 +0800 Subject: [PATCH 46/94] update README.zh-cn.md. --- README.zh-cn.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.zh-cn.md b/README.zh-cn.md index 0d8af07..612e610 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -451,8 +451,8 @@ session_options = {'headers': {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac # 代理设置,可选 proxy = {'http': '127.0.0.1:1080', 'https': '127.0.0.1:1080'} -# 传入配置,driver_options 和 session_options 都是可选的,须要使用对应模式才须要传入 -drission = Drission(driver_options, session_options, proxy=proxy) +# 传入配置,driver_or_options 和 session_or_options 都是可选的,须要使用对应模式才须要传入 +drission = Drission(driver_or_options, session_or_options, proxy=proxy) ``` DriverOptions 和 SessionOptions 用法详见下文。 @@ -892,7 +892,7 @@ do.set_no_imgs(True) # 不加载图片 do.set_paths(driver_path='D:\\chromedriver.exe', chrome_path='D:\\chrome.exe') # 设置路径 do.set_headless(False).set_no_imgs(True) # 支持链式操作 -drission = Drission(driver_options=do) # 用配置对象创建 Drission 对象 +drission = Drission(driver_or_options=do) # 用配置对象创建 Drission 对象 page = MixPage(driver_options=do) # 用配置对象创建 MixPage 对象 do.save() # 保存当前打开的 ini 文件 From af392315468e9f15976ea6e70159f516242c221c Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 25 Jan 2021 20:50:21 +0800 Subject: [PATCH 47/94] update README.zh-cn.md. --- README.zh-cn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.zh-cn.md b/README.zh-cn.md index 612e610..4fb0e43 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -925,7 +925,7 @@ so.cookies = ['key1=val1; domain=xxxx', 'key2=val2; domain=xxxx'] # 设置 cook so.headers = {'User-Agent': 'xxxx', 'Accept-Charset': 'xxxx'} so.set_a_header('Connection', 'keep-alive') -drission = Drission(session_options=so) # 用配置对象创建 Drission 对象 +drission = Drission(session_or_options=so) # 用配置对象创建 Drission 对象 page = MixPage(session_options=so) # 用配置对象创建 MixPage 对象 so.save() # 保存当前打开的 ini 文件 From 1c2b20aea50d9721cfc28d88b0a109177a5374d0 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 25 Jan 2021 20:52:37 +0800 Subject: [PATCH 48/94] update README.en.md. --- README.en.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.en.md b/README.en.md index 88c52ae..0d8cddb 100644 --- a/README.en.md +++ b/README.en.md @@ -476,8 +476,8 @@ session_options = {'headers': {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac O # Proxy settings, optional proxy = {'http': '127.0.0.1:1080','https': '127.0.0.1:1080'} -# Incoming configuration, driver_options and session_options are optional, you need to use the corresponding mode to pass in -drission = Drission(driver_options, session_options, proxy=proxy) +# Incoming configuration, driver_or_options and session_or_options are optional, you need to use the corresponding mode to pass in +drission = Drission(driver_or_options, session_or_options, proxy=proxy) ``` The usage of DriverOptions and SessionOptions is detailed below. @@ -931,7 +931,7 @@ do.set_no_imgs(True) # Do not load pictures do.set_paths(driver_path='D:\\chromedriver.exe', chrome_path='D:\\chrome.exe') # set path do.set_headless(False).set_no_imgs(True) # Support chain operation -drission = Drission(driver_options=do) # Create Drission object with configuration object +drission = Drission(driver_or_options=do) # Create Drission object with configuration object page = MixPage(driver_options=do) # Create MixPage object with configuration object do.save() # save the currently opened ini file @@ -966,7 +966,7 @@ so.cookies = ['key1=val1; domain=xxxx','key2=val2; domain=xxxx'] # set cookies so.headers = {'User-Agent':'xxxx','Accept-Charset':'xxxx'} so.set_a_header('Connection','keep-alive') -drission = Drission(session_options=so) # Create Drission object with configuration object +drission = Drission(session_or_options=so) # Create Drission object with configuration object page = MixPage(session_options=so) # Create MixPage object with configuration object so.save() # Save the currently opened ini file From 1cec5933044b9c3a91dda66849095023a49af00b Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 28 Jan 2021 11:46:52 +0800 Subject: [PATCH 49/94] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E5=8F=8D=E7=88=AC?= =?UTF-8?q?=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index f6c6c57..b45c0f1 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -121,14 +121,15 @@ class Drission(object): print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。') exit(0) - # 反爬设置,似乎没用 - self._driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", { - "source": """ - Object.defineProperty(navigator, 'webdriver', { - get: () => Chrome - }) - """ - }) + # 反爬设置 + try: + self._driver.execute_script('Object.defineProperty(navigator,"webdriver",{get:() => Chrome,});') + except: + pass + + # self._driver.execute_cdp_cmd( + # 'Page.addScriptToEvaluateOnNewDocument', + # {'source': 'Object.defineProperty(navigator,"webdriver",{get:() => Chrome,});'}) return self._driver From 633450836378bb824444248ef50a722aa6ced51d Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 28 Jan 2021 11:47:33 +0800 Subject: [PATCH 50/94] 1.8.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6cbe69b..f0afc32 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="1.8.0", + version="1.8.1", author="g1879", author_email="g1879@qq.com", description="A module that integrates selenium and requests session, encapsulates common page operations.", From 9e4f392346dc301ab8434d94ac702ee578ed2a54 Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 28 Jan 2021 15:22:39 +0800 Subject: [PATCH 51/94] update README.zh-cn.md. --- README.zh-cn.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.zh-cn.md b/README.zh-cn.md index 4fb0e43..4540e91 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -437,6 +437,7 @@ drission = Drission(read_file=False) ```python from DrissionPage.config import DriverOptions +from DrissionPage import Drission # 创建 driver 配置对象,read_file = False 表示不读取 ini 文件 do = DriverOptions(read_file=False) From 9c96d164859bc6bc9a9b8c7880e9748071c5894a Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 28 Jan 2021 15:24:06 +0800 Subject: [PATCH 52/94] update README.zh-cn.md. --- README.zh-cn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.zh-cn.md b/README.zh-cn.md index 4540e91..53281a5 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -493,7 +493,7 @@ page.post(url, data, **kwargs) # 只有 session 模式才有 post 方法 page.get(url, retry=5, interval=0.5) ``` -Tips:若连接出错,程序会自动重试2次,可指定重试次数和等待间隔。 +Tips:若连接出错,程序会自动重试3次,可指定重试次数和等待间隔。 **切换模式** From 0b7d55ff53190885724b1dbd84fa4ebe4e74b583 Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 29 Jan 2021 10:43:41 +0800 Subject: [PATCH 53/94] =?UTF-8?q?set=5Fwindow=5Fsize()=E5=8A=A0=E5=85=A5?= =?UTF-8?q?=E6=9C=80=E5=B0=8F=E5=8C=96=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_page.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index cb64f10..31a17c4 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -529,16 +529,19 @@ class DriverPage(object): self.driver.back() def set_window_size(self, x: int = None, y: int = None) -> None: - """设置浏览器窗口大小,默认最大化 \n + """设置浏览器窗口大小,默认最大化,任一参数为0最小化 \n :param x: 浏览器窗口高 :param y: 浏览器窗口宽 :return: None """ - if not x and not y: + if x is None and y is None: self.driver.maximize_window() + elif x == 0 or y == 0: + self.driver.minimize_window() + else: - if x <= 0 or y <= 0: + if x < 0 or y < 0: raise ValueError('Arguments x and y must greater than 0.') new_x = x or self.driver.get_window_size()['width'] From 1cab918598363b2aea7d6a93bdcad0f3f0ad7bee Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 4 Feb 2021 11:30:28 +0800 Subject: [PATCH 54/94] =?UTF-8?q?url=E8=BD=AC=E7=A0=81=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E5=BF=BD=E7=95=A5!?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_page.py | 2 +- DrissionPage/session_page.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index 31a17c4..8328be1 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -139,7 +139,7 @@ class DriverPage(object): :param interval: 重试间隔(秒) :return: 目标url是否可用 """ - to_url = quote(url, safe='/:&?=%;#@') + to_url = quote(url, safe='/:&?=%;#@+!') retry = int(retry) if retry is not None else int(self.retry_times) interval = int(interval) if interval is not None else int(self.retry_interval) diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 920c237..e6190fa 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -235,7 +235,7 @@ class SessionPage(object): :param kwargs: 连接参数 :return: url是否可用 """ - to_url = quote(url, safe='/:&?=%;#@+') + to_url = quote(url, safe='/:&?=%;#@+!') retry = int(retry) if retry is not None else int(self.retry_times) interval = int(interval) if interval is not None else int(self.retry_interval) @@ -278,7 +278,7 @@ class SessionPage(object): :param kwargs: 连接参数 :return: url是否可用 """ - to_url = quote(url, safe='/:&?=%;#@') + to_url = quote(url, safe='/:&?=%;#@+!') retry = int(retry) if retry is not None else int(self.retry_times) interval = int(interval) if interval is not None else int(self.retry_interval) @@ -499,7 +499,7 @@ class SessionPage(object): if mode not in ('get', 'post'): raise ValueError("Argument mode can only be 'get' or 'post'.") - url = quote(url, safe='/:&?=%;#@+') + url = quote(url, safe='/:&?=%;#@+!') # 设置referer和host值 kwargs_set = set(x.lower() for x in kwargs) From 7fcde1a67dbec0d89327b19edeb1da94986940b2 Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 4 Feb 2021 18:35:57 +0800 Subject: [PATCH 55/94] =?UTF-8?q?download()=E5=A2=9E=E5=8A=A0retry?= =?UTF-8?q?=EF=BC=8C=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/session_page.py | 260 +++++++++++++++++++---------------- 1 file changed, 144 insertions(+), 116 deletions(-) diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index e6190fa..a223c35 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -310,6 +310,8 @@ class SessionPage(object): post_data: dict = None, show_msg: bool = False, show_errmsg: bool = False, + retry: int = None, + interval: float = None, **kwargs) -> tuple: """下载一个文件 \n :param file_url: 文件url @@ -319,163 +321,189 @@ class SessionPage(object): :param post_data: post方式的数据 :param show_msg: 是否显示下载信息 :param show_errmsg: 是否抛出和显示异常 + :param retry: 重试次数 + :param interval: 重试间隔时间 :param kwargs: 连接参数 :return: 下载是否成功(bool)和状态信息(成功时信息为文件路径)的元组 """ - # 生成的response不写入self._response,是临时的 if file_exists == 'skip' and Path(f'{goal_path}\\{rename}').exists(): if show_msg: print(f'{file_url}\n{goal_path}\\{rename}\nSkipped.\n') return False, 'Skipped because a file with the same name already exists.' - kwargs['stream'] = True + def do(url: str, + goal: str, + new_name: str = None, + exists: str = 'rename', + data: dict = None, + msg: bool = False, + errmsg: bool = False, + **args) -> tuple: + args['stream'] = True - if 'timeout' not in kwargs: - kwargs['timeout'] = 20 + if 'timeout' not in args: + args['timeout'] = 20 - mode = 'post' if post_data else 'get' - r, info = self._make_response(file_url, mode=mode, data=post_data, show_errmsg=show_errmsg, **kwargs) + mode = 'post' if data else 'get' + # 生成的response不写入self._response,是临时的 + r, info = self._make_response(url, mode=mode, data=data, show_errmsg=errmsg, **args) - if r is None: - if show_msg: - print(info) + if r is None: + if msg: + print(info) - return False, info + return False, info - if not r.ok: - if show_errmsg: - raise ConnectionError(f'Status code: {r.status_code}.') + if not r.ok: + if errmsg: + raise ConnectionError(f'Status code: {r.status_code}.') - return False, f'Status code: {r.status_code}.' + return False, f'Status code: {r.status_code}.' - # -------------------获取文件名------------------- - file_name = '' - content_disposition = r.headers.get('content-disposition') + # -------------------获取文件名------------------- + file_name = '' + content_disposition = r.headers.get('content-disposition') - # 使用header里的文件名 - if content_disposition: - file_name = r.headers[content_disposition[0]].encode('ISO-8859-1').decode('utf-8') - file_name = re.search(r'filename *= *"?([^";]+)', file_name) - if file_name: - file_name = file_name.group(1) + # 使用header里的文件名 + if content_disposition: + file_name = r.headers[content_disposition[0]].encode('ISO-8859-1').decode('utf-8') + file_name = re.search(r'filename *= *"?([^";]+)', file_name) - if file_name[0] == file_name[-1] == "'": - file_name = file_name[1:-1] + if file_name: + file_name = file_name.group(1) - # 在url里获取文件名 - if not file_name and os_PATH.basename(file_url): - file_name = os_PATH.basename(file_url).split("?")[0] + if file_name[0] == file_name[-1] == "'": + file_name = file_name[1:-1] - # 找不到则用时间和随机数生成文件名 - if not file_name: - file_name = f'untitled_{time()}_{randint(0, 100)}' + # 在url里获取文件名 + if not file_name and os_PATH.basename(url): + file_name = os_PATH.basename(url).split("?")[0] - # 去除非法字符 - file_name = re_SUB(r'[\\/*:|<>?"]', '', file_name).strip() - file_name = unquote(file_name) + # 找不到则用时间和随机数生成文件名 + if not file_name: + file_name = f'untitled_{time()}_{randint(0, 100)}' - # -------------------重命名,不改变扩展名------------------- - if rename: - rename = re_SUB(r'[\\/*:|<>?"]', '', rename).strip() - ext_name = file_name.split('.')[-1] + # 去除非法字符 + file_name = re_SUB(r'[\\/*:|<>?"]', '', file_name).strip() + file_name = unquote(file_name) - if '.' in rename or ext_name == file_name: - full_name = rename - else: - full_name = f'{rename}.{ext_name}' + # -------------------重命名,不改变扩展名------------------- + if new_name: + new_name = re_SUB(r'[\\/*:|<>?"]', '', new_name).strip() + ext_name = file_name.split('.')[-1] - else: - full_name = file_name - - # -------------------生成路径------------------- - goal_Path = Path(goal_path) - goal_path = '' - skip = False - - for key, i in enumerate(goal_Path.parts): # 去除路径中的非法字符 - goal_path += goal_Path.drive if key == 0 and goal_Path.drive else re_SUB(r'[*:|<>?"]', '', i).strip() - goal_path += '\\' if i != '\\' and key < len(goal_Path.parts) - 1 else '' - - goal_Path = Path(goal_path).absolute() - goal_Path.mkdir(parents=True, exist_ok=True) - full_path = Path(f'{goal_path}\\{full_name}') - - if full_path.exists(): - if file_exists == 'rename': - full_name = get_available_file_name(goal_path, full_name) - full_path = Path(f'{goal_path}\\{full_name}') - - elif file_exists == 'skip': - skip = True - - elif file_exists == 'overwrite': - pass + if '.' in new_name or ext_name == file_name: + full_name = new_name + else: + full_name = f'{new_name}.{ext_name}' else: - raise ValueError("Argument file_exists can only be 'skip', 'overwrite', 'rename'.") + full_name = file_name - # -------------------打印要下载的文件------------------- - if show_msg: - print(file_url) - print(full_name if file_name == full_name else f'{file_name} -> {full_name}') - print(f'Downloading to: {goal_path}') + # -------------------生成路径------------------- + goal_Path = Path(goal) + goal = '' + skip = False + for key, i in enumerate(goal_Path.parts): # 去除路径中的非法字符 + goal += goal_Path.drive if key == 0 and goal_Path.drive else re_SUB(r'[*:|<>?"]', '', i).strip() + goal += '\\' if i != '\\' and key < len(goal_Path.parts) - 1 else '' + + goal_Path = Path(goal).absolute() + goal_Path.mkdir(parents=True, exist_ok=True) + full_path = Path(f'{goal}\\{full_name}') + + if full_path.exists(): + if file_exists == 'rename': + full_name = get_available_file_name(goal, full_name) + full_path = Path(f'{goal}\\{full_name}') + + elif exists == 'skip': + skip = True + + elif exists == 'overwrite': + pass + + else: + raise ValueError("Argument file_exists can only be 'skip', 'overwrite', 'rename'.") + + # -------------------打印要下载的文件------------------- + if msg: + print(file_url) + print(full_name if file_name == full_name else f'{file_name} -> {full_name}') + print(f'Downloading to: {goal}') + + if skip: + print('Skipped.\n') + + # -------------------开始下载------------------- if skip: - print('Skipped.\n') + return False, 'Skipped because a file with the same name already exists.' - # -------------------开始下载------------------- - if skip: - return False, 'Skipped because a file with the same name already exists.' + # 获取远程文件大小 + content_length = r.headers.get('content-length') + file_size = int(content_length) if content_length else None - # 获取远程文件大小 - content_length = r.headers.get('content-length') - file_size = int(content_length) if content_length else None + # 已下载文件大小和下载状态 + downloaded_size, download_status = 0, False - # 已下载文件大小和下载状态 - downloaded_size, download_status = 0, False + try: + with open(str(full_path), 'wb') as tmpFile: + for chunk in r.iter_content(chunk_size=1024): + if chunk: + tmpFile.write(chunk) - try: - with open(str(full_path), 'wb') as tmpFile: - for chunk in r.iter_content(chunk_size=1024): - if chunk: - tmpFile.write(chunk) + # 如表头有返回文件大小,显示进度 + if msg and file_size: + downloaded_size += 1024 + rate = downloaded_size / file_size if downloaded_size < file_size else 1 + print('\r {:.0%} '.format(rate), end="") - # 如表头有返回文件大小,显示进度 - if show_msg and file_size: - downloaded_size += 1024 - rate = downloaded_size / file_size if downloaded_size < file_size else 1 - print('\r {:.0%} '.format(rate), end="") + except Exception as e: + if errmsg: + raise ConnectionError(e) - except Exception as e: - if show_errmsg: - raise ConnectionError(e) - - download_status, info = False, f'Download failed.\n{e}' - - else: - if full_path.stat().st_size == 0: - if show_errmsg: - raise ValueError('File size is 0.') - - download_status, info = False, 'File size is 0.' + download_status, info = False, f'Download failed.\n{e}' else: - download_status, info = True, str(full_path) + if full_path.stat().st_size == 0: + if errmsg: + raise ValueError('File size is 0.') - finally: - # 删除下载出错文件 - if not download_status and full_path.exists(): - full_path.unlink() + download_status, info = False, 'File size is 0.' - r.close() + else: + download_status, info = True, str(full_path) - # -------------------显示并返回值------------------- - if show_msg: - print(info, '\n') + finally: + # 删除下载出错文件 + if not download_status and full_path.exists(): + full_path.unlink() - info = f'{goal_path}\\{full_name}' if download_status else info - return download_status, info + r.close() + + # -------------------显示并返回值------------------- + if msg: + print(info, '\n') + + info = f'{goal}\\{full_name}' if download_status else info + return download_status, info + + retry_times = retry or self.retry_times + retry_interval = interval or self.retry_interval + result = do(file_url, goal_path, rename, file_exists, post_data, show_msg, show_errmsg, **kwargs) + + if not result[0] and not str(result[1]).startswith('Skipped'): + for i in range(retry_times): + sleep(retry_interval) + + print(f'重试 {file_url}') + result = do(file_url, goal_path, rename, file_exists, post_data, show_msg, show_errmsg, **kwargs) + if result[0]: + break + + return result def _make_response(self, url: str, From 0791cff9d665b35f31fbc8d9a6c4579c02892773 Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 5 Feb 2021 21:00:32 +0800 Subject: [PATCH 56/94] =?UTF-8?q?=E5=BE=AE=E8=B0=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index b45c0f1..3c49500 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -121,7 +121,7 @@ class Drission(object): print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。') exit(0) - # 反爬设置 + # 反反爬设置 try: self._driver.execute_script('Object.defineProperty(navigator,"webdriver",{get:() => Chrome,});') except: From 889eb2074641bee74a62d53e936f9e4d65523afd Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 5 Feb 2021 21:01:10 +0800 Subject: [PATCH 57/94] =?UTF-8?q?click()=E6=94=AF=E6=8C=81=E5=81=8F?= =?UTF-8?q?=E7=A7=BB=E7=82=B9=E5=87=BB=EF=BC=8C=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 06fb5db..83b4001 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -324,7 +324,7 @@ class DriverElement(DrissionElement): return None if r == 'none' else r - def click(self, by_js=None) -> bool: + def click(self, by_js: bool = None, x: int = None, y: int = None) -> bool: """点击元素 \n 尝试点击10次,若都失败就改用js点击 \n :param by_js: 是否用js点击,为True时直接用js点击,为False时重试失败也不会改用js @@ -532,12 +532,14 @@ class DriverElement(DrissionElement): if(nth>1){path = '/' + tag + '[' + nth + ']' + path;} else{path = '/' + tag + path;}''' txt5 = '''return path;''' + elif mode == 'css': txt1 = '' # txt2 = '''return '#' + el.id + path;''' txt3 = '' txt4 = '''path = '>' + ":nth-child(" + nth + ")" + path;''' txt5 = '''return path.substr(1);''' + else: raise ValueError(f"Argument mode can only be 'xpath' or 'css', not '{mode}'.") From 1f3edb7dc77b197cba3df2f5596edacbe775fefd Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 5 Feb 2021 21:01:56 +0800 Subject: [PATCH 58/94] =?UTF-8?q?download()=E6=94=AF=E6=8C=81=E9=87=8D?= =?UTF-8?q?=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/mix_page.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index 1d653f4..c5245c6 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -263,6 +263,8 @@ class MixPage(Null, SessionPage, DriverPage): post_data: dict = None, show_msg: bool = False, show_errmsg: bool = False, + retry: int = None, + interval: float = None, **kwargs) -> Tuple[bool, str]: """下载一个文件 \n d模式下下载前先同步cookies \n @@ -273,12 +275,16 @@ class MixPage(Null, SessionPage, DriverPage): :param post_data: post方式的数据 :param show_msg: 是否显示下载信息 :param show_errmsg: 是否显示和抛出异常 + :param retry: 重试次数 + :param interval: 重试间隔时间 :param kwargs: 连接参数 :return: 下载是否成功(bool)和状态信息(成功时信息为文件路径)的元组 """ if self.mode == 'd': self.cookies_to_session() - return super().download(file_url, goal_path, rename, file_exists, post_data, show_msg, show_errmsg, **kwargs) + + return super().download(file_url, goal_path, rename, file_exists, post_data, show_msg, show_errmsg, retry, + interval, **kwargs) # ----------------重写DriverPage的函数----------------------- From bb9ec1c9ada2980d0883c41c195dddec31ced065 Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 5 Feb 2021 21:02:52 +0800 Subject: [PATCH 59/94] =?UTF-8?q?get()=E5=90=8E=E6=89=A7=E8=A1=8C=E5=8F=8D?= =?UTF-8?q?=E5=8F=8D=E7=88=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_page.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index 8328be1..d5ae53f 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -149,6 +149,11 @@ class DriverPage(object): self._url = to_url self._url_available = self._try_to_connect(to_url, times=retry, interval=interval, show_errmsg=show_errmsg) + try: + self._driver.execute_script('Object.defineProperty(navigator,"webdriver",{get:() => Chrome,});') + except: + pass + return self._url_available def ele(self, From 3c6fbf268a068fef7ef52288249b84e631448b38 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sun, 7 Feb 2021 10:40:28 +0800 Subject: [PATCH 60/94] =?UTF-8?q?input()=E5=85=81=E8=AE=B8=E6=8E=A5?= =?UTF-8?q?=E6=94=B6=E7=BB=84=E5=90=88=E9=94=AE=EF=BC=9B=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?click=5Fat()=E6=96=B9=E6=B3=95=EF=BC=9Bclick()=E9=87=8D?= =?UTF-8?q?=E8=AF=95=E6=AC=A1=E6=95=B0=E6=94=B9=E4=B8=BA3=E6=AC=A1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 83b4001..f85c6b3 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -324,14 +324,14 @@ class DriverElement(DrissionElement): return None if r == 'none' else r - def click(self, by_js: bool = None, x: int = None, y: int = None) -> bool: + def click(self, by_js: bool = None) -> bool: """点击元素 \n - 尝试点击10次,若都失败就改用js点击 \n + 尝试点击3次,若都失败就改用js点击 \n :param by_js: 是否用js点击,为True时直接用js点击,为False时重试失败也不会改用js :return: 是否点击成功 """ if not by_js: - for _ in range(10): + for _ in range(3): try: self.inner_ele.click() return True @@ -345,17 +345,35 @@ class DriverElement(DrissionElement): return False - def input(self, value: str, clear: bool = True) -> bool: - """输入文本 \n - :param value: 文本值 + def click_at(self, x: int = None, y: int = None, by_js=True) -> None: + """带偏移量点击本元素,相对于左上角坐标。不传入x或y值时点击元素中点 \n + :param x: 相对元素左上角坐标的x轴偏移量 + :param y: 相对元素左上角坐标的y轴偏移量 + :param by_js: 是否用js点击 + :return: None + """ + x = self.location['x'] + x if x is not None else self.location['x'] + self.size['width'] // 2 + y = self.location['y'] + y if y is not None else self.location['y'] + self.size['height'] // 2 + + if by_js: + self.page.run_script(f'document.elementFromPoint({x}, {y}).click();') + else: + from selenium.webdriver import ActionChains + ActionChains(self.page.driver).move_by_offset(x, y).click().perform() + + def input(self, value: Union[str, tuple], clear: bool = True) -> bool: + """输入文本或组合键 \n + :param value: 文本值或按键组合 :param clear: 输入前是否清空文本框 :return: 是否输入成功 """ try: if clear: self.clear() - self.inner_ele.send_keys(value) + + self.inner_ele.send_keys(*value) return True + except Exception as e: print(e) return False From 066caf2f23b01a03bea33fedeb2280ae3c72b7fc Mon Sep 17 00:00:00 2001 From: g1879 Date: Sun, 7 Feb 2021 14:10:48 +0800 Subject: [PATCH 61/94] 1.9.0 --- README.en.md | 30 +++++++++++++++++++++--------- README.zh-cn.md | 5 +++-- setup.py | 2 +- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/README.en.md b/README.en.md index 0d8cddb..370ac63 100644 --- a/README.en.md +++ b/README.en.md @@ -734,8 +734,9 @@ Element operation is unique to d mode. Calling the following method will automat ```python element.click(by_js) # Click the element, you can choose whether to click with js -element.input(value) # input text -element.run_script(js) # Run JavaScript script on the element +element.click_at(x, y, by_js) # Click this element with offset, relative to the upper left corner coordinate. Click the midpoint of the element when the x or y value is not passed in, and you can choose whether to click with js +element.input(value, clear) # Input text or key combination, and input the key combination in tuple format. The clear parameter is whether to clear the element before input. +element.run_script(js, *args) # Run JavaScript script on the element element.submit() # Submit element.clear() # Clear the element element.screenshot(path, filename) # Take a screenshot of the element @@ -2160,20 +2161,31 @@ Click on the element. If it is unsuccessful, click in js mode. You can specify w Parameter Description: -- by_js: bool - whether to click with js +- by_js: bool - whether to click with js Returns: bool +### click_at() - -### input() - -Enter text and return whether it is successful. +Click this element with offset, relative to the upper left corner coordinate. Click the midpoint of the element when the +x or y value is not passed in, and you can choose whether to click with js. Parameter Description: -- value: str - text value -- clear: bool - whether to clear the text box before typing +- x: int - The x-axis offset relative to the upper left corner of the element +- y: int - The y-axis offset relative to the upper left corner of the element +- by_js: bool - whether to click with js + +Returns: None + +### input() + +Enter text or key combination and return whether it is successful. + +Parameter Description: + +- value: Union[str, tuple] - Text value or key combination +- clear: bool - whether to clear the text box before typing Returns: bool diff --git a/README.zh-cn.md b/README.zh-cn.md index 53281a5..06f9fe1 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -701,8 +701,9 @@ element.is_displayed() # 返回元素是否可见 ```python element.click(by_js) # 点击元素,可选择是否用 js 方式点击 -element.input(value) # 输入文本 -element.run_script(js) # 对元素运行 JavaScript 脚本 +element.click_at(x, y, by_js) # 带偏移量点击本元素,相对于左上角坐标。不传入 x 或 y 值时点击元素中点,可选择是否用 js 方式点击 +element.input(value, clear) # 输入文本或组合键,组合键用 tuple 格式输入。clear 参数为输入前是否清空元素。 +element.run_script(js, *args) # 对元素运行 JavaScript 脚本 element.submit() # 提交 element.clear() # 清空元素 element.screenshot(path, filename) # 对元素截图 diff --git a/setup.py b/setup.py index f0afc32..ca25b45 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="1.8.1", + version="1.9.0", author="g1879", author_email="g1879@qq.com", description="A module that integrates selenium and requests session, encapsulates common page operations.", From 30cd456cd1377738d8fc486ff0d4dcca1973f6cf Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 8 Feb 2021 15:11:22 +0800 Subject: [PATCH 62/94] =?UTF-8?q?=E4=BC=98=E5=8C=96set=5Fattr()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index f85c6b3..cae78e9 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -460,7 +460,7 @@ class DriverElement(DrissionElement): :return: 是否设置成功 """ try: - self.run_script(f"arguments[0].{attr} = '{value}';") + self.run_script(f"arguments[0].setAttribute(arguments[1], arguments[2]);", attr, value) return True except: return False From 1636bf24213cbd88bd18c1087d5537168135b8d5 Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 10 Feb 2021 10:30:05 +0800 Subject: [PATCH 63/94] 1.9.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ca25b45..1e4ace0 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="1.9.0", + version="1.9.1", author="g1879", author_email="g1879@qq.com", description="A module that integrates selenium and requests session, encapsulates common page operations.", From 23e86b17a3486088770673c337dd7ae500742368 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 22 Feb 2021 16:25:33 +0800 Subject: [PATCH 64/94] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dclick=5Fat()=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index cae78e9..7344a98 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -352,14 +352,24 @@ class DriverElement(DrissionElement): :param by_js: 是否用js点击 :return: None """ - x = self.location['x'] + x if x is not None else self.location['x'] + self.size['width'] // 2 - y = self.location['y'] + y if y is not None else self.location['y'] + self.size['height'] // 2 - if by_js: - self.page.run_script(f'document.elementFromPoint({x}, {y}).click();') + x = self.location['x'] + x if x is not None else self.location['x'] + self.size['width'] // 2 + y = self.location['y'] + y if y is not None else self.location['y'] + self.size['height'] // 2 + js = f""" + var ev = document.createEvent('HTMLEvents'); + ev.clientX = {x}; + ev.clientY = {y}; + ev.initEvent('click', false, true); + arguments[0].dispatchEvent(ev); + """ + self.run_script(js) + else: + x = x if x is not None else self.size['width'] // 2 + y = y if y is not None else self.size['height'] // 2 + from selenium.webdriver import ActionChains - ActionChains(self.page.driver).move_by_offset(x, y).click().perform() + ActionChains(self.page.driver).move_to_element_with_offset(self.inner_ele, x, y).click().perform() def input(self, value: Union[str, tuple], clear: bool = True) -> bool: """输入文本或组合键 \n From 6b6c018c455a1431d6faa8de6c47c2107598f8b2 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 22 Feb 2021 16:25:41 +0800 Subject: [PATCH 65/94] 1.9.2 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1e4ace0..7157564 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="1.9.1", + version="1.9.2", author="g1879", author_email="g1879@qq.com", description="A module that integrates selenium and requests session, encapsulates common page operations.", From 696711f9ee28e24c3e53a351f4824c1cce5a5865 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 22 Feb 2021 16:28:24 +0800 Subject: [PATCH 66/94] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dclick=5Fat()=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 7344a98..84dfab5 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -345,7 +345,7 @@ class DriverElement(DrissionElement): return False - def click_at(self, x: int = None, y: int = None, by_js=True) -> None: + def click_at(self, x: int = None, y: int = None, by_js=False) -> None: """带偏移量点击本元素,相对于左上角坐标。不传入x或y值时点击元素中点 \n :param x: 相对元素左上角坐标的x轴偏移量 :param y: 相对元素左上角坐标的y轴偏移量 From 38086b11af5cef55986814a60152d1abba074e0e Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 23 Feb 2021 09:43:15 +0800 Subject: [PATCH 67/94] =?UTF-8?q?click=5Fat()=E5=8F=82=E6=95=B0=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E6=8E=A5=E6=94=B6=E5=AD=97=E7=AC=A6=E4=B8=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 84dfab5..a1c54df 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -345,7 +345,7 @@ class DriverElement(DrissionElement): return False - def click_at(self, x: int = None, y: int = None, by_js=False) -> None: + def click_at(self, x: Union[int, str] = None, y: Union[int, str] = None, by_js=False) -> None: """带偏移量点击本元素,相对于左上角坐标。不传入x或y值时点击元素中点 \n :param x: 相对元素左上角坐标的x轴偏移量 :param y: 相对元素左上角坐标的y轴偏移量 @@ -353,8 +353,8 @@ class DriverElement(DrissionElement): :return: None """ if by_js: - x = self.location['x'] + x if x is not None else self.location['x'] + self.size['width'] // 2 - y = self.location['y'] + y if y is not None else self.location['y'] + self.size['height'] // 2 + x = self.location['x'] + int(x) if x is not None else self.location['x'] + self.size['width'] // 2 + y = self.location['y'] + int(y) if y is not None else self.location['y'] + self.size['height'] // 2 js = f""" var ev = document.createEvent('HTMLEvents'); ev.clientX = {x}; @@ -365,8 +365,8 @@ class DriverElement(DrissionElement): self.run_script(js) else: - x = x if x is not None else self.size['width'] // 2 - y = y if y is not None else self.size['height'] // 2 + x = int(x) if x is not None else self.size['width'] // 2 + y = int(y) if y is not None else self.size['height'] // 2 from selenium.webdriver import ActionChains ActionChains(self.page.driver).move_to_element_with_offset(self.inner_ele, x, y).click().perform() From ebe3177b0ca8dc428f43aba40cd67d726c2bbfe0 Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 25 Feb 2021 17:27:30 +0800 Subject: [PATCH 68/94] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=9C=89=E6=97=B6?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E4=B8=8B=E8=BD=BD=E8=8E=B7=E5=8F=96=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E5=90=8D=E5=87=BA=E9=94=99=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/session_page.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index a223c35..f4c5959 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -367,7 +367,7 @@ class SessionPage(object): # 使用header里的文件名 if content_disposition: - file_name = r.headers[content_disposition[0]].encode('ISO-8859-1').decode('utf-8') + file_name = content_disposition.encode('ISO-8859-1').decode('utf-8') file_name = re.search(r'filename *= *"?([^";]+)', file_name) if file_name: From 19329c4235f23726d74528095982d268581e49ab Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 25 Feb 2021 17:28:32 +0800 Subject: [PATCH 69/94] =?UTF-8?q?=E5=BE=AE=E8=B0=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index a1c54df..4edc21d 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -434,7 +434,8 @@ class DriverElement(DrissionElement): name = filename or self.tag path = Path(path).absolute() path.mkdir(parents=True, exist_ok=True) - name = get_available_file_name(str(path), f'{name}.png') + name = f'{name}.png' if not name.endswith('.png') else name + name = get_available_file_name(str(path), name) # 等待元素加载完成 if self.tag == 'img': From c6d525482693a1db53893434a843ac3f2fbab245 Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 25 Feb 2021 17:29:07 +0800 Subject: [PATCH 70/94] 1.9.3 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7157564..05f7080 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="1.9.2", + version="1.9.3", author="g1879", author_email="g1879@qq.com", description="A module that integrates selenium and requests session, encapsulates common page operations.", From fa847ffb9b93f1e1f88acf46e9160371f4cc00f7 Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 4 Mar 2021 00:49:54 +0800 Subject: [PATCH 71/94] =?UTF-8?q?=E5=AE=8C=E5=96=84=E5=90=AF=E5=8A=A8chrom?= =?UTF-8?q?e=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 25 ++++++++++++++++++++----- DrissionPage/easy_set.py | 6 +++--- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 3c49500..0006a66 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -96,9 +96,21 @@ class Drission(object): if options.debugger_address and _check_port(options.debugger_address) is False: from subprocess import Popen - port = options.debugger_address.split(':')[-1] + port = options.debugger_address[options.debugger_address.rfind(':') + 1:] - Popen(f'{chrome_path} --remote-debugging-port={port}', shell=False) + try: + Popen(f'{chrome_path} --remote-debugging-port={port}', shell=False) + + except FileNotFoundError: + from DrissionPage.easy_set import _get_chrome_path + + chrome_path = _get_chrome_path(show_msg=False) + + if not chrome_path: + raise FileNotFoundError('无法找到chrome.exe路径,请手动配置。') + + Popen(f'"{chrome_path}" --remote-debugging-port={port}', shell=False) + options.binary_location = chrome_path try: self._driver = webdriver.Chrome(driver_path, options=options) @@ -106,9 +118,9 @@ class Drission(object): except (WebDriverException, SessionNotCreatedException): from .easy_set import get_match_driver - chrome_path = self._driver_options.get('binary_location', None) or None print('自动下载chromedriver...') - driver_path = get_match_driver(chrome_path=chrome_path, check_version=False, show_msg=False) + chrome_path = None if chrome_path == 'chrome.exe' else chrome_path + driver_path = get_match_driver(chrome_path=chrome_path, check_version=False) if driver_path: try: @@ -326,7 +338,7 @@ class Drission(object): def _check_port(debugger_address: str) -> Union[bool, None]: - """检查端口是否可用 \n + """检查端口是否被占用 \n :param debugger_address: 浏览器地址及端口 :return: bool """ @@ -345,3 +357,6 @@ def _check_port(debugger_address: str) -> Union[bool, None]: return True except socket.error: return False + finally: + if s: + s.close() diff --git a/DrissionPage/easy_set.py b/DrissionPage/easy_set.py index 2d67807..d3f0330 100644 --- a/DrissionPage/easy_set.py +++ b/DrissionPage/easy_set.py @@ -208,11 +208,11 @@ def get_match_driver(ini_path: Union[str, None] = 'default', chrome_path = chrome_path or _get_chrome_path(ini_path, show_msg) chrome_path = Path(chrome_path).absolute() if chrome_path else None if show_msg: - print('chrome.exe路径', chrome_path, '\n') + print('chrome.exe路径', chrome_path) ver = _get_chrome_version(chrome_path) if show_msg: - print('version', ver, '\n') + print('version', ver) zip_path = _download_driver(ver, save_path, show_msg=show_msg) @@ -225,7 +225,7 @@ def get_match_driver(ini_path: Union[str, None] = 'default', driver_path = None if show_msg: - print('\n解压路径', driver_path, '\n') + print('解压路径', driver_path) if driver_path: Path(zip_path).unlink() From 6482dfec517efd71650498052a6053cff3af5f9a Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 15 Mar 2021 17:45:29 +0800 Subject: [PATCH 72/94] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=90=AF=E5=8A=A8?= =?UTF-8?q?=E6=B5=8F=E8=A7=88=E5=99=A8=E5=92=8C=E8=8E=B7=E5=8F=96=E9=A9=B1?= =?UTF-8?q?=E5=8A=A8=E9=80=BB=E8=BE=91=EF=BC=8C=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/common.py | 22 +++++++++++ DrissionPage/drission.py | 42 +++++++++++++++++---- DrissionPage/easy_set.py | 80 +++++++++++++++++++++------------------- 3 files changed, 99 insertions(+), 45 deletions(-) diff --git a/DrissionPage/common.py b/DrissionPage/common.py index 11e6d9d..d0e950c 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -317,3 +317,25 @@ def unzip(zip_path: str, to_path: str) -> Union[list, None]: with ZipFile(zip_path, 'r') as f: return [f.extract(f.namelist()[0], path=to_path)] + + +def get_exe_path_from_port(port: Union[str, int]) -> Union[str, None]: + """获取端口号第一条进程的可执行文件路径 \n + :param port: 端口号 + :return: 可执行文件的绝对路径 + """ + from os import popen + from time import perf_counter + process = popen(f'netstat -ano |findstr {port}').read().split('\n')[0] + t = perf_counter() + + while not process and perf_counter() - t < 10: + process = popen(f'netstat -ano |findstr {port}').read().split('\n')[0] + + processid = process[process.rfind(' ') + 1:] + + if not processid: + return + else: + file_lst = popen(f'wmic process where processid={processid} get executablepath').read().split('\n') + return file_lst[2].strip() if len(file_lst) > 2 else None diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 0006a66..6a51c2f 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -35,6 +35,7 @@ class Drission(object): """ self._session = None self._driver = None + self._debugger = None self._proxy = proxy om = OptionsManager(ini_path) if session_or_options is None or driver_or_options is None else None @@ -94,41 +95,61 @@ class Drission(object): driver_path = self._driver_options.get('driver_path', None) or 'chromedriver' chrome_path = self._driver_options.get('binary_location', None) or 'chrome.exe' + # -----------若指定debug端口且该端口未在使用中,则先启动浏览器进程----------- if options.debugger_address and _check_port(options.debugger_address) is False: from subprocess import Popen port = options.debugger_address[options.debugger_address.rfind(':') + 1:] try: - Popen(f'{chrome_path} --remote-debugging-port={port}', shell=False) + self._debugger = Popen(f'{chrome_path} --remote-debugging-port={port}', shell=False) + if chrome_path == 'chrome.exe': + from common import get_exe_path_from_port + chrome_path = get_exe_path_from_port(port) + + # 启动不了进程,主动找浏览器执行文件启动 except FileNotFoundError: from DrissionPage.easy_set import _get_chrome_path - chrome_path = _get_chrome_path(show_msg=False) if not chrome_path: raise FileNotFoundError('无法找到chrome.exe路径,请手动配置。') - Popen(f'"{chrome_path}" --remote-debugging-port={port}', shell=False) - options.binary_location = chrome_path + self._debugger = Popen(f'"{chrome_path}" --remote-debugging-port={port}', shell=False) + # -----------创建WebDriver对象----------- try: self._driver = webdriver.Chrome(driver_path, options=options) + # 若版本不对,获取对应chromedriver再试 except (WebDriverException, SessionNotCreatedException): from .easy_set import get_match_driver - - print('自动下载chromedriver...') chrome_path = None if chrome_path == 'chrome.exe' else chrome_path - driver_path = get_match_driver(chrome_path=chrome_path, check_version=False) + driver_path = get_match_driver(chrome_path=chrome_path, check_version=False, show_msg=False) if driver_path: try: self._driver = webdriver.Chrome(driver_path, options=options) - print('下载完成。') except: print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。') exit(0) + + # 当找不到driver且chrome_path为None时,说明安装的版本过高,改在系统路径中查找 + elif chrome_path is None and driver_path is None: + from DrissionPage.easy_set import _get_chrome_path + chrome_path = _get_chrome_path(show_msg=False, from_ini=False, from_regedit=False) + driver_path = get_match_driver(chrome_path=chrome_path, check_version=False, show_msg=False) + + if driver_path: + options.binary_location = chrome_path + try: + self._driver = webdriver.Chrome(driver_path, options=options) + except: + print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。') + exit(0) + else: + print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。') + exit(0) else: print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。') exit(0) @@ -145,6 +166,11 @@ class Drission(object): return self._driver + @property + def debugger_progress(self): + """调试浏览器进程""" + return self._debugger + @property def driver_options(self) -> dict: """返回driver配置信息""" diff --git a/DrissionPage/easy_set.py b/DrissionPage/easy_set.py index d3f0330..edc4417 100644 --- a/DrissionPage/easy_set.py +++ b/DrissionPage/easy_set.py @@ -242,13 +242,17 @@ def get_match_driver(ini_path: Union[str, None] = 'default', return driver_path -def _get_chrome_path(ini_path: str = None, show_msg: bool = True) -> Union[str, None]: +def _get_chrome_path(ini_path: str = None, + show_msg: bool = True, + from_ini: bool = True, + from_regedit: bool = True, + from_system_path: bool = True, ) -> Union[str, None]: """从ini文件或系统变量中获取chrome.exe的路径 \n :param ini_path: ini文件路径 :return: chrome.exe路径 """ # -----------从ini文件中获取-------------- - if ini_path: + if ini_path and from_ini: try: path = OptionsManager(ini_path).chrome_options['binary_location'] except KeyError: @@ -261,46 +265,48 @@ def _get_chrome_path(ini_path: str = None, show_msg: bool = True) -> Union[str, return str(path) # -----------从注册表中获取-------------- - import winreg - try: - key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, - r'SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe', - reserved=0, access=winreg.KEY_READ) - k = winreg.EnumValue(key, 0) - winreg.CloseKey(key) - - if show_msg: - print('注册表中', end='') - - return k[1] - - except FileNotFoundError: - pass - - # -----------从系统路径中获取-------------- - paths = popen('set path').read().lower() - r = RE_SEARCH(r'[^;]*chrome[^;]*', paths) - - if r: - path = Path(r.group(0)) if 'chrome.exe' in r.group(0) else Path(r.group(0)) / 'chrome.exe' - - if path.exists(): - if show_msg: - print('系统中', end='') - return str(path) - - paths = paths.split(';') - - for path in paths: - path = Path(path) / 'chrome.exe' - + if from_regedit: + import winreg try: + key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, + r'SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\chrome.exe', + reserved=0, access=winreg.KEY_READ) + k = winreg.EnumValue(key, 0) + winreg.CloseKey(key) + + if show_msg: + print('注册表中', end='') + + return k[1] + + except FileNotFoundError: + pass + + # -----------从系统变量中获取-------------- + if from_system_path: + paths = popen('set path').read().lower() + r = RE_SEARCH(r'[^;]*chrome[^;]*', paths) + + if r: + path = Path(r.group(0)) if 'chrome.exe' in r.group(0) else Path(r.group(0)) / 'chrome.exe' + if path.exists(): if show_msg: print('系统变量中', end='') return str(path) - except OSError: - pass + + paths = paths.split(';') + + for path in paths: + path = Path(path) / 'chrome.exe' + + try: + if path.exists(): + if show_msg: + print('系统变量中', end='') + return str(path) + except OSError: + pass def _get_chrome_version(path: str) -> Union[str, None]: From c9f73760ffa41ef938d386e26c6d04d963178c0a Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 24 Mar 2021 08:47:50 +0800 Subject: [PATCH 73/94] =?UTF-8?q?=E5=AE=8C=E5=96=84=E5=88=87=E5=85=A5ifram?= =?UTF-8?q?e=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_page.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index d5ae53f..430ca4e 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -450,7 +450,7 @@ class DriverPage(object): self.driver.switch_to.parent_frame() # 传入id或name - elif ':' not in loc_or_ele and '=' not in loc_or_ele: + elif ':' not in loc_or_ele and '=' not in loc_or_ele and not loc_or_ele.startswith(('#', '.')): self.driver.switch_to.frame(loc_or_ele) # 传入控制字符串 From cb5aa18a4cfe05dc6fd371f5040e2b4a40c068fa Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 26 Mar 2021 23:17:14 +0800 Subject: [PATCH 74/94] =?UTF-8?q?DriverElement=E5=A2=9E=E5=8A=A0selected?= =?UTF-8?q?=5Foption=E5=B1=9E=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 4edc21d..3c352fe 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -464,6 +464,17 @@ class DriverElement(DrissionElement): print(e) return False + @property + def selected_option(self): + """返回下拉列表中被选中的option元素 \n + :return: DriverElement对象或None + """ + if self.tag != 'select': + return None + else: + ele = self.run_script('return arguments[0].options[arguments[0].selectedIndex];') + return None if ele is None else DriverElement(ele, self.page) + def set_attr(self, attr: str, value: str) -> bool: """设置元素属性 \n :param attr: 属性名 From 9efe63269f98c7325993082cf8eaac2e74bbc808 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sun, 28 Mar 2021 20:21:08 +0800 Subject: [PATCH 75/94] =?UTF-8?q?=E5=AE=8C=E5=96=84select=EF=BC=8C?= =?UTF-8?q?=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 148 +++++++++++++++++++++++++++------ 1 file changed, 121 insertions(+), 27 deletions(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 3c352fe..29cd994 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -23,6 +23,7 @@ class DriverElement(DrissionElement): def __init__(self, ele: WebElement, page=None): super().__init__(ele, page) + self._select = None def __repr__(self): attrs = [f"{attr}='{self.attrs[attr]}'" for attr in self.attrs] @@ -55,7 +56,7 @@ class DriverElement(DrissionElement): @property def tag(self) -> str: """返回元素类型""" - return self._inner_ele.tag_name + return self._inner_ele.tag_name.lower() @property def attrs(self) -> dict: @@ -158,7 +159,19 @@ class DriverElement(DrissionElement): """返回当前元素的::after伪元素内容""" return self.get_style_property('content', 'after') + @property + def select(self): + """返回专门处理下拉列表的Select类,非下拉列表元素返回False""" + if self._select is None: + if self.tag != 'select': + self._select = False + else: + self._select = Select(self) + + return self._select + # -----------------共有函数------------------- + def texts(self, text_node_only: bool = False) -> list: """返回元素内所有直接子节点的文本,包括元素和文本节点 \n :param text_node_only: 是否只返回文本节点 @@ -449,32 +462,6 @@ class DriverElement(DrissionElement): return img_path - def select(self, text: str) -> bool: - """选择下拉列表中子元素 \n - :param text: 要选择的文本 - :return: 是否选择成功 - """ - from selenium.webdriver.support.select import Select - ele = Select(self.inner_ele) - - try: - ele.select_by_visible_text(text) - return True - except Exception as e: - print(e) - return False - - @property - def selected_option(self): - """返回下拉列表中被选中的option元素 \n - :return: DriverElement对象或None - """ - if self.tag != 'select': - return None - else: - ele = self.run_script('return arguments[0].options[arguments[0].selectedIndex];') - return None if ele is None else DriverElement(ele, self.page) - def set_attr(self, attr: str, value: str) -> bool: """设置元素属性 \n :param attr: 属性名 @@ -787,3 +774,110 @@ class ElementsByXpath(object): else format_html(x) for x in get_nodes(the_node, xpath_txt=self.xpath) if x != '\n']) + + +class Select(object): + def __init__(self, ele: DriverElement): + if ele.tag != 'select': + raise TypeError(f"Select only works on elements, not on {ele.tag}") - from selenium.webdriver.support.select import Select + from selenium.webdriver.support.select import Select as sl self.inner_ele = ele - self.select_ele = Select(ele.inner_ele) + self.select_ele = sl(ele.inner_ele) @property def is_multi(self) -> bool: @@ -817,7 +817,7 @@ class Select(object): def select(self, text_value_index: Union[str, int, list, tuple] = None, para_type: str = 'text') -> bool: - """选择下拉列表中子元素 \n + """选定下拉列表中子元素 \n :param text_value_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 :param para_type: 参数类型,可选'text'、'value'、'index' :return: 是否选择成功 @@ -849,7 +849,7 @@ class Select(object): def select_multi(self, text_value_index: Union[list, tuple] = None, para_type: str = 'text') -> Union[bool, list]: - """选择下拉列表中子元素 \n + """选定下拉列表中多个子元素 \n :param text_value_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 :param para_type: 参数类型,可选'text'、'value'、'index' :return: 是否选择成功 @@ -871,11 +871,64 @@ class Select(object): else: raise TypeError('只能传入list或tuple类型。') - def deselect(self): - """清除传入的选项""" - pass + def deselect(self, + text_value_index: Union[str, int, list, tuple] = None, + para_type: str = 'text') -> bool: + """取消选定下拉列表中子元素 \n + :param text_value_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 + :param para_type: 参数类型,可选'text'、'value'、'index' + :return: 是否选择成功 + """ + if para_type not in ('text', 'value', 'index'): + raise ValueError('para_type参数只能传入“text”、“value”或“index”') - def 反选(self): + if not self.is_multi and isinstance(text_value_index, (list, tuple)): + raise TypeError('单选下拉列表不能传入list和tuple') + + if isinstance(text_value_index, (str, int)): + try: + if para_type == 'text': + self.select_ele.deselect_by_visible_text(text_value_index) + elif para_type == 'value': + self.select_ele.deselect_by_value(text_value_index) + elif para_type == 'index': + self.select_ele.deselect_by_index(int(text_value_index)) + return True + except: + return False + + elif isinstance(text_value_index, (list, tuple)): + self.deselect_multi(text_value_index, para_type) + + else: + raise TypeError('只能传入str、int、list和tuple类型。') + + def deselect_multi(self, + text_value_index: Union[list, tuple] = None, + para_type: str = 'text') -> Union[bool, list]: + """取消选定下拉列表中多个子元素 \n + :param text_value_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 + :param para_type: 参数类型,可选'text'、'value'、'index' + :return: 是否选择成功 + """ + if para_type not in ('text', 'value', 'index'): + raise ValueError('para_type参数只能传入“text”、“value”或“index”') + + if isinstance(text_value_index, (list, tuple)): + fail_list = [] + for i in text_value_index: + if not isinstance(i, (int, str)): + raise TypeError('列表只能由str或int组成') + + if not self.deselect(i, para_type): + fail_list.append(i) + + return fail_list or True + + else: + raise TypeError('只能传入list或tuple类型。') + + def invert(self): if not self.is_multi: raise NotImplementedError("You may only deselect options of a multi-select") From ef41df46294aa1be2e114d4dfc1e276b76861f2e Mon Sep 17 00:00:00 2001 From: g1879 Date: Sat, 3 Apr 2021 22:42:28 +0800 Subject: [PATCH 77/94] =?UTF-8?q?=E4=BC=98=E5=8C=96Select=E7=B1=BB?= =?UTF-8?q?=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 101 +++++++++++++++------------------ 1 file changed, 45 insertions(+), 56 deletions(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index ca8cf04..bd5b88d 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -785,6 +785,18 @@ class Select(object): self.inner_ele = ele self.select_ele = sl(ele.inner_ele) + def __call__(self, + text_value_index: Union[str, int, list, tuple] = None, + para_type: str = 'text', + deselect: bool = False) -> bool: + """选定或取消选定下拉列表中子元素 \n + :param text_value_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 + :param para_type: 参数类型,可选 'text'、'value'、'index' + :param deselect: 是否取消选择 + :return: 是否选择成功 + """ + return self.select(text_value_index, para_type, deselect) + @property def is_multi(self) -> bool: """返回是否多选表单""" @@ -816,42 +828,55 @@ class Select(object): def select(self, text_value_index: Union[str, int, list, tuple] = None, - para_type: str = 'text') -> bool: - """选定下拉列表中子元素 \n + para_type: str = 'text', + deselect: bool = False) -> bool: + """选定或取消选定下拉列表中子元素 \n :param text_value_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 - :param para_type: 参数类型,可选'text'、'value'、'index' + :param para_type: 参数类型,可选 'text'、'value'、'index' + :param deselect: 是否取消选择 :return: 是否选择成功 """ - if para_type not in ('text', 'value', 'index'): - raise ValueError('para_type参数只能传入“text”、“value”或“index”') - if not self.is_multi and isinstance(text_value_index, (list, tuple)): raise TypeError('单选下拉列表不能传入list和tuple') if isinstance(text_value_index, (str, int)): try: if para_type == 'text': - self.select_ele.select_by_visible_text(text_value_index) + if deselect: + self.select_ele.deselect_by_visible_text(text_value_index) + else: + self.select_ele.select_by_visible_text(text_value_index) elif para_type == 'value': - self.select_ele.select_by_value(text_value_index) + if deselect: + self.select_ele.deselect_by_value(text_value_index) + else: + self.select_ele.select_by_value(text_value_index) elif para_type == 'index': - self.select_ele.select_by_index(int(text_value_index)) + if deselect: + self.select_ele.deselect_by_index(int(text_value_index)) + else: + self.select_ele.select_by_index(int(text_value_index)) + else: + raise ValueError('para_type参数只能传入"text"、"value"或"index"。') return True + except: return False elif isinstance(text_value_index, (list, tuple)): - self.select_multi(text_value_index, para_type) + self.select_multi(text_value_index, para_type, deselect) else: raise TypeError('只能传入str、int、list和tuple类型。') def select_multi(self, text_value_index: Union[list, tuple] = None, - para_type: str = 'text') -> Union[bool, list]: - """选定下拉列表中多个子元素 \n + para_type: str = 'text', + deselect: bool = False) -> Union[bool, list]: + """选定或取消选定下拉列表中多个子元素 \n :param text_value_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 - :param para_type: 参数类型,可选'text'、'value'、'index' + :param para_type: 参数类型,可选 'text'、'value'、'index' + :param deselect: 是否取消选择 :return: 是否选择成功 """ if para_type not in ('text', 'value', 'index'): @@ -863,7 +888,7 @@ class Select(object): if not isinstance(i, (int, str)): raise TypeError('列表只能由str或int组成') - if not self.select(i, para_type): + if not self.select(i, para_type, deselect): fail_list.append(i) return fail_list or True @@ -876,59 +901,23 @@ class Select(object): para_type: str = 'text') -> bool: """取消选定下拉列表中子元素 \n :param text_value_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 - :param para_type: 参数类型,可选'text'、'value'、'index' + :param para_type: 参数类型,可选 'text'、'value'、'index' :return: 是否选择成功 """ - if para_type not in ('text', 'value', 'index'): - raise ValueError('para_type参数只能传入“text”、“value”或“index”') - - if not self.is_multi and isinstance(text_value_index, (list, tuple)): - raise TypeError('单选下拉列表不能传入list和tuple') - - if isinstance(text_value_index, (str, int)): - try: - if para_type == 'text': - self.select_ele.deselect_by_visible_text(text_value_index) - elif para_type == 'value': - self.select_ele.deselect_by_value(text_value_index) - elif para_type == 'index': - self.select_ele.deselect_by_index(int(text_value_index)) - return True - except: - return False - - elif isinstance(text_value_index, (list, tuple)): - self.deselect_multi(text_value_index, para_type) - - else: - raise TypeError('只能传入str、int、list和tuple类型。') + return self.select(text_value_index, para_type, True) def deselect_multi(self, text_value_index: Union[list, tuple] = None, para_type: str = 'text') -> Union[bool, list]: """取消选定下拉列表中多个子元素 \n :param text_value_index: 根据文本、值选或序号择选项,若允许多选,传入list或tuple可多选 - :param para_type: 参数类型,可选'text'、'value'、'index' + :param para_type: 参数类型,可选 'text'、'value'、'index' :return: 是否选择成功 """ - if para_type not in ('text', 'value', 'index'): - raise ValueError('para_type参数只能传入“text”、“value”或“index”') + return self.select_multi(text_value_index, para_type, True) - if isinstance(text_value_index, (list, tuple)): - fail_list = [] - for i in text_value_index: - if not isinstance(i, (int, str)): - raise TypeError('列表只能由str或int组成') - - if not self.deselect(i, para_type): - fail_list.append(i) - - return fail_list or True - - else: - raise TypeError('只能传入list或tuple类型。') - - def invert(self): + def invert(self) -> None: + """反选""" if not self.is_multi: raise NotImplementedError("You may only deselect options of a multi-select") From 9ef22857123f9d30e37639fd93355f20665ae529 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sun, 4 Apr 2021 16:36:47 +0800 Subject: [PATCH 78/94] =?UTF-8?q?scroll=5Fto()=E5=A2=9E=E5=8A=A0'half'?= =?UTF-8?q?=E6=96=B9=E5=BC=8F=EF=BC=8C=E6=BB=9A=E5=8A=A8=E5=8D=8A=E9=A1=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_page.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index 430ca4e..8be327b 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -491,8 +491,8 @@ class DriverPage(object): ele.run_script("arguments[0].scrollIntoView();") def scroll_to(self, mode: str = 'bottom', pixel: int = 300) -> None: - """按参数指示方式滚动页面 \n - :param mode: 可选滚动方向:'top', 'bottom', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right' + """按参数指示方式滚动页面 \n + :param mode: 可选滚动方向:'top', 'bottom', 'half', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right' :param pixel: 滚动的像素 :return: None """ @@ -503,6 +503,10 @@ class DriverPage(object): self.driver.execute_script( "window.scrollTo(document.documentElement.scrollLeft,document.body.scrollHeight);") + elif mode == 'half': + self.driver.execute_script( + "window.scrollTo(document.documentElement.scrollLeft,document.body.scrollHeight/2);") + elif mode == 'rightmost': self.driver.execute_script("window.scrollTo(document.body.scrollWidth,document.documentElement.scrollTop);") @@ -523,7 +527,7 @@ class DriverPage(object): else: raise ValueError( - "Argument mode can only be 'top', 'bottom', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right'.") + "Argument mode can only be 'top', 'bottom', 'half', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right'.") def refresh(self) -> None: """刷新当前页面""" From 2882f177451fc84693fecacd959f2cd5d7d1155f Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 12 Apr 2021 10:35:39 +0800 Subject: [PATCH 79/94] =?UTF-8?q?to=5Fiframe()=E6=94=B9=E5=90=8D=E4=B8=BAt?= =?UTF-8?q?o=5Fframe()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_page.py | 30 +++++++++++++++--------------- README.en.md | 32 +++++++++++++++++--------------- README.zh-cn.md | 5 +++-- 3 files changed, 35 insertions(+), 32 deletions(-) diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index 8be327b..e8dee6f 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -5,15 +5,15 @@ @File : driver_page.py """ from glob import glob -from pathlib import Path -from typing import Union, List, Any, Tuple -from urllib.parse import quote +from pathlib import Path from selenium.common.exceptions import NoAlertPresentException from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.support.wait import WebDriverWait from time import time, sleep +from typing import Union, List, Any, Tuple +from urllib.parse import quote from .common import str_to_loc, get_available_file_name, translate_loc, format_html from .driver_element import DriverElement, execute_driver_find @@ -421,18 +421,18 @@ class DriverPage(object): tab = self.driver.window_handles[tab] if isinstance(tab, int) else tab self.driver.switch_to.window(tab) - def to_iframe(self, loc_or_ele: Union[int, str, tuple, WebElement, DriverElement] = 'main') -> None: - """跳转到iframe \n - 可接收iframe序号(0开始)、id或name、查询字符串、loc元组、WebElement对象、DriverElement对象, \n - 传入'main'跳到最高层,传入'parent'跳到上一层 \n - 示例: \n - to_iframe('tag:iframe') - 通过传入iframe的查询字符串定位 \n - to_iframe('iframe_id') - 通过iframe的id属性定位 \n - to_iframe('iframe_name') - 通过iframe的name属性定位 \n - to_iframe(iframe_element) - 通过传入元素对象定位 \n - to_iframe(0) - 通过iframe的序号定位 \n - to_iframe('main') - 跳到最高层 \n - to_iframe('parent') - 跳到上一层 \n + def to_frame(self, loc_or_ele: Union[int, str, tuple, WebElement, DriverElement] = 'main') -> None: + """跳转到frame \n + 可接收frame序号(0开始)、id或name、查询字符串、loc元组、WebElement对象、DriverElement对象, \n + 传入'main'跳到最高层,传入'parent'跳到上一层 \n + 示例: \n + to_frame('tag:iframe') - 通过传入frame的查询字符串定位 \n + to_frame('iframe_id') - 通过frame的id属性定位 \n + to_frame('iframe_name') - 通过frame的name属性定位 \n + to_frame(iframe_element) - 通过传入元素对象定位 \n + to_frame(0) - 通过frame的序号定位 \n + to_frame('main') - 跳到最高层 \n + to_frame('parent') - 跳到上一层 \n :param loc_or_ele: iframe的定位信息 :return: None """ diff --git a/README.en.md b/README.en.md index 370ac63..a0a1e80 100644 --- a/README.en.md +++ b/README.en.md @@ -564,11 +564,12 @@ When calling a method that only belongs to d mode, it will automatically switch ```python page.set_cookies() # set cookies -page.get_cookies() # Get cookies, which can be returned by list or dict +page.get_cookies() # Get cookies, which can be returned by list or dict page.change_mode() # Switch mode, it will automatically copy cookies page.cookies_to_session() # Copy cookies from WebDriver object to Session object page.cookies_to_driver() # Copy cookies from Session object to WebDriver object -page.get(url, retry, interval, **kwargs) # Use get to access the web page, you can specify the number of retries and the interval +page.get(url, retry, interval, + **kwargs) # Use get to access the web page, you can specify the number of retries and the interval page.ele(loc_or_ele, timeout) # Get the first element, node or attribute that meets the conditions page.eles(loc_or_ele, timeout) # Get all eligible elements, nodes or attributes page.download(url, save_path, rename, file_exists, **kwargs) # download file @@ -576,7 +577,8 @@ page.close_driver() # Close the WebDriver object page.close_session() # Close the Session object # s mode unique: -page.post(url, data, retry, interval, **kwargs) # To access the webpage in post mode, you can specify the number of retries and the interval +page.post(url, data, retry, interval, + **kwargs) # To access the webpage in post mode, you can specify the number of retries and the interval # d mode unique: page.wait_ele(loc_or_ele, mode, timeout) # Wait for the element to be deleted, displayed, and hidden from the dom @@ -585,10 +587,11 @@ page.create_tab(url) # Create and locate a tab page, which is at the end page.to_tab(num_or_handle) # Jump to tab page page.close_current_tab() # Close the current tab page page.close_other_tabs(num_or_handles) # Close other tabs -page.to_iframe(iframe) # cut into iframe +page.to_frame(iframe) # cut into iframe page.screenshot(path) # Page screenshot page.scrool_to_see(element) # Scroll until an element is visible -page.scroll_to(mode, pixel) # Scroll the page as indicated by the parameter, and the scroll direction is optional:'top','bottom','rightmost','leftmost','up','down','left', ' right' +page.scroll_to(mode, + pixel) # Scroll the page as indicated by the parameter, and the scroll direction is optional:'top','bottom','rightmost','leftmost','up','down','left', ' right' page.refresh() # refresh the current page page.back() # Browser back page.et_window_size(x, y) # Set the browser window size, maximize by default @@ -1725,9 +1728,7 @@ Parameter Description: Returns: None - - -### to_iframe() +### to_frame() Jump to iframe, jump to the highest level by default, compatible with selenium native parameters. @@ -1736,13 +1737,14 @@ Parameter Description: - loc_or_ele:[int, str, tuple, WebElement, DriverElement] - Find the condition of iframe element, can receive iframe serial number (starting at 0), id or name, query string, loc parameter, WebElement object, DriverElement object, and pass in ' main' jump to the highest level, and pass in'parent' to jump to the upper level Example: -- to_iframe('tag:iframe')- locate by the query string passed in iframe -- to_iframe('iframe_id')- Positioning by the id attribute of the iframe -- to_iframe('iframe_name')- locate by the name attribute of iframe -- to_iframe(iframe_element)- locate by passing in the element object -- to_iframe(0)- locate by the serial number of the iframe -- to_iframe('main')- jump to the top level -- to_iframe('parent')- jump to the previous level + +- to_frame('tag:iframe')- locate by the query string passed in iframe +- to_frame('iframe_id')- Positioning by the id attribute of the iframe +- to_frame('iframe_name')- locate by the name attribute of iframe +- to_frame(iframe_element)- locate by passing in the element object +- to_frame(0)- locate by the serial number of the iframe +- to_frame('main')- jump to the top level +- to_frame('parent')- jump to the previous level Returns: None diff --git a/README.zh-cn.md b/README.zh-cn.md index 06f9fe1..8ed6283 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -551,10 +551,11 @@ page.create_tab(url) # 新建并定位到一个标签页,该标签页在最后 page.to_tab(num_or_handle) # 跳转到标签页 page.close_current_tab() # 关闭当前标签页 page.close_other_tabs(num_or_handles) # 关闭其它标签页 -page.to_iframe(iframe) # 切入 iframe +page.to_frame(iframe) # 切入 iframe page.screenshot(path) # 页面截图 page.scroll_to_see(element) # 滚动直到某元素可见 -page.scroll_to(mode, pixel) # 按参数指示方式滚动页面,可选滚动方向:'top', 'bottom', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right' +page.scroll_to(mode, + pixel) # 按参数指示方式滚动页面,可选滚动方向:'top', 'bottom', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right' page.refresh() # 刷新当前页面 page.back() # 浏览器后退 page.et_window_size(x, y) # 设置浏览器窗口大小,默认最大化 From 36e4362d710784c57c6a8397ca4d6e86db95b9c8 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 12 Apr 2021 10:35:58 +0800 Subject: [PATCH 80/94] =?UTF-8?q?=E8=B0=83=E6=95=B4=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 6a51c2f..214837f 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -4,8 +4,6 @@ @Contact : g1879@qq.com @File : drission.py """ -from typing import Union - from requests import Session from requests.cookies import RequestsCookieJar from selenium import webdriver @@ -14,6 +12,7 @@ from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.webdriver import WebDriver from sys import exit from tldextract import extract +from typing import Union from .config import (_dict_to_chrome_options, _session_options_to_dict, SessionOptions, DriverOptions, _chrome_options_to_dict, OptionsManager, _cookies_to_tuple) @@ -278,6 +277,10 @@ class Drission(object): self.driver.add_cookie(cookie) def _set_session(self, data: dict) -> None: + """根据传入字典对session进行设置 \n + :param data: session配置字典 + :return: None + """ if self._session is None: self._session = Session() From d1aa92ee1590c2be8a5c480d46295c928d0b053c Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 12 Apr 2021 15:48:14 +0800 Subject: [PATCH 81/94] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E5=8F=98=E9=87=8F?= =?UTF-8?q?=E7=B1=BB=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/config.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/DrissionPage/config.py b/DrissionPage/config.py index d158d45..e04c057 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -8,11 +8,10 @@ from configparser import RawConfigParser, NoSectionError, NoOptionError from http.cookiejar import Cookie from pathlib import Path -from typing import Any, Union - from requests.cookies import RequestsCookieJar from selenium import webdriver from selenium.webdriver.chrome.options import Options +from typing import Any, Union class OptionsManager(object): @@ -399,7 +398,7 @@ class SessionOptions(object): path = path / 'config.ini' if path.is_dir() else path if path.exists(): - om = OptionsManager(path) + om = OptionsManager(str(path)) else: om = OptionsManager(self.ini_path or str(Path(__file__).parent / 'configs.ini')) @@ -466,7 +465,7 @@ class DriverOptions(Options): path = path / 'config.ini' if path.is_dir() else path if path.exists(): - om = OptionsManager(path) + om = OptionsManager(str(path)) else: om = OptionsManager(self.ini_path or str(Path(__file__).parent / 'configs.ini')) From 4a5c977a0717968d319369055dc8408eab1dc519 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 12 Apr 2021 15:49:24 +0800 Subject: [PATCH 82/94] =?UTF-8?q?=E5=BB=BA=E7=AB=8B=E8=B0=83=E8=AF=95?= =?UTF-8?q?=E6=B5=8F=E8=A7=88=E5=99=A8=E6=97=B6=E4=BC=9A=E5=8A=A0=E5=85=A5?= =?UTF-8?q?=E8=AE=BE=E7=BD=AE=E9=A1=B9=EF=BC=9B=E5=A2=9E=E5=8A=A0kill=5Fbr?= =?UTF-8?q?owser()=E6=96=B9=E6=B3=95=E3=80=82=E5=BE=85=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 214837f..217fb0a 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -98,9 +98,12 @@ class Drission(object): if options.debugger_address and _check_port(options.debugger_address) is False: from subprocess import Popen port = options.debugger_address[options.debugger_address.rfind(':') + 1:] + args = ' '.join(self._driver_options['arguments']) + if self._proxy: + args = f'{args} --proxy-server={self._proxy["http"]}' try: - self._debugger = Popen(f'{chrome_path} --remote-debugging-port={port}', shell=False) + self._debugger = Popen(f'{chrome_path} --remote-debugging-port={port} {args}', shell=False) if chrome_path == 'chrome.exe': from common import get_exe_path_from_port @@ -114,7 +117,7 @@ class Drission(object): if not chrome_path: raise FileNotFoundError('无法找到chrome.exe路径,请手动配置。') - self._debugger = Popen(f'"{chrome_path}" --remote-debugging-port={port}', shell=False) + self._debugger = Popen(f'"{chrome_path}" --remote-debugging-port={port} {args}', shell=False) # -----------创建WebDriver对象----------- try: @@ -165,11 +168,6 @@ class Drission(object): return self._driver - @property - def debugger_progress(self): - """调试浏览器进程""" - return self._debugger - @property def driver_options(self) -> dict: """返回driver配置信息""" @@ -216,6 +214,16 @@ class Drission(object): for cookie in cookies: self.set_cookies(cookie, set_driver=True) + @property + def debugger_progress(self): + """调试浏览器进程""" + return self._debugger + + def kill_browser(self): + """关闭浏览器进程(如果可以)""" + if self.debugger_progress: + self.debugger_progress.kill() + def set_cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict], set_session: bool = False, From 5a349b24e9e0c17c1e9b76f6ebd9b01224f9483a Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 12 Apr 2021 17:50:11 +0800 Subject: [PATCH 83/94] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=8F=98=E9=87=8F?= =?UTF-8?q?=E7=B1=BB=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/easy_set.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DrissionPage/easy_set.py b/DrissionPage/easy_set.py index edc4417..999cebc 100644 --- a/DrissionPage/easy_set.py +++ b/DrissionPage/easy_set.py @@ -5,12 +5,12 @@ @File : driver_page.py """ from os import popen + from pathlib import Path from pprint import pprint from re import search as RE_SEARCH -from typing import Union - from selenium import webdriver +from typing import Union from DrissionPage.config import OptionsManager, DriverOptions from DrissionPage.drission import Drission @@ -210,7 +210,7 @@ def get_match_driver(ini_path: Union[str, None] = 'default', if show_msg: print('chrome.exe路径', chrome_path) - ver = _get_chrome_version(chrome_path) + ver = _get_chrome_version(str(chrome_path)) if show_msg: print('version', ver) From 4f32553a3cf9a4c1ba2ca749bf462ba2c72153b6 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 12 Apr 2021 17:50:53 +0800 Subject: [PATCH 84/94] =?UTF-8?q?=E5=AE=8C=E5=96=84kill=5Fbrowser()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 217fb0a..e15f87b 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -219,10 +219,35 @@ class Drission(object): """调试浏览器进程""" return self._debugger - def kill_browser(self): + def kill_browser(self) -> None: """关闭浏览器进程(如果可以)""" if self.debugger_progress: self.debugger_progress.kill() + return + + address = self.driver_options.get('debugger_address', '').split(':') + if len(address) == 1: + self.close_driver() + + elif len(address) == 2: + ip, port = address + if ip not in ('127.0.0.1', 'localhost') or not port.isdigit(): + return + + from os import popen + progresses = popen(f'netstat -nao | findstr :{port}').read().split('\n') + txt = '' + for progress in progresses: + if 'LISTENING' in progress: + txt = progress + break + + if not txt: + return + + pid = txt[txt.rfind(' ') + 1:] + if popen(f'tasklist | findstr {pid}').read().lower().startswith('chrome.exe'): + popen(f'taskkill /pid {pid} /F') def set_cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict], From cc5ad77629db29aed472423545dbdcf3dbd0feef Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 13 Apr 2021 15:51:38 +0800 Subject: [PATCH 85/94] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=88=9B=E5=BB=BAdrive?= =?UTF-8?q?r=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 141 +++++++++++++++++++++++---------------- 1 file changed, 83 insertions(+), 58 deletions(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index e15f87b..524cf25 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -83,12 +83,12 @@ class Drission(object): 如设置了本地调试浏览器,可自动接入或打开浏览器进程。 """ if self._driver is None: - if isinstance(self._driver_options, dict): - options = _dict_to_chrome_options(self._driver_options) - else: + if not isinstance(self._driver_options, dict): raise TypeError('Driver options invalid') - if self._proxy: + options = _dict_to_chrome_options(self._driver_options) + + if not self._driver_options.get('debugger_address', None) and self._proxy: options.add_argument(f'--proxy-server={self._proxy["http"]}') driver_path = self._driver_options.get('driver_path', None) or 'chromedriver' @@ -98,63 +98,13 @@ class Drission(object): if options.debugger_address and _check_port(options.debugger_address) is False: from subprocess import Popen port = options.debugger_address[options.debugger_address.rfind(':') + 1:] - args = ' '.join(self._driver_options['arguments']) - if self._proxy: - args = f'{args} --proxy-server={self._proxy["http"]}' - try: - self._debugger = Popen(f'{chrome_path} --remote-debugging-port={port} {args}', shell=False) - - if chrome_path == 'chrome.exe': - from common import get_exe_path_from_port - chrome_path = get_exe_path_from_port(port) - - # 启动不了进程,主动找浏览器执行文件启动 - except FileNotFoundError: - from DrissionPage.easy_set import _get_chrome_path - chrome_path = _get_chrome_path(show_msg=False) - - if not chrome_path: - raise FileNotFoundError('无法找到chrome.exe路径,请手动配置。') - - self._debugger = Popen(f'"{chrome_path}" --remote-debugging-port={port} {args}', shell=False) + # 启动浏览器进程,同时返回该进程使用的 chrome.exe 路径 + chrome_path, self._debugger = _create_chrome(chrome_path, port, + self._driver_options['arguments'], self._proxy) # -----------创建WebDriver对象----------- - try: - self._driver = webdriver.Chrome(driver_path, options=options) - - # 若版本不对,获取对应chromedriver再试 - except (WebDriverException, SessionNotCreatedException): - from .easy_set import get_match_driver - chrome_path = None if chrome_path == 'chrome.exe' else chrome_path - driver_path = get_match_driver(chrome_path=chrome_path, check_version=False, show_msg=False) - - if driver_path: - try: - self._driver = webdriver.Chrome(driver_path, options=options) - except: - print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。') - exit(0) - - # 当找不到driver且chrome_path为None时,说明安装的版本过高,改在系统路径中查找 - elif chrome_path is None and driver_path is None: - from DrissionPage.easy_set import _get_chrome_path - chrome_path = _get_chrome_path(show_msg=False, from_ini=False, from_regedit=False) - driver_path = get_match_driver(chrome_path=chrome_path, check_version=False, show_msg=False) - - if driver_path: - options.binary_location = chrome_path - try: - self._driver = webdriver.Chrome(driver_path, options=options) - except: - print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。') - exit(0) - else: - print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。') - exit(0) - else: - print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。') - exit(0) + self._driver = _create_driver(chrome_path, driver_path, options) # 反反爬设置 try: @@ -422,3 +372,78 @@ def _check_port(debugger_address: str) -> Union[bool, None]: finally: if s: s.close() + + +def _create_chrome(chrome_path: str, port: str, args: list, proxy: dict) -> tuple: + """创建 chrome 进程 \n + :param chrome_path: chrome.exe 路径 + :param port: 进程运行的端口号 + :param args: chrome 配置参数 + :return: chrome.exe 路径和进程对象组成的元组 + """ + from subprocess import Popen + args = ' '.join(args) + if proxy: + args = f'{args} --proxy-server={proxy["http"]}' + + try: + debugger = Popen(f'{chrome_path} --remote-debugging-port={port} {args}', shell=False) + + if chrome_path == 'chrome.exe': + from common import get_exe_path_from_port + chrome_path = get_exe_path_from_port(port) + + # 传入的路径找不到,主动在ini文件、注册表、系统变量中找 + except FileNotFoundError: + from DrissionPage.easy_set import _get_chrome_path + chrome_path = _get_chrome_path(show_msg=False) + + if not chrome_path: + raise FileNotFoundError('无法找到chrome.exe路径,请手动配置。') + + debugger = Popen(f'"{chrome_path}" --remote-debugging-port={port} {args}', shell=False) + + return chrome_path, debugger + + +def _create_driver(chrome_path: str, driver_path: str, options: Options) -> WebDriver: + """创建 WebDriver 对象 \n + :param chrome_path: chrome.exe 路径 + :param driver_path: chromedriver.exe 路径 + :param options: Options 对象 + :return: WebDriver 对象 + """ + + def show_err_and_exit(): + print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。') + exit(0) + + try: + return webdriver.Chrome(driver_path, options=options) + + # 若版本不对,获取对应 chromedriver 再试 + except (WebDriverException, SessionNotCreatedException): + from .easy_set import get_match_driver + chrome_path = None if chrome_path == 'chrome.exe' else chrome_path + driver_path = get_match_driver(chrome_path=chrome_path, check_version=False, show_msg=False) + + if driver_path: + try: + return webdriver.Chrome(driver_path, options=options) + except: + pass + + # 当找不到 driver 且 chrome_path 为 None 时,说明安装的版本过高,改在系统路径中查找 + elif chrome_path is None and driver_path is None: + from DrissionPage.easy_set import _get_chrome_path + chrome_path = _get_chrome_path(show_msg=False, from_ini=False, from_regedit=False) + driver_path = get_match_driver(chrome_path=chrome_path, check_version=False, show_msg=False) + + if driver_path: + options.binary_location = chrome_path + try: + return webdriver.Chrome(driver_path, options=options) + except: + pass + + show_err_and_exit() From c8499eb9fabd1d0a503f77e9aecb5be2aba7014a Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 13 Apr 2021 15:56:55 +0800 Subject: [PATCH 86/94] =?UTF-8?q?=E5=BE=AE=E8=B0=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 524cf25..653b989 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -413,11 +413,6 @@ def _create_driver(chrome_path: str, driver_path: str, options: Options) -> WebD :param options: Options 对象 :return: WebDriver 对象 """ - - def show_err_and_exit(): - print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。') - exit(0) - try: return webdriver.Chrome(driver_path, options=options) @@ -446,4 +441,5 @@ def _create_driver(chrome_path: str, driver_path: str, options: Options) -> WebD except: pass - show_err_and_exit() + print('无法启动,请检查chromedriver版本与Chrome是否匹配,并手动设置。') + exit(0) From 612b9879a00b03dbf8d02f3bef4d018ab61b4112 Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 13 Apr 2021 16:58:44 +0800 Subject: [PATCH 87/94] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=88=9B=E5=BB=BA?= =?UTF-8?q?=E6=B5=8F=E8=A7=88=E5=99=A8=E8=BF=9B=E7=A8=8B=E7=9A=84=E9=80=BB?= =?UTF-8?q?=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 653b989..2a1862c 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -382,10 +382,22 @@ def _create_chrome(chrome_path: str, port: str, args: list, proxy: dict) -> tupl :return: chrome.exe 路径和进程对象组成的元组 """ from subprocess import Popen - args = ' '.join(args) + + # ----------为路径加上双引号,避免路径中的空格产生异常---------- + args1 = [] + for arg in args: + if arg.startswith(('--user-data-dir', '--disk-cache-dir')): + index = arg.find('=') + 1 + args1.append(f'{arg[:index]}"{arg[index:].strip()}"') + else: + args1.append(arg) + + args = ' '.join(args1) + if proxy: args = f'{args} --proxy-server={proxy["http"]}' + # ----------创建浏览器进程---------- try: debugger = Popen(f'{chrome_path} --remote-debugging-port={port} {args}', shell=False) From 88506ac7fcbc1434026e49bde4c2379d8eeea687 Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 13 Apr 2021 17:49:14 +0800 Subject: [PATCH 88/94] =?UTF-8?q?=E8=B0=83=E6=95=B4=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index bd5b88d..2f92282 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -6,14 +6,13 @@ """ import re from pathlib import Path -from typing import Union, List, Any, Tuple - from selenium.common.exceptions import TimeoutException, JavascriptException, InvalidElementStateException from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.support import expected_conditions as ec from selenium.webdriver.support.wait import WebDriverWait from time import sleep +from typing import Union, List, Any, Tuple from .common import DrissionElement, str_to_loc, get_available_file_name, translate_loc, format_html @@ -122,7 +121,8 @@ class DriverElement(DrissionElement): return self._get_brother(1, 'ele', 'prev') @property - def comments(self): + def comments(self) -> list: + """返回元素注释文本组成的列表""" return self.eles('xpath:.//comment()') # -----------------driver独占属性------------------- From 46afbc767f9241098564ba91c9bc0005d40975ae Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 13 Apr 2021 17:51:23 +0800 Subject: [PATCH 89/94] =?UTF-8?q?1.10.0=20=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.en.md | 41 ++++++++++++++++++++++++----------------- README.zh-cn.md | 25 +++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 19 deletions(-) diff --git a/README.en.md b/README.en.md index a0a1e80..a8f63fa 100644 --- a/README.en.md +++ b/README.en.md @@ -591,7 +591,7 @@ page.to_frame(iframe) # cut into iframe page.screenshot(path) # Page screenshot page.scrool_to_see(element) # Scroll until an element is visible page.scroll_to(mode, - pixel) # Scroll the page as indicated by the parameter, and the scroll direction is optional:'top','bottom','rightmost','leftmost','up','down','left', ' right' + pixel) # Scroll the page as indicated by the parameter, and the scroll direction is optional:'top', 'bottom', 'rightmost', 'leftmost', 'up', 'down', 'left', ' right', 'half' page.refresh() # refresh the current page page.back() # Browser back page.et_window_size(x, y) # Set the browser window size, maximize by default @@ -743,12 +743,29 @@ element.run_script(js, *args) # Run JavaScript script on the element element.submit() # Submit element.clear() # Clear the element element.screenshot(path, filename) # Take a screenshot of the element -element.select(text) # Select the drop- down list based on the text element.set_attr(attr, value) # Set element attribute value element.remove_attr(attr) # remove a element attribute element.drag(x, y, speed, shake) # Drag the relative distance of the element, you can set the speed and whether to shake randomly element.drag_to(ele_or_loc, speed, shake) # Drag the element to another element or a certain coordinate, you can set the speed and whether to shake randomly element.hover() # Hover the mouse over the element + +# select function: +element.select.is_multi # Whether to select multiple lists +element.select.options # Return all list item objects +element.select.selected_option # Return the first selected option element +element.select.selected_options # Return a list of all selected option elements + +element.select(text) # Select the drop-down list item based on the text +element.select(value,'value') # Select the drop-down list item according to the value +element.select(index,'index') # Select the drop-down list item according to the serial number + +element.select.deselect(text) # Deselect drop-down list items based on the text (valid for multiple selection lists) +element.select.deselect(value,'value') # Deselect drop-down list items according to value (valid for multiple selection lists) +element.select.deselect(index,'index') # Deselect drop-down list items according to the serial number (valid for multiple selection lists) +# Note: When the list is a multi-selection list, the first parameter above can receive list or tuple, and select or deselect multiple items at the same time + +element.select.clear() # Clear multiple selection list options +element.select.invert() # Invert multiple selection list options ``` @@ -1997,15 +2014,17 @@ Returns the content of the ::before pseudo- element of the current element Returns: str - - ### after Returns the content of the ::after pseudo element of the current element Returns: str +#### select +If it is a select element, it returns the Select object, otherwise it returns None. + +Returns: Union[Select, None] ### texts() @@ -2013,7 +2032,7 @@ Returns the text of all direct child nodes within the element, including element Parameter Description: -- text_node_only: bool - whether to return only text nodes +- text_node_only: bool - whether to return only text nodes Returns: List[str] @@ -2267,18 +2286,6 @@ Returns: str -### select() - -Select from the drop- down list. - -Parameter Description: - -- text: str - option text - -Returns: bool - success - - - ### set_attr() Set element attributes. diff --git a/README.zh-cn.md b/README.zh-cn.md index 8ed6283..0208297 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -454,6 +454,9 @@ proxy = {'http': '127.0.0.1:1080', 'https': '127.0.0.1:1080'} # 传入配置,driver_or_options 和 session_or_options 都是可选的,须要使用对应模式才须要传入 drission = Drission(driver_or_options, session_or_options, proxy=proxy) + +# 关闭浏览器,debug 模式下须要显式调用这句,浏览器才能关掉 +drission.kill_browser() ``` DriverOptions 和 SessionOptions 用法详见下文。 @@ -555,7 +558,7 @@ page.to_frame(iframe) # 切入 iframe page.screenshot(path) # 页面截图 page.scroll_to_see(element) # 滚动直到某元素可见 page.scroll_to(mode, - pixel) # 按参数指示方式滚动页面,可选滚动方向:'top', 'bottom', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right' + pixel) # 按参数指示方式滚动页面,可选滚动方向:'top', 'bottom', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right', 'half' page.refresh() # 刷新当前页面 page.back() # 浏览器后退 page.et_window_size(x, y) # 设置浏览器窗口大小,默认最大化 @@ -708,12 +711,30 @@ element.run_script(js, *args) # 对元素运行 JavaScript 脚本 element.submit() # 提交 element.clear() # 清空元素 element.screenshot(path, filename) # 对元素截图 -element.select(text) # 根据文本选择下拉列表 element.set_attr(attr, value) # 设置元素属性值 element.remove_attr(attr) # 删除属性 element.drag(x, y, speed, shake) # 拖动元素相对距离,可设置速度和是否随机抖动 element.drag_to(ele_or_loc, speed, shake) # 拖动元素到另一个元素或某个坐标,可设置速度和是否随机抖动 element.hover() # 在元素上悬停鼠标 + +# select 功能: +element.select.is_multi # 是否多选列表 +element.select.options # 返回所有列表项对象 +element.select.selected_option # 返回第一个被选中的 option 元素 +element.select.selected_options # 返回所有被选中的 option 元素列表 + +element.select(text) # 根据文本选择下拉列表项 +element.select(value, 'value') # 根据 value 选择下拉列表项 +element.select(index, 'index') # 根据序号选择下拉列表项 + +# 多选列表功能: +element.select.deselect(text) # 根据文本取消选择下拉列表项(多选列表时有效) +element.select.deselect(value, 'value') # 根据 value 取消选择下拉列表项(多选列表时有效) +element.select.deselect(index, 'index') # 根据序号取消选择下拉列表项(多选列表时有效) +# 注:当列表为多选列表时,以上的第一个参数可以接收 list 或 tuple,同时选择或取消选择多个项 + +element.select.clear() # 清空多选列表选项 +element.select.invert() # 反选多选列表选项 ``` From 4c2364b08a361be0be188d78ec82497b15994e11 Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 14 Apr 2021 09:54:10 +0800 Subject: [PATCH 90/94] =?UTF-8?q?=E5=BE=AE=E8=B0=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 2 +- DrissionPage/driver_element.py | 25 +++++++++++++++---------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 2a1862c..2b4904c 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -392,7 +392,7 @@ def _create_chrome(chrome_path: str, port: str, args: list, proxy: dict) -> tupl else: args1.append(arg) - args = ' '.join(args1) + args = ' '.join(set(args1)) if proxy: args = f'{args} --proxy-server={proxy["http"]}' diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 2f92282..f8c6e1c 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -777,7 +777,12 @@ class ElementsByXpath(object): class Select(object): + """Select 类专门用于处理 d 模式下 select 标签""" + def __init__(self, ele: DriverElement): + """初始化 \n + :param ele: select 元素对象 + """ if ele.tag != 'select': raise TypeError(f"Select only works on