From 343e3a55f6b36ce6387c555d836d9c12f12fd181 Mon Sep 17 00:00:00 2001 From: kedaji Date: Thu, 12 Jan 2023 17:46:17 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=E5=AE=9E=E7=8E=B0=E7=AB=AF=E5=8F=A3?= =?UTF-8?q?=E5=92=8C=E7=94=A8=E6=88=B7=E6=95=B0=E6=8D=AE=E7=9B=AE=E5=BD=95?= =?UTF-8?q?=E8=87=AA=E5=8A=A8=E5=88=86=E9=85=8D=EF=BC=8C=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E5=A4=9A=E5=BC=80=E6=B5=8F=E8=A7=88=E5=99=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 4 + DrissionPage/__init__.py | 1 + DrissionPage/config.py | 21 ++-- DrissionPage/configs.ini | 1 + DrissionPage/dynamic_port_allocator.py | 109 +++++++++++++++++++++ DrissionPage/dynamic_port_allocator.pyi.py | 16 +++ 6 files changed, 145 insertions(+), 7 deletions(-) create mode 100644 DrissionPage/dynamic_port_allocator.py create mode 100644 DrissionPage/dynamic_port_allocator.pyi.py diff --git a/.gitignore b/.gitignore index 11614af..a62efc5 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,9 @@ __pycache__/ # C extensions *.so +# Intellij IDEA +.idea/ + # Distribution / packaging .Python build/ @@ -20,6 +23,7 @@ parts/ sdist/ var/ wheels/ +test/ share/python-wheels/ *.egg-info/ .installed.cfg diff --git a/DrissionPage/__init__.py b/DrissionPage/__init__.py index f76ac4a..7e86dab 100644 --- a/DrissionPage/__init__.py +++ b/DrissionPage/__init__.py @@ -14,3 +14,4 @@ from .session_page import SessionPage from .drission import Drission from .config import DriverOptions, SessionOptions from .action_chains import ActionChains +from .dynamic_port_allocator import DynamicPortAllocator \ No newline at end of file diff --git a/DrissionPage/config.py b/DrissionPage/config.py index b332296..7be5cbf 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -3,12 +3,15 @@ @Author : g1879 @Contact : g1879@qq.com """ +import os + from configparser import RawConfigParser, NoSectionError, NoOptionError from http.cookiejar import Cookie from pathlib import Path from requests.cookies import RequestsCookieJar from selenium.webdriver.chrome.options import Options +from .dynamic_port_allocator import DynamicPortAllocator class OptionsManager(object): @@ -486,14 +489,17 @@ class DriverOptions(Options): self._arguments = options_dict.get('arguments', []) self._extensions = options_dict.get('extensions', []) self._experimental_options = options_dict.get('experimental_options', {}) - self._debugger_address = options_dict.get('debugger_address', None) + # 从配置文件中读取默认用户数据父级目录 + DynamicPortAllocator.default_user_data_parent = options_dict.get('default_user_data_parent', '') + if not os.path.exists(DynamicPortAllocator.default_user_data_parent): + os.makedirs(DynamicPortAllocator.default_user_data_parent) + # 端口和调试地址将不再从配置文件中读取,而是直接动态分配 + port_and_user_data_path = DynamicPortAllocator.allocate() + self._user_data_path = port_and_user_data_path[1] + self.set_argument('--user-data-dir', self._user_data_path) + port = port_and_user_data_path[0] + self.debugger_address = f"127.0.0.1:{port}" self.page_load_strategy = options_dict.get('page_load_strategy', 'normal') - - for arg in self._arguments: - if arg.startswith('--user-data-dir='): - self.set_paths(user_data_path=arg[16:]) - break - self.timeouts = options_dict.get('timeouts', {'implicit': 10, 'pageLoad': 30, 'script': 30}) return @@ -754,6 +760,7 @@ class DriverOptions(Options): if browser_path is not None: self.binary_location = str(browser_path) + # TODO 允许用户自定义端口 if local_port is not None: self.debugger_address = '' if local_port == '' else f'127.0.0.1:{local_port}' diff --git a/DrissionPage/configs.ini b/DrissionPage/configs.ini index 3ea4625..f8d1f42 100644 --- a/DrissionPage/configs.ini +++ b/DrissionPage/configs.ini @@ -10,6 +10,7 @@ extensions = [] experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}, 'plugins.plugins_list': [{'enabled': False, 'name': 'Chrome PDF Viewer'}]}, 'useAutomationExtension': False, 'excludeSwitches': ['enable-automation']} timeouts = {'implicit': 10.0, 'pageLoad': 30.0, 'script': 30.0} page_load_strategy = normal +default_user_data_parent = D:\\tmp [session_options] headers = { diff --git a/DrissionPage/dynamic_port_allocator.py b/DrissionPage/dynamic_port_allocator.py new file mode 100644 index 0000000..c4a504e --- /dev/null +++ b/DrissionPage/dynamic_port_allocator.py @@ -0,0 +1,109 @@ +""" +@author: kedaji +""" +import os +import socket +import shutil + + +class DynamicPortAllocator: + """动态端口分配器""" + + mapping = {} + default_user_data_parent = None + + def __init__(self): + """没有什么用的初始化方法,只是为了复合Python代码规范""" + pass + + @staticmethod + def allocate(port=None, user_data_dir=None): + """将端口与用户数据目录做个映射,如果没有设置端口和数据目录,系统将动态分配端口和用户数据目录""" + + # 是否已指定端口 + if port is None: + # 动态分配端口:从min_port开始向后查询系统未使用的端口号,遇到空闲端口就进行分配,查到max_port停止 + # TODO 动态分配端口的范围从配置文件中读取 + min_port = 8080 + max_port = 10086 + allocated = False + for tmp_port in range(min_port, max_port + 1): + if DynamicPortAllocator.port_allocated(tmp_port): + continue + + if not DynamicPortAllocator.port_occupied(tmp_port): + port = tmp_port + allocated = True + break + + if not allocated: + raise BlockingIOError("未发现可使用的端口,当前的端口选择范围:" + str(min_port) + " ~ " + str(max_port)) + + else: + # 查询端口是否已被本程序使用 + if DynamicPortAllocator.port_allocated(port): + raise BlockingIOError(str(port) + " 端口已被分配") + + # 查询端口是否已被操作系统的某程序所占用 + if DynamicPortAllocator.port_occupied(port): + raise BlockingIOError(str(port) + " 端口已被占用") + + # 是否已指定用户数据目录 + if user_data_dir is None: + # 自动分配用户数据目录 + if DynamicPortAllocator.default_user_data_parent is None: + raise ValueError("请设置默认的用户数据父级目录") + user_data_dir = DynamicPortAllocator.default_user_data_parent + os.path.sep + "chrome_" + str(port) + + # 用户数据目录是否已被使用 + if user_data_dir in DynamicPortAllocator.mapping.keys(): + raise RecursionError("当前用户数据路径:" + user_data_dir + "\n已被端口为 " + str(port) + "的托管浏览器使用") + + # 创建用户数据目录 + if not os.path.exists(user_data_dir): + os.makedirs(user_data_dir) + else: + # 清空目录中的内容 + shutil.rmtree(user_data_dir) + os.mkdir(user_data_dir) + + # 映射 + DynamicPortAllocator.mapping[port] = user_data_dir + DynamicPortAllocator.mapping[user_data_dir] = port + + return port, user_data_dir + + @staticmethod + def get_mapped_user_data_path(port): + """获取端口对应的用户数据路径 + :param port: 端口号 + """ + return DynamicPortAllocator.mapping[port] + + @staticmethod + def get_mapped_port(user_data_path): + """获取对应用户数据路径对应的端口号 + :param user_data_path: 用户数据目录 + """ + return DynamicPortAllocator.mapping[user_data_path] + + @staticmethod + def port_allocated(port): + """查询端口是否已被分配至某个被托管的浏览器 + :param port: 端口号 + """ + if port in DynamicPortAllocator.mapping.keys(): + return True + return False + + @staticmethod + def port_occupied(port): + """查询端口是否被操作系统中的某个进程所占用 + :param port:端口号 + """ + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + result = sock.connect_ex(('127.0.0.1', port)) + if result == 0: + return True + return False + diff --git a/DrissionPage/dynamic_port_allocator.pyi.py b/DrissionPage/dynamic_port_allocator.pyi.py new file mode 100644 index 0000000..7d2017c --- /dev/null +++ b/DrissionPage/dynamic_port_allocator.pyi.py @@ -0,0 +1,16 @@ +class DynamicPortAllocator: + + @staticmethod + def allocate(port: int, user_data_path: str) -> tuple: ... + + @staticmethod + def get_mapped_user_data_path(port: int) -> str: ... + + @staticmethod + def get_mapped_port(user_data_path: str) -> int: ... + + @staticmethod + def port_allocated(port: int) -> bool: ... + + @staticmethod + def port_occupied(port: int) -> bool: ... From 5b7ecbc504538a1e7f94d651c7a244d202b10b23 Mon Sep 17 00:00:00 2001 From: kedaji Date: Thu, 12 Jan 2023 17:51:19 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E6=B7=BB=E5=8A=A0author?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/dynamic_port_allocator.pyi.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/DrissionPage/dynamic_port_allocator.pyi.py b/DrissionPage/dynamic_port_allocator.pyi.py index 7d2017c..d8babee 100644 --- a/DrissionPage/dynamic_port_allocator.pyi.py +++ b/DrissionPage/dynamic_port_allocator.pyi.py @@ -1,3 +1,6 @@ +""" +@author: kedaji +""" class DynamicPortAllocator: @staticmethod From bcdea4b4804dfd8ca17eb2777b6415920a520f41 Mon Sep 17 00:00:00 2001 From: kedaji Date: Fri, 13 Jan 2023 14:45:15 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=E5=A4=9A=E5=BC=80=E6=B5=8F=E8=A7=88?= =?UTF-8?q?=E5=99=A8=E6=94=AF=E6=8C=81=E5=8F=AF=E9=85=8D=E7=BD=AE=EF=BC=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/config.py | 57 ++++++++++++++++++---- DrissionPage/config.pyi | 4 ++ DrissionPage/configs.ini | 3 +- DrissionPage/dynamic_port_allocator.pyi.py | 2 + 4 files changed, 55 insertions(+), 11 deletions(-) diff --git a/DrissionPage/config.py b/DrissionPage/config.py index 7be5cbf..005da7a 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -4,6 +4,8 @@ @Contact : g1879@qq.com """ import os +import shutil +import sys from configparser import RawConfigParser, NoSectionError, NoOptionError from http.cookiejar import Cookie @@ -478,6 +480,8 @@ class DriverOptions(Options): """ super().__init__() self._user_data_path = None + self.multiple_open_browser = None + self.default_user_data_parent = None if read_file: self.ini_path = ini_path or str(Path(__file__).parent / 'configs.ini') @@ -489,17 +493,21 @@ class DriverOptions(Options): self._arguments = options_dict.get('arguments', []) self._extensions = options_dict.get('extensions', []) self._experimental_options = options_dict.get('experimental_options', {}) - # 从配置文件中读取默认用户数据父级目录 - DynamicPortAllocator.default_user_data_parent = options_dict.get('default_user_data_parent', '') - if not os.path.exists(DynamicPortAllocator.default_user_data_parent): - os.makedirs(DynamicPortAllocator.default_user_data_parent) - # 端口和调试地址将不再从配置文件中读取,而是直接动态分配 - port_and_user_data_path = DynamicPortAllocator.allocate() - self._user_data_path = port_and_user_data_path[1] - self.set_argument('--user-data-dir', self._user_data_path) - port = port_and_user_data_path[0] - self.debugger_address = f"127.0.0.1:{port}" + self.default_user_data_parent = options_dict.get("default_user_data_parent", os.path.dirname(sys.argv[0])) + # 使用中间值避免重复分配的问题 + tmp_bool = options_dict.get("multiple_open_browser", True) + if tmp_bool is True: + self.set_multiple_open_browser() + else: + self._debugger_address = options_dict.get('debugger_address', None) + self.page_load_strategy = options_dict.get('page_load_strategy', 'normal') + + for arg in self._arguments: + if arg.startswith('--user-data-dir='): + self.set_paths(user_data_path=arg[16:]) + break + self.timeouts = options_dict.get('timeouts', {'implicit': 10, 'pageLoad': 30, 'script': 30}) return @@ -508,6 +516,13 @@ class DriverOptions(Options): self.timeouts = {'implicit': 10, 'pageLoad': 30, 'script': 30} self._debugger_address = '127.0.0.1:9222' + def __del__(self): + """清理生成的目录""" + print("调用del") + for key in DynamicPortAllocator.mapping: + if key is str: + shutil.rmtree(key) + @property def driver_path(self): """chromedriver文件路径""" @@ -738,6 +753,28 @@ class DriverOptions(Options): self.page_load_strategy = value.lower() return self + def set_multiple_open_browser(self, default_user_data_parent=None): + """设置是否支持多开浏览器 + 开启后,系统将自动分配浏览器端口号与用户数据目录,不会产生浏览器端口冲突的问题,但是浏览器接管功能将会失效 + :param default_user_data_parent 浏览器用户数据目录的父目录,如果不设置将使用配置文件中的预设的目录,如果配置文件也没有设置默认数据父目录,系统将在脚本执行的路径设置为默认数据父目录 + """ + if self.multiple_open_browser is not True: + if default_user_data_parent is not None: + self.default_user_data_parent = default_user_data_parent + # 从配置文件中读取默认用户数据父级目录 + DynamicPortAllocator.default_user_data_parent = self.default_user_data_parent + if not os.path.exists(DynamicPortAllocator.default_user_data_parent): + os.makedirs(DynamicPortAllocator.default_user_data_parent) + # 端口和调试地址将不再从配置文件中读取,而是直接动态分配 + port_and_user_data_path = DynamicPortAllocator.allocate() + self._user_data_path = port_and_user_data_path[1] + self.set_argument('--user-data-dir', self._user_data_path) + port = port_and_user_data_path[0] + self.debugger_address = f"127.0.0.1:{port}" + self.multiple_open_browser = True + + return self + def set_paths(self, driver_path=None, chrome_path=None, browser_path=None, local_port=None, debugger_address=None, download_path=None, user_data_path=None, cache_path=None): """快捷的路径设置函数 \n diff --git a/DrissionPage/config.pyi b/DrissionPage/config.pyi index 4319af3..fd55542 100644 --- a/DrissionPage/config.pyi +++ b/DrissionPage/config.pyi @@ -158,6 +158,8 @@ class SessionOptions(object): class DriverOptions(Options): def __init__(self, read_file: bool = True, ini_path: str = None): + self.default_user_data_parent = None + self.multiple_open_browser = False self.ini_path: str = ... self._driver_path: str = ... self._user_data_path: str = ... @@ -215,6 +217,8 @@ class DriverOptions(Options): def set_page_load_strategy(self, value: str) -> 'DriverOptions': ... + def set_multiple_open_browser(self, default_user_data_parent: str=None) -> 'DriverOptions': ... + def set_paths(self, driver_path: Union[str, Path] = None, chrome_path: Union[str, Path] = None, diff --git a/DrissionPage/configs.ini b/DrissionPage/configs.ini index f8d1f42..2dbe975 100644 --- a/DrissionPage/configs.ini +++ b/DrissionPage/configs.ini @@ -10,7 +10,8 @@ extensions = [] experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}, 'plugins.plugins_list': [{'enabled': False, 'name': 'Chrome PDF Viewer'}]}, 'useAutomationExtension': False, 'excludeSwitches': ['enable-automation']} timeouts = {'implicit': 10.0, 'pageLoad': 30.0, 'script': 30.0} page_load_strategy = normal -default_user_data_parent = D:\\tmp +default_user_data_parent = D:/tmp +multiple_open_browser = True [session_options] headers = { diff --git a/DrissionPage/dynamic_port_allocator.pyi.py b/DrissionPage/dynamic_port_allocator.pyi.py index d8babee..fe05c0b 100644 --- a/DrissionPage/dynamic_port_allocator.pyi.py +++ b/DrissionPage/dynamic_port_allocator.pyi.py @@ -1,6 +1,8 @@ """ @author: kedaji """ + + class DynamicPortAllocator: @staticmethod From d1f9e459c54c4be3c350c939ef3e86e047bd20f1 Mon Sep 17 00:00:00 2001 From: kedaji Date: Fri, 13 Jan 2023 14:57:10 +0800 Subject: [PATCH 4/4] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E9=A1=B9=E9=A1=BA=E5=BA=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/configs.ini | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DrissionPage/configs.ini b/DrissionPage/configs.ini index 2dbe975..d9314a1 100644 --- a/DrissionPage/configs.ini +++ b/DrissionPage/configs.ini @@ -10,8 +10,9 @@ extensions = [] experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}, 'plugins.plugins_list': [{'enabled': False, 'name': 'Chrome PDF Viewer'}]}, 'useAutomationExtension': False, 'excludeSwitches': ['enable-automation']} timeouts = {'implicit': 10.0, 'pageLoad': 30.0, 'script': 30.0} page_load_strategy = normal -default_user_data_parent = D:/tmp multiple_open_browser = True +default_user_data_parent = D:/tmp + [session_options] headers = {