mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
调整项目文件目录
This commit is contained in:
parent
19f99b4d62
commit
aafbc7a839
@ -4,10 +4,10 @@
|
||||
@Contact : g1879@qq.com
|
||||
"""
|
||||
# 常用页面类
|
||||
from .chromium_page import ChromiumPage
|
||||
from .session_page import SessionPage
|
||||
from .web_page import WebPage
|
||||
from ._pages.chromium_page import ChromiumPage
|
||||
from ._pages.session_page import SessionPage
|
||||
from ._pages.web_page import WebPage
|
||||
|
||||
# 启动配置类
|
||||
from .configs.chromium_options import ChromiumOptions
|
||||
from .configs.session_options import SessionOptions
|
||||
from ._configs.chromium_options import ChromiumOptions
|
||||
from ._configs.session_options import SessionOptions
|
||||
|
@ -10,10 +10,10 @@ from urllib.parse import quote
|
||||
|
||||
from DownloadKit import DownloadKit
|
||||
|
||||
from .commons.constants import Settings, NoneElement
|
||||
from .commons.locator import get_loc
|
||||
from .commons.web import format_html
|
||||
from .errors import ElementNotFoundError
|
||||
from DrissionPage._commons.constants import Settings, NoneElement
|
||||
from DrissionPage._commons.locator import get_loc
|
||||
from DrissionPage._commons.web import format_html
|
||||
from DrissionPage.errors import ElementNotFoundError
|
||||
|
||||
|
||||
class BaseParser(object):
|
||||
@ -367,7 +367,7 @@ class BasePage(BaseParser):
|
||||
self.retry_times = 3
|
||||
self.retry_interval = 2
|
||||
self._DownloadKit = None
|
||||
self._download_path = str(Path('.').absolute())
|
||||
self._download_path = str(Path('../..').absolute())
|
||||
|
||||
@property
|
||||
def title(self):
|
@ -8,7 +8,7 @@ from typing import Union, Tuple, List
|
||||
|
||||
from DownloadKit import DownloadKit
|
||||
|
||||
from .commons.constants import NoneElement
|
||||
from DrissionPage._commons.constants import NoneElement
|
||||
|
||||
|
||||
class BaseParser(object):
|
@ -1,7 +1,11 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
"""
|
||||
@Author : g1879
|
||||
@Contact : g1879@qq.com
|
||||
"""
|
||||
from time import sleep
|
||||
|
||||
from .browser_download_manager import BrowserDownloadManager
|
||||
from DrissionPage._units.browser_download_manager import BrowserDownloadManager
|
||||
from .chromium_driver import BrowserDriver
|
||||
|
||||
|
@ -1,8 +1,12 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
"""
|
||||
@Author : g1879
|
||||
@Contact : g1879@qq.com
|
||||
"""
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from .browser_download_manager import BrowserDownloadManager
|
||||
from .chromium_page import ChromiumPage
|
||||
from DrissionPage._units.browser_download_manager import BrowserDownloadManager
|
||||
from DrissionPage._pages.chromium_page import ChromiumPage
|
||||
from .chromium_driver import BrowserDriver
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
@Author : g1879
|
||||
@Contact : g1879@qq.com
|
||||
"""
|
||||
from DrissionPage.configs.chromium_options import ChromiumOptions
|
||||
from DrissionPage._configs.chromium_options import ChromiumOptions
|
||||
|
||||
|
||||
# Type stub: accepts a ChromiumOptions instance and returns a tuple.
# The contents of the tuple are defined by the implementation, which is not visible in this stub.
def connect_browser(option: ChromiumOptions) -> tuple: ...
|
@ -1,6 +1,11 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
"""
|
||||
@Author : g1879
|
||||
@Contact : g1879@qq.com
|
||||
"""
|
||||
from click import command, option
|
||||
|
||||
from DrissionPage import ChromiumPage
|
||||
from DrissionPage._pages.chromium_page import ChromiumPage
|
||||
from DrissionPage.easy_set import set_paths, configs_to_here as ch
|
||||
|
||||
|
@ -8,7 +8,7 @@ from pathlib import Path
|
||||
from typing import Union
|
||||
from types import FunctionType
|
||||
|
||||
from chromium_page import ChromiumPage
|
||||
from DrissionPage._pages.chromium_page import ChromiumPage
|
||||
|
||||
|
||||
# Type stub: takes a path (str or Path) plus two boolean flags that both default to True,
# and returns a Path. NOTE(review): the meaning of `is_file` and `parents` is defined by the
# implementation, which is not visible here - confirm there before relying on it.
def get_usable_path(path: Union[str, Path], is_file: bool = True, parents: bool = True) -> Path: ...
|
@ -10,8 +10,8 @@ from requests import Session
|
||||
from requests.cookies import RequestsCookieJar
|
||||
|
||||
from DrissionPage.base import DrissionElement, BasePage
|
||||
from DrissionPage.chromium_element import ChromiumElement
|
||||
from DrissionPage.chromium_base import ChromiumBase
|
||||
from DrissionPage._chromium_element import ChromiumElement
|
||||
from DrissionPage._chromium_base import ChromiumBase
|
||||
|
||||
|
||||
# Type stub: takes a DrissionElement and returns a str
# (presumably the element's text - confirm against the implementation).
def get_ele_txt(e: DrissionElement) -> str: ...
|
@ -6,7 +6,7 @@
|
||||
from pathlib import Path
|
||||
from tempfile import gettempdir, TemporaryDirectory
|
||||
|
||||
from DrissionPage.commons.tools import port_is_using, clean_folder
|
||||
from DrissionPage._commons.tools import port_is_using, clean_folder
|
||||
from .options_manage import OptionsManager
|
||||
|
||||
|
@ -8,7 +8,7 @@ from pathlib import Path
|
||||
from requests import Session
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
from DrissionPage.commons.web import cookies_to_tuple, set_session_cookies
|
||||
from DrissionPage._commons.web import cookies_to_tuple, set_session_cookies
|
||||
from .options_manage import OptionsManager
|
||||
|
||||
|
@ -7,17 +7,17 @@ from os.path import basename, sep
|
||||
from pathlib import Path
|
||||
from time import perf_counter, sleep
|
||||
|
||||
from .base import DrissionElement, BaseElement
|
||||
from .commons.constants import FRAME_ELEMENT, NoneElement, Settings
|
||||
from .commons.keys import keys_to_typing, keyDescriptionForString, keyDefinitions
|
||||
from .commons.locator import get_loc
|
||||
from .commons.tools import get_usable_path
|
||||
from .commons.web import make_absolute_link, get_ele_txt, format_html, is_js_func, location_in_viewport, offset_scroll
|
||||
from .errors import ContextLossError, ElementLossError, JavaScriptError, NoRectError, ElementNotFoundError, \
|
||||
from DrissionPage._base.base import DrissionElement, BaseElement
|
||||
from DrissionPage._commons.constants import FRAME_ELEMENT, NoneElement, Settings
|
||||
from DrissionPage._commons.keys import keys_to_typing, keyDescriptionForString, keyDefinitions
|
||||
from DrissionPage._commons.locator import get_loc
|
||||
from DrissionPage._commons.tools import get_usable_path
|
||||
from DrissionPage._commons.web import make_absolute_link, get_ele_txt, format_html, is_js_func, location_in_viewport, offset_scroll
|
||||
from DrissionPage.errors import ContextLossError, ElementLossError, JavaScriptError, NoRectError, ElementNotFoundError, \
|
||||
CDPError, NoResourceError, CanNotClickError
|
||||
from .session_element import make_session_ele
|
||||
from .setter import ChromiumElementSetter
|
||||
from .waiter import ChromiumElementWaiter
|
||||
from DrissionPage._units.setter import ChromiumElementSetter
|
||||
from DrissionPage._units.waiter import ChromiumElementWaiter
|
||||
|
||||
|
||||
class ChromiumElement(DrissionElement):
|
||||
@ -1203,7 +1203,7 @@ def make_chromium_ele(page, node_id=None, obj_id=None):
|
||||
|
||||
ele = ChromiumElement(page, obj_id=obj_id, node_id=node_id, backend_id=backend_id)
|
||||
if ele.tag in FRAME_ELEMENT:
|
||||
from .chromium_frame import ChromiumFrame
|
||||
from ._chromium_frame import ChromiumFrame
|
||||
ele = ChromiumFrame(page, ele)
|
||||
|
||||
return ele
|
@ -6,15 +6,15 @@
|
||||
from pathlib import Path
|
||||
from typing import Union, Tuple, List, Any
|
||||
|
||||
from .base import DrissionElement, BaseElement
|
||||
from .chromium_base import ChromiumBase
|
||||
from .chromium_frame import ChromiumFrame
|
||||
from .chromium_page import ChromiumPage
|
||||
from .commons.constants import NoneElement
|
||||
from .session_element import SessionElement
|
||||
from .setter import ChromiumElementSetter
|
||||
from .waiter import ChromiumElementWaiter
|
||||
from .web_page import WebPage
|
||||
from DrissionPage._base.base import DrissionElement, BaseElement
|
||||
from DrissionPage._commons.constants import NoneElement
|
||||
from DrissionPage._elements.session_element import SessionElement
|
||||
from DrissionPage._pages.chromium_base import ChromiumBase
|
||||
from DrissionPage._pages.chromium_frame import ChromiumFrame
|
||||
from DrissionPage._pages.chromium_page import ChromiumPage
|
||||
from DrissionPage._pages.web_page import WebPage
|
||||
from DrissionPage._units.setter import ChromiumElementSetter
|
||||
from DrissionPage._units.waiter import ChromiumElementWaiter
|
||||
|
||||
|
||||
class ChromiumElement(DrissionElement):
|
@ -9,10 +9,10 @@ from re import match, DOTALL
|
||||
from lxml.etree import tostring
|
||||
from lxml.html import HtmlElement, fromstring
|
||||
|
||||
from .base import DrissionElement, BasePage, BaseElement
|
||||
from .commons.constants import NoneElement
|
||||
from .commons.locator import get_loc
|
||||
from .commons.web import get_ele_txt, make_absolute_link
|
||||
from DrissionPage._base.base import DrissionElement, BasePage, BaseElement
|
||||
from DrissionPage._commons.constants import NoneElement
|
||||
from DrissionPage._commons.locator import get_loc
|
||||
from DrissionPage._commons.web import get_ele_txt, make_absolute_link
|
||||
|
||||
|
||||
class SessionElement(DrissionElement):
|
@ -7,12 +7,12 @@ from typing import Union, List, Tuple
|
||||
|
||||
from lxml.html import HtmlElement
|
||||
|
||||
from .base import DrissionElement, BaseElement
|
||||
from .chromium_base import ChromiumBase
|
||||
from .chromium_element import ChromiumElement
|
||||
from .chromium_frame import ChromiumFrame
|
||||
from .commons.constants import NoneElement
|
||||
from .session_page import SessionPage
|
||||
from DrissionPage._base.base import DrissionElement, BaseElement
|
||||
from DrissionPage._commons.constants import NoneElement
|
||||
from DrissionPage._elements.chromium_element import ChromiumElement
|
||||
from DrissionPage._pages.chromium_base import ChromiumBase
|
||||
from DrissionPage._pages.chromium_frame import ChromiumFrame
|
||||
from DrissionPage._pages.session_page import SessionPage
|
||||
|
||||
|
||||
class SessionElement(DrissionElement):
|
@ -13,20 +13,20 @@ from time import perf_counter, sleep, time
|
||||
|
||||
from requests import get
|
||||
|
||||
from .action_chains import ActionChains
|
||||
from .base import BasePage
|
||||
from .chromium_driver import ChromiumDriver
|
||||
from .chromium_element import ChromiumScroll, ChromiumElement, run_js, make_chromium_ele
|
||||
from .commons.constants import HANDLE_ALERT_METHOD, ERROR, NoneElement
|
||||
from .commons.locator import get_loc
|
||||
from .commons.tools import get_usable_path, clean_folder
|
||||
from .commons.web import location_in_viewport
|
||||
from .errors import ContextLossError, ElementLossError, AlertExistsError, CDPError, TabClosedError, \
|
||||
from DrissionPage._base.base import BasePage
|
||||
from DrissionPage._base.chromium_driver import ChromiumDriver
|
||||
from DrissionPage._commons.constants import HANDLE_ALERT_METHOD, ERROR, NoneElement
|
||||
from DrissionPage._commons.locator import get_loc
|
||||
from DrissionPage._commons.tools import get_usable_path, clean_folder
|
||||
from DrissionPage._commons.web import location_in_viewport
|
||||
from DrissionPage._elements.chromium_element import ChromiumScroll, ChromiumElement, run_js, make_chromium_ele
|
||||
from DrissionPage._elements.session_element import make_session_ele
|
||||
from DrissionPage._units.network_listener import NetworkListener
|
||||
from DrissionPage._units.setter import ChromiumBaseSetter
|
||||
from DrissionPage._units.waiter import ChromiumBaseWaiter
|
||||
from DrissionPage.errors import ContextLossError, ElementLossError, AlertExistsError, CDPError, TabClosedError, \
|
||||
NoRectError, BrowserConnectError, GetDocumentError
|
||||
from .network_listener import NetworkListener
|
||||
from .session_element import make_session_ele
|
||||
from .setter import ChromiumBaseSetter
|
||||
from .waiter import ChromiumBaseWaiter
|
||||
from _units.action_chains import ActionChains
|
||||
|
||||
|
||||
class ChromiumBase(BasePage):
|
||||
@ -48,7 +48,7 @@ class ChromiumBase(BasePage):
|
||||
self._actions = None
|
||||
self._listener = None
|
||||
|
||||
self._download_path = str(Path('.').absolute())
|
||||
self._download_path = str(Path('../..').absolute())
|
||||
|
||||
if isinstance(address, int) or (isinstance(address, str) and address.isdigit()):
|
||||
address = f'127.0.0.1:{address}'
|
@ -8,18 +8,18 @@ from typing import Union, Tuple, List, Any
|
||||
|
||||
from DataRecorder import Recorder
|
||||
|
||||
from .browser import Browser
|
||||
from .action_chains import ActionChains
|
||||
from .base import BasePage
|
||||
from .chromium_driver import ChromiumDriver
|
||||
from .chromium_element import ChromiumElement, ChromiumScroll
|
||||
from .chromium_frame import ChromiumFrame
|
||||
from .chromium_page import ChromiumPage
|
||||
from .commons.constants import NoneElement
|
||||
from .network_listener import NetworkListener
|
||||
from .session_element import SessionElement
|
||||
from .setter import ChromiumBaseSetter
|
||||
from .waiter import ChromiumBaseWaiter
|
||||
from DrissionPage._base.base import BasePage
|
||||
from DrissionPage._base.browser import Browser
|
||||
from DrissionPage._base.chromium_driver import ChromiumDriver
|
||||
from DrissionPage._commons.constants import NoneElement
|
||||
from DrissionPage._elements.chromium_element import ChromiumElement, ChromiumScroll
|
||||
from DrissionPage._elements.session_element import SessionElement
|
||||
from DrissionPage._pages.chromium_frame import ChromiumFrame
|
||||
from DrissionPage._pages.chromium_page import ChromiumPage
|
||||
from DrissionPage._units.action_chains import ActionChains
|
||||
from DrissionPage._units.network_listener import NetworkListener
|
||||
from DrissionPage._units.setter import ChromiumBaseSetter
|
||||
from DrissionPage._units.waiter import ChromiumBaseWaiter
|
||||
|
||||
|
||||
class ChromiumBase(BasePage):
|
@ -10,11 +10,11 @@ from time import sleep, perf_counter
|
||||
|
||||
from requests import get
|
||||
|
||||
from .chromium_base import ChromiumBase, ChromiumPageScroll
|
||||
from .chromium_element import ChromiumElement
|
||||
from .errors import ContextLossError
|
||||
from .setter import ChromiumFrameSetter
|
||||
from .waiter import FrameWaiter
|
||||
from DrissionPage._elements.chromium_element import ChromiumElement
|
||||
from DrissionPage._pages.chromium_base import ChromiumBase, ChromiumPageScroll
|
||||
from DrissionPage._units.setter import ChromiumFrameSetter
|
||||
from DrissionPage._units.waiter import FrameWaiter
|
||||
from DrissionPage.errors import ContextLossError
|
||||
|
||||
|
||||
class ChromiumFrame(ChromiumBase):
|
@ -6,12 +6,13 @@
|
||||
from pathlib import Path
|
||||
from typing import Union, Tuple, List, Any
|
||||
|
||||
from DrissionPage import ChromiumPage, WebPage
|
||||
from .chromium_tab import ChromiumTab
|
||||
from .chromium_base import ChromiumBase, ChromiumPageScroll
|
||||
from .chromium_element import ChromiumElement, Locations, ChromiumElementStates
|
||||
from .setter import ChromiumFrameSetter
|
||||
from .waiter import FrameWaiter
|
||||
from DrissionPage._elements.chromium_element import ChromiumElement, Locations, ChromiumElementStates
|
||||
from DrissionPage._pages.chromium_base import ChromiumBase, ChromiumPageScroll
|
||||
from DrissionPage._pages.chromium_page import ChromiumPage
|
||||
from DrissionPage._pages.chromium_tab import ChromiumTab
|
||||
from DrissionPage._pages.web_page import WebPage
|
||||
from DrissionPage._units.setter import ChromiumFrameSetter
|
||||
from DrissionPage._units.waiter import FrameWaiter
|
||||
|
||||
|
||||
class ChromiumFrame(ChromiumBase):
|
@ -8,14 +8,14 @@ from time import perf_counter, sleep
|
||||
|
||||
from requests import get
|
||||
|
||||
from .browser import Browser
|
||||
from .chromium_base import ChromiumBase, Timeout
|
||||
from .chromium_driver import ChromiumDriver
|
||||
from .chromium_tab import ChromiumTab
|
||||
from .commons.browser import connect_browser
|
||||
from .configs.chromium_options import ChromiumOptions
|
||||
from .setter import ChromiumPageSetter
|
||||
from .waiter import ChromiumPageWaiter
|
||||
from DrissionPage._base.browser import Browser
|
||||
from DrissionPage._base.chromium_driver import ChromiumDriver
|
||||
from DrissionPage._commons.browser import connect_browser
|
||||
from DrissionPage._configs.chromium_options import ChromiumOptions
|
||||
from DrissionPage._pages.chromium_base import ChromiumBase, Timeout
|
||||
from DrissionPage._pages.chromium_tab import ChromiumTab
|
||||
from DrissionPage._units.setter import ChromiumPageSetter
|
||||
from DrissionPage._units.waiter import ChromiumPageWaiter
|
||||
|
||||
|
||||
class ChromiumPage(ChromiumBase):
|
@ -5,13 +5,13 @@
|
||||
"""
|
||||
from typing import Union, Tuple, List, Optional
|
||||
|
||||
from .browser import Browser
|
||||
from .chromium_base import ChromiumBase
|
||||
from .chromium_driver import ChromiumDriver
|
||||
from .chromium_tab import ChromiumTab
|
||||
from .configs.chromium_options import ChromiumOptions
|
||||
from .setter import ChromiumPageSetter
|
||||
from .waiter import ChromiumPageWaiter
|
||||
from DrissionPage._base.browser import Browser
|
||||
from DrissionPage._pages.chromium_base import ChromiumBase
|
||||
from DrissionPage._base.chromium_driver import ChromiumDriver
|
||||
from DrissionPage._pages.chromium_tab import ChromiumTab
|
||||
from DrissionPage._configs.chromium_options import ChromiumOptions
|
||||
from DrissionPage._units.setter import ChromiumPageSetter
|
||||
from DrissionPage._units.waiter import ChromiumPageWaiter
|
||||
|
||||
|
||||
class ChromiumPage(ChromiumBase):
|
@ -5,13 +5,12 @@
|
||||
"""
|
||||
from copy import copy
|
||||
|
||||
from .base import BasePage
|
||||
from .chromium_base import ChromiumBase
|
||||
from .commons.web import set_session_cookies, set_browser_cookies
|
||||
from .session_page import SessionPage
|
||||
from .setter import TabSetter
|
||||
from .setter import WebPageTabSetter
|
||||
from .waiter import ChromiumTabWaiter
|
||||
from DrissionPage._base.base import BasePage
|
||||
from DrissionPage._commons.web import set_session_cookies, set_browser_cookies
|
||||
from DrissionPage._pages.chromium_base import ChromiumBase
|
||||
from DrissionPage._pages.session_page import SessionPage
|
||||
from DrissionPage._units.setter import TabSetter, WebPageTabSetter
|
||||
from DrissionPage._units.waiter import ChromiumTabWaiter
|
||||
|
||||
|
||||
class ChromiumTab(ChromiumBase):
|
@ -7,17 +7,16 @@ from typing import Union, Tuple, Any, List
|
||||
|
||||
from requests import Session, Response
|
||||
|
||||
from .browser import Browser
|
||||
from .chromium_base import ChromiumBase
|
||||
from .chromium_element import ChromiumElement
|
||||
from .chromium_frame import ChromiumFrame
|
||||
from .chromium_page import ChromiumPage, ChromiumTabRect
|
||||
from .session_element import SessionElement
|
||||
from .session_page import SessionPage
|
||||
from .setter import TabSetter
|
||||
from .setter import WebPageTabSetter
|
||||
from .waiter import ChromiumTabWaiter
|
||||
from .web_page import WebPage
|
||||
from DrissionPage._base.browser import Browser
|
||||
from DrissionPage._elements.chromium_element import ChromiumElement
|
||||
from DrissionPage._elements.session_element import SessionElement
|
||||
from DrissionPage._pages.chromium_base import ChromiumBase
|
||||
from DrissionPage._pages.chromium_frame import ChromiumFrame
|
||||
from DrissionPage._pages.chromium_page import ChromiumPage, ChromiumTabRect
|
||||
from DrissionPage._pages.session_page import SessionPage
|
||||
from DrissionPage._pages.web_page import WebPage
|
||||
from DrissionPage._units.setter import TabSetter, WebPageTabSetter
|
||||
from DrissionPage._units.waiter import ChromiumTabWaiter
|
||||
|
||||
|
||||
class ChromiumTab(ChromiumBase):
|
@ -1,336 +1,336 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
"""
|
||||
@Author : g1879
|
||||
@Contact : g1879@qq.com
|
||||
"""
|
||||
from re import search
|
||||
from time import sleep
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from requests import Session
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
from tldextract import extract
|
||||
|
||||
from .base import BasePage
|
||||
from .commons.web import cookie_to_dict
|
||||
from .configs.session_options import SessionOptions
|
||||
from .session_element import SessionElement, make_session_ele
|
||||
from .setter import SessionPageSetter
|
||||
|
||||
|
||||
class SessionPage(BasePage):
    """Page object that fetches pages with requests and parses the responses."""

    def __init__(self, session_or_options=None, timeout=None):
        """
        :param session_or_options: a Session object or a SessionOptions object
        :param timeout: connection timeout; when None the value taken from the session options is kept
        """
        # super() is resolved against SessionPage's own MRO (not type(self)'s), so this
        # always reaches the class that follows SessionPage, even when called on a subclass.
        super(SessionPage, SessionPage).__init__(self)
        self._response = None
        self._session = None
        self._set = None
        self._s_set_start_options(session_or_options, None)
        self._s_set_runtime_settings()
        self._create_session()
        if timeout is not None:
            self.timeout = timeout

    def _s_set_start_options(self, session_or_options, none):
        """Store the start-up configuration.
        :param session_or_options: a Session or SessionOptions object (or a falsy value for defaults)
        :param none: unused here; kept so subclasses can share the signature
        :return: None
        """
        if not session_or_options or isinstance(session_or_options, SessionOptions):
            self._session_options = session_or_options or SessionOptions(session_or_options)

        elif isinstance(session_or_options, Session):
            # An existing Session is adopted as-is; options fall back to defaults.
            self._session_options = SessionOptions()
            self._session = session_or_options

    def _s_set_runtime_settings(self):
        """Copy the attributes used at runtime (timeout, download path) from the session options."""
        self._timeout = self._session_options.timeout
        self._download_path = self._session_options.download_path

    def _create_session(self):
        """Create the internal Session from the options unless one was supplied."""
        if not self._session:
            self._session = self._session_options.make_session()

    def __call__(self, loc_or_str, timeout=None):
        """Find an element in the page; shorthand for ele().
        Example: ele2 = ele1('@id=ele_id')
        :param loc_or_str: element locator, either a loc tuple or a query string
        :param timeout: has no effect; present so the call matches ChromiumElement's signature
        :return: SessionElement object or attribute text
        """
        return self.ele(loc_or_str)

    # ----------------- shared properties and methods -------------------
    @property
    def title(self):
        """Return the text of the page's <title> element, or None when there is none."""
        ele = self._ele('xpath://title', raise_err=False)
        return ele.text if ele else None

    @property
    def url(self):
        """Return the url currently being visited."""
        return self._url

    @property
    def _session_url(self):
        """Return the url currently being visited."""
        return self._url

    @property
    def html(self):
        """Return the html text of the page, or '' when there is no usable response."""
        return self.response.text if self.response else ''

    @property
    def json(self):
        """Return the response body parsed as json, or None when it cannot be parsed."""
        try:
            return self.response.json()
        except Exception:
            # Best effort: also covers the case where no response exists yet.
            return None

    @property
    def user_agent(self):
        """Return the user agent header of the session ('' when unset)."""
        return self.session.headers.get('user-agent', '')

    @property
    def session(self):
        """Return the Session object."""
        return self._session

    @property
    def response(self):
        """Return the Response object obtained from the last request."""
        return self._response

    @property
    def set(self):
        """Return the SessionPageSetter bound to this page, creating it on first access."""
        if self._set is None:
            self._set = SessionPageSetter(self)
        return self._set

    def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
        """Navigate to url with a GET request.
        :param url: target url
        :param show_errmsg: whether to show and raise errors
        :param retry: number of retries
        :param interval: seconds between retries
        :param timeout: connection timeout in seconds; None uses the page's own timeout
        :param kwargs: extra connection arguments
        :return: whether the url is available
        """
        # Forward the explicit timeout; it used to be accepted but silently dropped.
        if timeout is not None:
            kwargs['timeout'] = timeout
        return self._s_connect(url, 'get', None, show_errmsg, retry, interval, **kwargs)

    def ele(self, loc_or_ele, timeout=None):
        """Return the first element, attribute or node text matching the locator.
        :param loc_or_ele: element locator; may be an element object, a loc tuple or a query string
        :param timeout: has no effect; present so the call matches ChromiumElement's signature
        :return: SessionElement object, or attribute / text
        """
        return self._ele(loc_or_ele)

    def eles(self, loc_or_str, timeout=None):
        """Return all elements, attributes or node texts matching the locator.
        :param loc_or_str: element locator, either a loc tuple or a query string
        :param timeout: has no effect; present so the call matches ChromiumElement's signature
        :return: list of SessionElement objects, or attributes / texts
        """
        return self._ele(loc_or_str, single=False)

    def s_ele(self, loc_or_ele=None):
        """Return the first matching element, or an element built from the whole page html when no locator is given.
        :param loc_or_ele: element locator; may be an element object, a loc tuple or a query string
        :return: SessionElement object, or attribute / text
        """
        return make_session_ele(self.html) if loc_or_ele is None else self._ele(loc_or_ele)

    def s_eles(self, loc_or_str):
        """Return all elements, attributes or node texts matching the locator.
        :param loc_or_str: element locator; may be an element object, a loc tuple or a query string
        :return: SessionElement objects, or attributes / texts
        """
        return self._ele(loc_or_str, single=False)

    def _find_elements(self, loc_or_ele, timeout=None, single=True, raise_err=None):
        """Return matching elements, attributes or node texts; the first one by default.
        :param loc_or_ele: element locator; may be an element object, a loc tuple or a query string
        :param timeout: not used in this method; present to match the parent class
        :param single: True returns the first match, False returns all of them
        :param raise_err: not used in this method
        :return: SessionElement object
        """
        return loc_or_ele if isinstance(loc_or_ele, SessionElement) else make_session_ele(self, loc_or_ele, single)

    def get_cookies(self, as_dict=False, all_domains=False, all_info=False):
        """Return cookies held by the session.
        :param as_dict: return a {name: value} dict instead of a list
        :param all_domains: return cookies of every domain
        :param all_info: return all cookie fields; False returns only name, value and domain
        :return: cookie information
        """
        if all_domains:
            cookies = self.session.cookies
        else:
            if self.url:
                # Keep cookies whose domain contains the current registered domain,
                # plus cookies stored without a domain.
                ex_url = extract(self._session_url)
                domain = f'{ex_url.domain}.{ex_url.suffix}' if ex_url.suffix else ex_url.domain

                cookies = tuple(x for x in self.session.cookies if domain in x.domain or x.domain == '')
            else:
                cookies = tuple(x for x in self.session.cookies)

        if as_dict:
            return {x.name: x.value for x in cookies}
        elif all_info:
            return [cookie_to_dict(cookie) for cookie in cookies]
        else:
            r = []
            for c in cookies:
                c = cookie_to_dict(c)
                r.append({'name': c['name'], 'value': c['value'], 'domain': c['domain']})
            return r

    def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
        """Navigate to url with a POST request.
        :param url: target url
        :param data: data to submit
        :param show_errmsg: whether to show and raise errors
        :param retry: number of retries
        :param interval: seconds between retries
        :param kwargs: extra connection arguments
        :return: whether the url is available
        """
        return self._s_connect(url, 'post', data, show_errmsg, retry, interval, **kwargs)

    def _s_connect(self, url, mode, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
        """Perform a get or post connection and record whether the url is available.
        :param url: target url
        :param mode: 'get' or 'post'
        :param data: data to submit
        :param show_errmsg: whether to show and raise errors
        :param retry: number of retries
        :param interval: seconds between retries
        :param kwargs: extra connection arguments
        :return: whether the url is available
        """
        # NOTE(review): self._url is presumably set by _before_connect (defined in the parent class).
        retry, interval = self._before_connect(url, retry, interval)
        self._response, info = self._make_response(self._url, mode, data, retry, interval, show_errmsg, **kwargs)

        if self._response is None:
            self._url_available = False

        else:
            if self._response.ok:
                self._url_available = True

            else:
                if show_errmsg:
                    raise ConnectionError(f'状态码:{self._response.status_code}.')
                self._url_available = False

        return self._url_available

    def _make_response(self, url, mode='get', data=None, retry=None, interval=None, show_errmsg=False, **kwargs):
        """Send the request and build the Response object.
        :param url: target url
        :param mode: 'get' or 'post'
        :param data: data to submit when posting
        :param retry: number of retries; None uses self.retry_times
        :param interval: seconds between retries; None uses self.retry_interval
        :param show_errmsg: whether to show and raise errors
        :param kwargs: other arguments passed on to the session call
        :return: tuple; first item is the Response or None, second is the error information or 'Success'
        """
        kwargs = CaseInsensitiveDict(kwargs)
        if 'headers' not in kwargs:
            kwargs['headers'] = {}
        else:
            kwargs['headers'] = CaseInsensitiveDict(kwargs['headers'])

        # Fill in Referer and Host when the caller did not provide them.
        parsed_url = urlparse(url)
        hostname = parsed_url.hostname
        scheme = parsed_url.scheme
        if not check_headers(kwargs, self.session.headers, 'Referer'):
            kwargs['headers']['Referer'] = self.url if self.url else f'{scheme}://{hostname}'
        if 'Host' not in kwargs['headers']:
            # NOTE(review): hostname carries no port, so a url with a non-default port
            # gets a Host header without it - confirm this is intended.
            kwargs['headers']['Host'] = hostname

        # timeout is a request argument, not a header: only apply the page default
        # when the caller did not pass one.
        if 'timeout' not in kwargs:
            kwargs['timeout'] = self.timeout

        r = err = None
        retry = retry if retry is not None else self.retry_times
        interval = interval if interval is not None else self.retry_interval
        for i in range(retry + 1):
            try:
                if mode == 'get':
                    r = self.session.get(url, **kwargs)
                elif mode == 'post':
                    r = self.session.post(url, data=data, **kwargs)

                # A Response is truthy only when its status is ok.
                if r:
                    return set_charset(r), 'Success'

            except Exception as e:
                err = e

            if i < retry:
                sleep(interval)
                if show_errmsg:
                    print(f'重试 {url}')

        if r is None:
            if show_errmsg:
                if err:
                    raise err
                else:
                    raise ConnectionError('连接失败')
            return None, '连接失败' if err is None else err

        if not r.ok:
            if show_errmsg:
                raise ConnectionError(f'状态码:{r.status_code}')
            return r, f'状态码:{r.status_code}'
|
||||
|
||||
|
||||
def check_headers(kwargs, headers, arg):
    """Report whether a header named arg is present in the request headers or the session headers.
    :param kwargs: request arguments; its optional 'headers' entry holds the per-request headers
    :param headers: session-level headers
    :param arg: header name to look for
    :return: True when arg is found in either mapping, otherwise False
    """
    # A missing or None 'headers' entry counts as "no per-request headers" instead of raising.
    request_headers = kwargs.get('headers') or {}
    return arg in request_headers or arg in (headers or {})
|
||||
|
||||
|
||||
def set_charset(response):
    """Set the encoding of a Response object.
    The charset parameter of the content-type header is used when present; otherwise,
    for text/html responses, the charset is taken from a <meta> tag in the body or,
    failing that, from response.apparent_encoding. Other responses are left untouched.
    :param response: Response object to update in place
    :return: the same Response object
    """
    # Look for the charset in the headers first.
    content_type = response.headers.get('content-type', '').lower()
    # Capture only up to the next ';' so the separator and any following
    # parameters do not end up inside the encoding name.
    header_match = search(r'charset[=: ]*([^;]*)', content_type)
    charset = header_match.group(1).strip().strip('"\'') if header_match else ''

    if charset:
        response.encoding = charset

    # No usable charset in the headers and the response is a web page.
    elif content_type.replace(' ', '').startswith('text/html'):
        re_result = search(b'<meta.*?charset=[ \\\'"]*([^"\\\' />]+).*?>', response.content)

        if re_result:
            charset = re_result.group(1).decode()
        else:
            charset = response.apparent_encoding

        response.encoding = charset

    return response
|
||||
# -*- coding:utf-8 -*-
|
||||
"""
|
||||
@Author : g1879
|
||||
@Contact : g1879@qq.com
|
||||
"""
|
||||
from re import search
|
||||
from time import sleep
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from requests import Session
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
from tldextract import extract
|
||||
|
||||
from DrissionPage._base.base import BasePage
|
||||
from DrissionPage._commons.web import cookie_to_dict
|
||||
from DrissionPage._configs.session_options import SessionOptions
|
||||
from DrissionPage._elements.session_element import SessionElement, make_session_ele
|
||||
from DrissionPage._units.setter import SessionPageSetter
|
||||
|
||||
|
||||
class SessionPage(BasePage):
    """Page object that fetches pages with requests and parses them into SessionElement objects."""

    def __init__(self, session_or_options=None, timeout=None):
        """
        :param session_or_options: a Session object or a SessionOptions object
        :param timeout: connection timeout; when None the timeout from the session options is kept
        """
        # super() is bound to SessionPage's own MRO here, so the initializer that
        # runs does not depend on the MRO of a subclass instance.
        super(SessionPage, SessionPage).__init__(self)
        self._response = None
        self._session = None
        self._set = None
        self._s_set_start_options(session_or_options, None)
        self._s_set_runtime_settings()
        self._create_session()
        if timeout is not None:
            self.timeout = timeout

    def _s_set_start_options(self, session_or_options, none):
        """Store the start-up configuration.

        :param session_or_options: Session or SessionOptions object (or a falsy value for defaults)
        :param none: unused here; kept so subclasses can share the signature
        :return: None
        """
        if not session_or_options or isinstance(session_or_options, SessionOptions):
            self._session_options = session_or_options or SessionOptions(session_or_options)

        elif isinstance(session_or_options, Session):
            # An existing Session is used as-is, paired with default options
            self._session_options = SessionOptions()
            self._session = session_or_options

    def _s_set_runtime_settings(self):
        """Copy the settings used at runtime from the session options."""
        self._timeout = self._session_options.timeout
        self._download_path = self._session_options.download_path

    def _create_session(self):
        """Create the built-in Session object unless one was supplied."""
        if not self._session:
            self._session = self._session_options.make_session()

    def __call__(self, loc_or_str, timeout=None):
        """Find an element in the page.

        Example: ele2 = ele1('@id=ele_id')

        :param loc_or_str: locator of the element, either a loc tuple or a query string
        :param timeout: has no effect; present so the call matches ChromiumElement
        :return: SessionElement object or attribute text
        """
        return self.ele(loc_or_str)

    # ----------------- shared properties and methods -------------------
    @property
    def title(self):
        """Return the page title, or None when no title element is found."""
        ele = self._ele('xpath://title', raise_err=False)
        return ele.text if ele else None

    @property
    def url(self):
        """Return the url currently visited."""
        return self._url

    @property
    def _session_url(self):
        """Return the url currently visited."""
        return self._url

    @property
    def html(self):
        """Return the html text of the page, or '' when the response is missing or falsy."""
        return self.response.text if self.response else ''

    @property
    def json(self):
        """Return the parsed json body, or None when it cannot be parsed."""
        try:
            return self.response.json()
        except Exception:
            # Best effort: no response or a non-json body both yield None
            return None

    @property
    def user_agent(self):
        """Return the user agent found in the session headers ('' if absent)."""
        return self.session.headers.get('user-agent', '')

    @property
    def session(self):
        """Return the Session object."""
        return self._session

    @property
    def response(self):
        """Return the Response object obtained from the last request."""
        return self._response

    @property
    def set(self):
        """Return the SessionPageSetter bound to this page, creating it on first use."""
        if self._set is None:
            self._set = SessionPageSetter(self)
        return self._set

    def get(self, url, show_errmsg=False, retry=None, interval=None, timeout=None, **kwargs):
        """Navigate to a url with a GET request.

        :param url: target url
        :param show_errmsg: whether to print messages and raise exceptions
        :param retry: number of retries
        :param interval: seconds between retries
        :param timeout: connection timeout in seconds; None keeps the page timeout
        :param kwargs: extra request arguments
        :return: whether the url is available
        """
        if timeout is not None:
            # Forward the timeout as a request argument; it used to be accepted but ignored
            kwargs['timeout'] = timeout
        return self._s_connect(url, 'get', None, show_errmsg, retry, interval, **kwargs)

    def ele(self, loc_or_ele, timeout=None):
        """Return the first element, attribute or text node matching the locator.

        :param loc_or_ele: an element object, a loc tuple, or a query string
        :param timeout: has no effect; present so the call matches ChromiumElement
        :return: SessionElement object, or attribute / text
        """
        return self._ele(loc_or_ele)

    def eles(self, loc_or_str, timeout=None):
        """Return all elements, attributes or text nodes matching the locator.

        :param loc_or_str: a loc tuple or a query string
        :param timeout: has no effect; present so the call matches ChromiumElement
        :return: list of SessionElement objects, or attributes / texts
        """
        return self._ele(loc_or_str, single=False)

    def s_ele(self, loc_or_ele=None):
        """Return the first match, or an element built from the page html when no locator is given.

        :param loc_or_ele: an element object, a loc tuple, or a query string
        :return: SessionElement object, or attribute / text
        """
        return make_session_ele(self.html) if loc_or_ele is None else self._ele(loc_or_ele)

    def s_eles(self, loc_or_str):
        """Return all elements, attributes or text nodes matching the locator.

        :param loc_or_str: an element object, a loc tuple, or a query string
        :return: SessionElement objects, or attributes / texts
        """
        return self._ele(loc_or_str, single=False)

    def _find_elements(self, loc_or_ele, timeout=None, single=True, raise_err=None):
        """Return matching elements, attributes or text nodes; the first one by default.

        :param loc_or_ele: an element object, a loc tuple, or a query string
        :param timeout: has no effect; present to match the parent class
        :param single: True returns the first match, False returns all matches
        :param raise_err: not used in this method
        :return: SessionElement object
        """
        return loc_or_ele if isinstance(loc_or_ele, SessionElement) else make_session_ele(self, loc_or_ele, single)

    def get_cookies(self, as_dict=False, all_domains=False, all_info=False):
        """Return cookies.

        :param as_dict: return a name -> value dict instead of a list
        :param all_domains: return cookies of every domain
        :param all_info: return every field; False returns only name, value and domain
        :return: cookie information
        """
        if all_domains:
            cookies = self.session.cookies
        else:
            if self.url:
                ex_url = extract(self._session_url)
                domain = f'{ex_url.domain}.{ex_url.suffix}' if ex_url.suffix else ex_url.domain

                # Cookies without a domain are kept as well
                cookies = tuple(x for x in self.session.cookies if domain in x.domain or x.domain == '')
            else:
                cookies = tuple(x for x in self.session.cookies)

        if as_dict:
            return {x.name: x.value for x in cookies}
        elif all_info:
            return [cookie_to_dict(cookie) for cookie in cookies]
        else:
            r = []
            for c in cookies:
                c = cookie_to_dict(c)
                r.append({'name': c['name'], 'value': c['value'], 'domain': c['domain']})
            return r

    def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
        """Navigate to a url with a POST request.

        :param url: target url
        :param data: data to submit
        :param show_errmsg: whether to print messages and raise exceptions
        :param retry: number of retries
        :param interval: seconds between retries
        :param kwargs: extra request arguments
        :return: whether the url is available
        """
        return self._s_connect(url, 'post', data, show_errmsg, retry, interval, **kwargs)

    def _s_connect(self, url, mode, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
        """Perform a GET or POST connection.

        :param url: target url
        :param mode: 'get' or 'post'
        :param data: data to submit
        :param show_errmsg: whether to print messages and raise exceptions
        :param retry: number of retries
        :param interval: seconds between retries
        :param kwargs: extra request arguments
        :return: whether the url is available
        """
        # NOTE(review): self._url is presumably set by _before_connect (base class, not shown here)
        retry, interval = self._before_connect(url, retry, interval)
        self._response, info = self._make_response(self._url, mode, data, retry, interval, show_errmsg, **kwargs)

        if self._response is None:
            self._url_available = False

        else:
            if self._response.ok:
                self._url_available = True

            else:
                if show_errmsg:
                    raise ConnectionError(f'状态码:{self._response.status_code}.')
                self._url_available = False

        return self._url_available

    def _make_response(self, url, mode='get', data=None, retry=None, interval=None, show_errmsg=False, **kwargs):
        """Send the request and build the Response object.

        :param url: target url
        :param mode: 'get' or 'post'
        :param data: data to submit when posting
        :param retry: number of retries; None uses self.retry_times
        :param interval: seconds between retries; None uses self.retry_interval
        :param show_errmsg: whether to print messages and raise exceptions
        :param kwargs: extra request arguments
        :return: tuple of (Response or None, 'Success' / failure text / the caught exception)
        """
        kwargs = CaseInsensitiveDict(kwargs)
        if 'headers' not in kwargs:
            kwargs['headers'] = {}
        else:
            kwargs['headers'] = CaseInsensitiveDict(kwargs['headers'])

        # Fill in Referer and Host when the caller did not provide them
        parsed_url = urlparse(url)
        hostname = parsed_url.hostname
        scheme = parsed_url.scheme
        if not check_headers(kwargs, self.session.headers, 'Referer'):
            kwargs['headers']['Referer'] = self.url if self.url else f'{scheme}://{hostname}'
        if 'Host' not in kwargs['headers']:
            kwargs['headers']['Host'] = hostname

        # timeout is a request argument, not a header: only apply the page
        # default when the caller did not pass one explicitly.
        if 'timeout' not in kwargs:
            kwargs['timeout'] = self.timeout

        r = err = None
        retry = retry if retry is not None else self.retry_times
        interval = interval if interval is not None else self.retry_interval
        for i in range(retry + 1):
            try:
                if mode == 'get':
                    r = self.session.get(url, **kwargs)
                elif mode == 'post':
                    r = self.session.post(url, data=data, **kwargs)

                # A truthy response ends the retry loop
                if r:
                    return set_charset(r), 'Success'

            except Exception as e:
                # Remember the failure and retry
                err = e

            if i < retry:
                sleep(interval)
                if show_errmsg:
                    print(f'重试 {url}')

        if r is None:
            if show_errmsg:
                if err:
                    raise err
                else:
                    raise ConnectionError('连接失败')
            return None, '连接失败' if err is None else err

        if not r.ok:
            if show_errmsg:
                raise ConnectionError(f'状态码:{r.status_code}')
            return r, f'状态码:{r.status_code}'
|
||||
|
||||
|
||||
def check_headers(kwargs, headers, arg):
    """Report whether a header named ``arg`` is already supplied.

    :param kwargs: request keyword arguments; its optional 'headers' entry is inspected
    :param headers: a second headers mapping to inspect (the caller in this file passes the session headers)
    :param arg: header name to look for
    :return: True if ``arg`` is in ``kwargs['headers']`` or in ``headers``, otherwise False
    """
    # A missing or None 'headers' entry is treated as "no per-request headers"
    # instead of raising KeyError / TypeError.
    request_headers = kwargs.get('headers') or {}
    return arg in request_headers or arg in headers
|
||||
|
||||
|
||||
def set_charset(response):
    """Set the text encoding of a Response object.

    The charset parameter of the Content-Type header is used when it is present
    and non-empty. Otherwise, for ``text/html`` responses, the encoding is taken
    from the first ``<meta ... charset=...>`` tag found in the body, falling back
    to ``response.apparent_encoding``. Other content types are left untouched.

    :param response: Response object to update
    :return: the same Response object
    """
    content_type = response.headers.get('content-type', '').lower()

    # Capture only up to the next ';' so that a trailing separator or any
    # following parameter does not end up inside the encoding name.
    header_match = search(r'charset[=: ]*([^;]*)', content_type)
    # The value may be quoted (charset="utf-8"); drop whitespace and quotes.
    header_charset = header_match.group(1).strip().strip('"\'') if header_match else ''

    if header_charset:
        response.encoding = header_charset

    # No usable charset in the headers and the payload is a web page
    elif content_type.replace(' ', '').startswith('text/html'):
        meta_match = search(b'<meta.*?charset=[ \\\'"]*([^"\\\' />]+).*?>', response.content)

        if meta_match:
            charset = meta_match.group(1).decode()
        else:
            charset = response.apparent_encoding

        response.encoding = charset

    return response
|
@ -8,11 +8,11 @@ from typing import Any, Union, Tuple, List
|
||||
from requests import Session, Response
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
from .base import BasePage
|
||||
from .commons.constants import NoneElement
|
||||
from .configs.session_options import SessionOptions
|
||||
from .session_element import SessionElement
|
||||
from .setter import SessionPageSetter
|
||||
from DrissionPage._base.base import BasePage
|
||||
from DrissionPage._commons.constants import NoneElement
|
||||
from DrissionPage._configs.session_options import SessionOptions
|
||||
from DrissionPage._elements.session_element import SessionElement
|
||||
from DrissionPage._units.setter import SessionPageSetter
|
||||
|
||||
|
||||
class SessionPage(BasePage):
|
@ -3,13 +3,13 @@
|
||||
@Author : g1879
|
||||
@Contact : g1879@qq.com
|
||||
"""
|
||||
from .base import BasePage
|
||||
from .chromium_page import ChromiumPage
|
||||
from .chromium_tab import WebPageTab
|
||||
from .commons.web import set_session_cookies, set_browser_cookies
|
||||
from .configs.chromium_options import ChromiumOptions
|
||||
from DrissionPage._base.base import BasePage
|
||||
from DrissionPage._commons.web import set_session_cookies, set_browser_cookies
|
||||
from DrissionPage._configs.chromium_options import ChromiumOptions
|
||||
from DrissionPage._pages.chromium_page import ChromiumPage
|
||||
from DrissionPage._pages.chromium_tab import WebPageTab
|
||||
from DrissionPage._units.setter import WebPageSetter
|
||||
from .session_page import SessionPage
|
||||
from .setter import WebPageSetter
|
||||
|
||||
|
||||
class WebPage(SessionPage, ChromiumPage, BasePage):
|
@ -5,20 +5,19 @@
|
||||
"""
|
||||
from typing import Union, Tuple, List, Any
|
||||
|
||||
from DownloadKit import DownloadKit
|
||||
from requests import Session, Response
|
||||
|
||||
from .base import BasePage
|
||||
from .chromium_driver import ChromiumDriver
|
||||
from .chromium_element import ChromiumElement
|
||||
from ._base import BasePage
|
||||
from ._chromium_driver import ChromiumDriver
|
||||
from ._chromium_element import ChromiumElement
|
||||
from .chromium_frame import ChromiumFrame
|
||||
from .chromium_page import ChromiumPage
|
||||
from .chromium_tab import WebPageTab
|
||||
from .configs.chromium_options import ChromiumOptions
|
||||
from .configs.session_options import SessionOptions
|
||||
from .session_element import SessionElement
|
||||
from ._configs.chromium_options import ChromiumOptions
|
||||
from ._configs.session_options import SessionOptions
|
||||
from ._session_element import SessionElement
|
||||
from .session_page import SessionPage
|
||||
from .setter import WebPageSetter
|
||||
from ._units.setter import WebPageSetter
|
||||
|
||||
|
||||
class WebPage(SessionPage, ChromiumPage, BasePage):
|
@ -5,8 +5,8 @@
|
||||
"""
|
||||
from time import sleep
|
||||
|
||||
from .commons.keys import modifierBit, keyDescriptionForString
|
||||
from .commons.web import location_in_viewport
|
||||
from DrissionPage._commons.keys import modifierBit, keyDescriptionForString
|
||||
from DrissionPage._commons.web import location_in_viewport
|
||||
|
||||
|
||||
class ActionChains:
|
@ -5,10 +5,9 @@
|
||||
"""
|
||||
from typing import Union, Tuple
|
||||
|
||||
from .chromium_base import ChromiumBase
|
||||
from .chromium_driver import ChromiumDriver
|
||||
from .chromium_element import ChromiumElement
|
||||
from .chromium_page import ChromiumPage
|
||||
from DrissionPage._base.chromium_driver import ChromiumDriver
|
||||
from DrissionPage._elements.chromium_element import ChromiumElement
|
||||
from DrissionPage._pages.chromium_base import ChromiumBase
|
||||
|
||||
|
||||
class ActionChains:
|
@ -4,7 +4,7 @@ from pathlib import Path
|
||||
from shutil import move
|
||||
from time import sleep, perf_counter
|
||||
|
||||
from .commons.tools import get_usable_path
|
||||
from DrissionPage._commons.tools import get_usable_path
|
||||
|
||||
|
||||
class BrowserDownloadManager(object):
|
@ -1,8 +1,8 @@
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, Union
|
||||
|
||||
from .browser import Browser
|
||||
from .chromium_page import ChromiumPage
|
||||
from DrissionPage._base.browser import Browser
|
||||
from DrissionPage._pages.chromium_page import ChromiumPage
|
||||
|
||||
|
||||
class BrowserDownloadManager(object):
|
@ -1,4 +1,8 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
"""
|
||||
@Author : g1879
|
||||
@Contact : g1879@qq.com
|
||||
"""
|
||||
from base64 import b64decode
|
||||
from json import JSONDecodeError, loads
|
||||
from queue import Queue
|
||||
@ -8,7 +12,7 @@ from time import perf_counter, sleep
|
||||
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
from .errors import CDPError
|
||||
from DrissionPage.errors import CDPError
|
||||
|
||||
|
||||
class NetworkListener(object):
|
@ -1,10 +1,15 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
"""
|
||||
@Author : g1879
|
||||
@Contact : g1879@qq.com
|
||||
"""
|
||||
from queue import Queue
|
||||
from typing import Union, Dict, List, Iterable, Tuple
|
||||
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
from chromium_base import ChromiumBase
|
||||
from chromium_driver import ChromiumDriver
|
||||
from DrissionPage._base.chromium_driver import ChromiumDriver
|
||||
from DrissionPage._pages.chromium_base import ChromiumBase
|
||||
|
||||
|
||||
class NetworkListener(object):
|
@ -7,8 +7,8 @@ from pathlib import Path
|
||||
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
from .commons.tools import show_or_hide_browser
|
||||
from .commons.web import set_browser_cookies, set_session_cookies
|
||||
from DrissionPage._commons.tools import show_or_hide_browser
|
||||
from DrissionPage._commons.web import set_browser_cookies, set_session_cookies
|
||||
|
||||
|
||||
class ChromiumBaseSetter(object):
|
@ -11,13 +11,13 @@ from requests.adapters import HTTPAdapter
|
||||
from requests.auth import HTTPBasicAuth
|
||||
from requests.cookies import RequestsCookieJar
|
||||
|
||||
from .chromium_base import ChromiumBase, ChromiumPageScroll
|
||||
from .chromium_element import ChromiumElement
|
||||
from .chromium_frame import ChromiumFrame
|
||||
from .chromium_page import ChromiumPage
|
||||
from .chromium_tab import ChromiumTab
|
||||
from .session_page import SessionPage
|
||||
from .web_page import WebPage
|
||||
from DrissionPage._elements.chromium_element import ChromiumElement
|
||||
from DrissionPage._pages.chromium_base import ChromiumBase, ChromiumPageScroll
|
||||
from DrissionPage._pages.chromium_frame import ChromiumFrame
|
||||
from DrissionPage._pages.chromium_page import ChromiumPage
|
||||
from DrissionPage._pages.chromium_tab import ChromiumTab
|
||||
from DrissionPage._pages.session_page import SessionPage
|
||||
from DrissionPage._pages.web_page import WebPage
|
||||
|
||||
FILE_EXISTS = Literal['skip', 'rename', 'overwrite', 's', 'r', 'o']
|
||||
|
@ -1,8 +1,8 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from time import sleep, perf_counter
|
||||
|
||||
from .commons.constants import Settings
|
||||
from .errors import WaitTimeoutError
|
||||
from DrissionPage._commons.constants import Settings
|
||||
from DrissionPage.errors import WaitTimeoutError
|
||||
|
||||
|
||||
class ChromiumBaseWaiter(object):
|
@ -5,11 +5,11 @@
|
||||
"""
|
||||
from typing import Union
|
||||
|
||||
from DrissionPage._elements.chromium_element import ChromiumElement
|
||||
from DrissionPage._pages.chromium_base import ChromiumBase
|
||||
from DrissionPage._pages.chromium_frame import ChromiumFrame
|
||||
from DrissionPage._pages.chromium_page import ChromiumPage
|
||||
from .browser_download_manager import DownloadMission
|
||||
from .chromium_base import ChromiumBase
|
||||
from .chromium_element import ChromiumElement
|
||||
from .chromium_frame import ChromiumFrame
|
||||
from .chromium_page import ChromiumPage
|
||||
|
||||
|
||||
class ChromiumBaseWaiter(object):
|
@ -6,9 +6,9 @@
|
||||
"""
|
||||
from FlowViewer import Listener, RequestMan
|
||||
|
||||
from .session_element import make_session_ele
|
||||
from ._elements.session_element import make_session_ele
|
||||
|
||||
from .action_chains import ActionChains
|
||||
from .commons.keys import Keys
|
||||
from .commons.by import By
|
||||
from .commons.constants import Settings
|
||||
from ._units.action_chains import ActionChains
|
||||
from ._commons.keys import Keys
|
||||
from ._commons.by import By
|
||||
from ._commons.constants import Settings
|
||||
|
@ -1,7 +1,10 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
from .session_element import make_session_ele as make_session_ele
|
||||
|
||||
from .action_chains import ActionChains as ActionChains
|
||||
from .commons.keys import Keys as Keys
|
||||
from .commons.by import By as By
|
||||
from .commons.constants import Settings as Settings
|
||||
"""
|
||||
@Author : g1879
|
||||
@Contact : g1879@qq.com
|
||||
"""
|
||||
from ._commons.by import By as By
|
||||
from ._commons.constants import Settings as Settings
|
||||
from ._commons.keys import Keys as Keys
|
||||
from ._elements.session_element import make_session_ele as make_session_ele
|
||||
from ._units.action_chains import ActionChains as ActionChains
|
||||
|
@ -7,8 +7,8 @@ from os import popen
|
||||
from pathlib import Path
|
||||
from re import search
|
||||
|
||||
from .configs.chromium_options import ChromiumOptions
|
||||
from .configs.options_manage import OptionsManager
|
||||
from ._configs.chromium_options import ChromiumOptions
|
||||
from ._configs.options_manage import OptionsManager
|
||||
|
||||
|
||||
def configs_to_here(save_name=None):
|
||||
|
@ -1,4 +1,8 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
"""
|
||||
@Author : g1879
|
||||
@Contact : g1879@qq.com
|
||||
"""
|
||||
|
||||
|
||||
class BaseError(Exception):
|
||||
|
Loading…
x
Reference in New Issue
Block a user