diff --git a/DrissionPage/_configs/session_options.py b/DrissionPage/_configs/session_options.py index cd710f6..c702991 100644 --- a/DrissionPage/_configs/session_options.py +++ b/DrissionPage/_configs/session_options.py @@ -378,32 +378,30 @@ class SessionOptions(object): return session_options_to_dict(self) def make_session(self): - """根据内在的配置生成Session对象""" + """根据内在的配置生成Session对象,ua从对象中分离""" s = Session() + h = CaseInsensitiveDict(self.headers) if self.headers else CaseInsensitiveDict() - if self.headers: - s.headers = CaseInsensitiveDict(self.headers) if self.cookies: set_session_cookies(s, self.cookies) if self.adapters: for url, adapter in self.adapters: s.mount(url, adapter) - attrs = ['auth', 'proxies', 'hooks', 'params', 'verify', - 'cert', 'stream', 'trust_env', 'max_redirects'] - for i in attrs: + for i in ['auth', 'proxies', 'hooks', 'params', 'verify', 'cert', 'stream', 'trust_env', 'max_redirects']: attr = self.__getattribute__(i) if attr: s.__setattr__(i, attr) - return s + return s, h - def from_session(self, session): + def from_session(self, session, headers=None): """从Session对象中读取配置 :param session: Session对象 + :param headers: headers :return: 当前对象 """ - self._headers = session.headers + self._headers = CaseInsensitiveDict(**session.headers, **headers) if headers else session.headers self._cookies = session.cookies self._auth = session.auth self._proxies = session.proxies diff --git a/DrissionPage/_configs/session_options.pyi b/DrissionPage/_configs/session_options.pyi index ed3acf8..e4fc4cf 100644 --- a/DrissionPage/_configs/session_options.pyi +++ b/DrissionPage/_configs/session_options.pyi @@ -4,12 +4,13 @@ @Contact : g1879@qq.com """ from pathlib import Path -from typing import Any, Union, Tuple +from typing import Any, Union, Tuple, Optional from requests import Session from requests.adapters import HTTPAdapter from requests.auth import HTTPBasicAuth from requests.cookies import RequestsCookieJar +from requests.structures import CaseInsensitiveDict class SessionOptions(object): @@ -113,9 +114,9 @@ class SessionOptions(object): def as_dict(self) -> dict: ... - def make_session(self) -> Session: ... + def make_session(self) -> Tuple[Session, Optional[CaseInsensitiveDict]]: ... - def from_session(self, session: Session) -> SessionOptions: ... + def from_session(self, session: Session, headers: CaseInsensitiveDict = None) -> SessionOptions: ... def session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Union[dict, None]: ... diff --git a/DrissionPage/_pages/chromium_tab.py b/DrissionPage/_pages/chromium_tab.py index 55227b8..fdd9a57 100644 --- a/DrissionPage/_pages/chromium_tab.py +++ b/DrissionPage/_pages/chromium_tab.py @@ -68,7 +68,8 @@ class WebPageTab(SessionPage, ChromiumTab, BasePage): self._mode = 'd' self._has_driver = True self._has_session = True - super().__init__(session_or_options=SessionOptions(read_file=False).from_session(copy(page.session))) + super().__init__(session_or_options=SessionOptions(read_file=False).from_session(copy(page.session), + page._headers)) super(SessionPage, self).__init__(page=page, tab_id=tab_id) def __call__(self, loc_or_str, timeout=None): @@ -111,6 +112,14 @@ class WebPageTab(SessionPage, ChromiumTab, BasePage): elif self._mode == 'd': return super(SessionPage, self).title + @property + def raw_data(self): + """返回页码原始数据数据""" + if self._mode == 's': + return super().raw_data + elif self._mode == 'd': + return super(SessionPage, self).html if self._has_driver else '' + @property def html(self): """返回页面html文本""" diff --git a/DrissionPage/_pages/chromium_tab.pyi b/DrissionPage/_pages/chromium_tab.pyi index 687a88c..d00e710 100644 --- a/DrissionPage/_pages/chromium_tab.pyi +++ b/DrissionPage/_pages/chromium_tab.pyi @@ -65,6 +65,9 @@ class WebPageTab(SessionPage, ChromiumTab): @property def title(self) -> str: ... + @property + def raw_data(self) -> Union[str, bytes]: ... + @property def html(self) -> str: ... diff --git a/DrissionPage/_pages/session_page.py b/DrissionPage/_pages/session_page.py index b39637c..811b44a 100644 --- a/DrissionPage/_pages/session_page.py +++ b/DrissionPage/_pages/session_page.py @@ -28,6 +28,7 @@ class SessionPage(BasePage): :param timeout: 连接超时时间,为None时从ini文件读取 """ super(SessionPage, SessionPage).__init__(self) + self._headers = None self._response = None self._session = None self._set = None @@ -58,7 +59,7 @@ class SessionPage(BasePage): def _create_session(self): """创建内建Session对象""" if not self._session: - self._session = self._session_options.make_session() + self._session, self._headers = self._session_options.make_session() def __call__(self, loc_or_str, timeout=None): """在内部查找元素 @@ -86,6 +87,11 @@ class SessionPage(BasePage): """返回当前访问url""" return self._url + @property + def raw_data(self): + """返回页面原始数据""" + return self.response.content if self.response else b'' + @property def html(self): """返回页面的html文本""" @@ -102,7 +108,7 @@ class SessionPage(BasePage): @property def user_agent(self): """返回user agent""" - return self.session.headers.get('user-agent', '') + return self._headers.get('user-agent', '') @property def session(self): @@ -269,6 +275,8 @@ class SessionPage(BasePage): if not check_headers(kwargs, self.session.headers, 'timeout'): kwargs['timeout'] = self.timeout + kwargs['headers'] = {**self._headers, **kwargs['headers']} + r = err = None retry = retry if retry is not None else self.retry_times interval = interval if interval is not None else self.retry_interval diff --git a/DrissionPage/_pages/session_page.pyi b/DrissionPage/_pages/session_page.pyi index 4ae4164..7a214f1 100644 --- a/DrissionPage/_pages/session_page.pyi +++ b/DrissionPage/_pages/session_page.pyi @@ -3,7 +3,7 @@ @Author : g1879 @Contact : g1879@qq.com """ -from typing import Any, Union, Tuple, List +from typing import Any, Union, Tuple, List, Optional from requests import Session, Response from requests.structures import CaseInsensitiveDict @@ -19,6 +19,7 @@ class SessionPage(BasePage): def __init__(self, session_or_options: Union[Session, SessionOptions] = None, timeout: float = None): + self._headers: Optional[CaseInsensitiveDict] = ... self._session: Session = ... self._session_options: SessionOptions = ... self._url: str = ... @@ -49,6 +50,9 @@ class SessionPage(BasePage): @property def _session_url(self) -> str: ... + @property + def raw_data(self) -> Union[str, bytes]: ... + @property def html(self) -> str: ... diff --git a/DrissionPage/_pages/web_page.py b/DrissionPage/_pages/web_page.py index a873b3a..98b1da2 100644 --- a/DrissionPage/_pages/web_page.py +++ b/DrissionPage/_pages/web_page.py @@ -77,6 +77,14 @@ class WebPage(SessionPage, ChromiumPage, BasePage): elif self._mode == 'd': return super(SessionPage, self).title + @property + def raw_data(self): + """返回页码原始数据数据""" + if self._mode == 's': + return super().raw_data + elif self._mode == 'd': + return super(SessionPage, self).html if self._has_driver else '' + @property def html(self): """返回页面html文本""" diff --git a/DrissionPage/_pages/web_page.pyi b/DrissionPage/_pages/web_page.pyi index 60159d4..8f461d0 100644 --- a/DrissionPage/_pages/web_page.pyi +++ b/DrissionPage/_pages/web_page.pyi @@ -47,6 +47,9 @@ class WebPage(SessionPage, ChromiumPage, BasePage): @property def title(self) -> str: ... + @property + def raw_data(self) -> Union[str, bytes]: ... + @property def html(self) -> str: ... diff --git a/DrissionPage/_units/setter.py b/DrissionPage/_units/setter.py index 644492c..da1d97b 100644 --- a/DrissionPage/_units/setter.py +++ b/DrissionPage/_units/setter.py @@ -244,7 +244,7 @@ class SessionPageSetter(object): :param headers: dict形式的headers :return: None """ - self._page.session.headers = CaseInsensitiveDict(headers) + self._page._headers = CaseInsensitiveDict(headers) def header(self, attr, value): """设置headers中一个项 @@ -252,14 +252,14 @@ class SessionPageSetter(object): :param value: 设置值 :return: None """ - self._page.session.headers[attr.lower()] = value + self._page._headers[attr] = value def user_agent(self, ua): """设置user agent :param ua: user agent :return: None """ - self._page.session.headers['user-agent'] = ua + self._page._headers['user-agent'] = ua def proxies(self, http=None, https=None): """设置proxies参数