优化SessionPage速度;页面对象增加raw_data

This commit is contained in:
g1879 2023-11-21 23:52:43 +08:00
parent 977242ad0a
commit bd18b8e427
9 changed files with 53 additions and 19 deletions

View File

@ -378,32 +378,30 @@ class SessionOptions(object):
return session_options_to_dict(self) return session_options_to_dict(self)
def make_session(self): def make_session(self):
"""根据内在的配置生成Session对象""" """根据内在的配置生成Session对象，ua从对象中分离"""
s = Session() s = Session()
h = CaseInsensitiveDict(self.headers) if self.headers else CaseInsensitiveDict()
if self.headers:
s.headers = CaseInsensitiveDict(self.headers)
if self.cookies: if self.cookies:
set_session_cookies(s, self.cookies) set_session_cookies(s, self.cookies)
if self.adapters: if self.adapters:
for url, adapter in self.adapters: for url, adapter in self.adapters:
s.mount(url, adapter) s.mount(url, adapter)
attrs = ['auth', 'proxies', 'hooks', 'params', 'verify', for i in ['auth', 'proxies', 'hooks', 'params', 'verify', 'cert', 'stream', 'trust_env', 'max_redirects']:
'cert', 'stream', 'trust_env', 'max_redirects']
for i in attrs:
attr = self.__getattribute__(i) attr = self.__getattribute__(i)
if attr: if attr:
s.__setattr__(i, attr) s.__setattr__(i, attr)
return s return s, h
def from_session(self, session): def from_session(self, session, headers=None):
"""从Session对象中读取配置 """从Session对象中读取配置
:param session: Session对象 :param session: Session对象
:param headers: headers
:return: 当前对象 :return: 当前对象
""" """
self._headers = session.headers self._headers = CaseInsensitiveDict(**session.headers, **headers) if headers else session.headers
self._cookies = session.cookies self._cookies = session.cookies
self._auth = session.auth self._auth = session.auth
self._proxies = session.proxies self._proxies = session.proxies

View File

@ -4,12 +4,13 @@
@Contact : g1879@qq.com @Contact : g1879@qq.com
""" """
from pathlib import Path from pathlib import Path
from typing import Any, Union, Tuple from typing import Any, Union, Tuple, Optional
from requests import Session from requests import Session
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from requests.auth import HTTPBasicAuth from requests.auth import HTTPBasicAuth
from requests.cookies import RequestsCookieJar from requests.cookies import RequestsCookieJar
from requests.structures import CaseInsensitiveDict
class SessionOptions(object): class SessionOptions(object):
@ -113,9 +114,9 @@ class SessionOptions(object):
def as_dict(self) -> dict: ... def as_dict(self) -> dict: ...
def make_session(self) -> Session: ... def make_session(self) -> Tuple[Session, Optional[CaseInsensitiveDict]]: ...
def from_session(self, session: Session) -> SessionOptions: ... def from_session(self, session: Session, headers: CaseInsensitiveDict = None) -> SessionOptions: ...
def session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Union[dict, None]: ... def session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Union[dict, None]: ...

View File

@ -68,7 +68,8 @@ class WebPageTab(SessionPage, ChromiumTab, BasePage):
self._mode = 'd' self._mode = 'd'
self._has_driver = True self._has_driver = True
self._has_session = True self._has_session = True
super().__init__(session_or_options=SessionOptions(read_file=False).from_session(copy(page.session))) super().__init__(session_or_options=SessionOptions(read_file=False).from_session(copy(page.session),
page._headers))
super(SessionPage, self).__init__(page=page, tab_id=tab_id) super(SessionPage, self).__init__(page=page, tab_id=tab_id)
def __call__(self, loc_or_str, timeout=None): def __call__(self, loc_or_str, timeout=None):
@ -111,6 +112,14 @@ class WebPageTab(SessionPage, ChromiumTab, BasePage):
elif self._mode == 'd': elif self._mode == 'd':
return super(SessionPage, self).title return super(SessionPage, self).title
@property
def raw_data(self):
"""返回页面原始数据"""
if self._mode == 's':
return super().raw_data
elif self._mode == 'd':
return super(SessionPage, self).html if self._has_driver else ''
@property @property
def html(self): def html(self):
"""返回页面html文本""" """返回页面html文本"""

View File

@ -65,6 +65,9 @@ class WebPageTab(SessionPage, ChromiumTab):
@property @property
def title(self) -> str: ... def title(self) -> str: ...
@property
def raw_data(self) -> Union[str, bytes]: ...
@property @property
def html(self) -> str: ... def html(self) -> str: ...

View File

@ -28,6 +28,7 @@ class SessionPage(BasePage):
:param timeout: 连接超时时间，为None时从ini文件读取 :param timeout: 连接超时时间，为None时从ini文件读取
""" """
super(SessionPage, SessionPage).__init__(self) super(SessionPage, SessionPage).__init__(self)
self._headers = None
self._response = None self._response = None
self._session = None self._session = None
self._set = None self._set = None
@ -58,7 +59,7 @@ class SessionPage(BasePage):
def _create_session(self): def _create_session(self):
"""创建内建Session对象""" """创建内建Session对象"""
if not self._session: if not self._session:
self._session = self._session_options.make_session() self._session, self._headers = self._session_options.make_session()
def __call__(self, loc_or_str, timeout=None): def __call__(self, loc_or_str, timeout=None):
"""在内部查找元素 """在内部查找元素
@ -86,6 +87,11 @@ class SessionPage(BasePage):
"""返回当前访问url""" """返回当前访问url"""
return self._url return self._url
@property
def raw_data(self):
"""返回页面原始数据"""
return self.response.content if self.response else b''
@property @property
def html(self): def html(self):
"""返回页面的html文本""" """返回页面的html文本"""
@ -102,7 +108,7 @@ class SessionPage(BasePage):
@property @property
def user_agent(self): def user_agent(self):
"""返回user agent""" """返回user agent"""
return self.session.headers.get('user-agent', '') return self._headers.get('user-agent', '')
@property @property
def session(self): def session(self):
@ -269,6 +275,8 @@ class SessionPage(BasePage):
if not check_headers(kwargs, self.session.headers, 'timeout'): if not check_headers(kwargs, self.session.headers, 'timeout'):
kwargs['timeout'] = self.timeout kwargs['timeout'] = self.timeout
kwargs['headers'] = {**self._headers, **kwargs['headers']}
r = err = None r = err = None
retry = retry if retry is not None else self.retry_times retry = retry if retry is not None else self.retry_times
interval = interval if interval is not None else self.retry_interval interval = interval if interval is not None else self.retry_interval

View File

@ -3,7 +3,7 @@
@Author : g1879 @Author : g1879
@Contact : g1879@qq.com @Contact : g1879@qq.com
""" """
from typing import Any, Union, Tuple, List from typing import Any, Union, Tuple, List, Optional
from requests import Session, Response from requests import Session, Response
from requests.structures import CaseInsensitiveDict from requests.structures import CaseInsensitiveDict
@ -19,6 +19,7 @@ class SessionPage(BasePage):
def __init__(self, def __init__(self,
session_or_options: Union[Session, SessionOptions] = None, session_or_options: Union[Session, SessionOptions] = None,
timeout: float = None): timeout: float = None):
self._headers: Optional[CaseInsensitiveDict] = ...
self._session: Session = ... self._session: Session = ...
self._session_options: SessionOptions = ... self._session_options: SessionOptions = ...
self._url: str = ... self._url: str = ...
@ -49,6 +50,9 @@ class SessionPage(BasePage):
@property @property
def _session_url(self) -> str: ... def _session_url(self) -> str: ...
@property
def raw_data(self) -> Union[str, bytes]: ...
@property @property
def html(self) -> str: ... def html(self) -> str: ...

View File

@ -77,6 +77,14 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
elif self._mode == 'd': elif self._mode == 'd':
return super(SessionPage, self).title return super(SessionPage, self).title
@property
def raw_data(self):
"""返回页面原始数据"""
if self._mode == 's':
return super().raw_data
elif self._mode == 'd':
return super(SessionPage, self).html if self._has_driver else ''
@property @property
def html(self): def html(self):
"""返回页面html文本""" """返回页面html文本"""

View File

@ -47,6 +47,9 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
@property @property
def title(self) -> str: ... def title(self) -> str: ...
@property
def raw_data(self) -> Union[str, bytes]: ...
@property @property
def html(self) -> str: ... def html(self) -> str: ...

View File

@ -244,7 +244,7 @@ class SessionPageSetter(object):
:param headers: dict形式的headers :param headers: dict形式的headers
:return: None :return: None
""" """
self._page.session.headers = CaseInsensitiveDict(headers) self._page._headers = CaseInsensitiveDict(headers)
def header(self, attr, value): def header(self, attr, value):
"""设置headers中一个项 """设置headers中一个项
@ -252,14 +252,14 @@ class SessionPageSetter(object):
:param value: 设置值 :param value: 设置值
:return: None :return: None
""" """
self._page.session.headers[attr.lower()] = value self._page._headers[attr] = value
def user_agent(self, ua): def user_agent(self, ua):
"""设置user agent """设置user agent
:param ua: user agent :param ua: user agent
:return: None :return: None
""" """
self._page.session.headers['user-agent'] = ua self._page._headers['user-agent'] = ua
def proxies(self, http=None, https=None): def proxies(self, http=None, https=None):
"""设置proxies参数 """设置proxies参数