优化SessionPage速度;页面对象增加raw_data

This commit is contained in:
g1879 2023-11-21 23:52:43 +08:00
parent 977242ad0a
commit bd18b8e427
9 changed files with 53 additions and 19 deletions

View File

@ -378,32 +378,30 @@ class SessionOptions(object):
return session_options_to_dict(self)
def make_session(self):
    """Build a Session from this object's settings, keeping headers separate.

    Cookies, adapters and every truthy connection attribute are applied to a
    new ``Session``. The configured headers are not written onto the
    Session; they are returned next to it.

    :return: ``(session, headers)``; ``headers`` is a CaseInsensitiveDict,
             empty when no headers are configured
    """
    s = Session()
    h = CaseInsensitiveDict(self.headers) if self.headers else CaseInsensitiveDict()

    if self.cookies:
        set_session_cookies(s, self.cookies)
    if self.adapters:
        for url, adapter in self.adapters:
            s.mount(url, adapter)

    # Only truthy settings are copied; anything else is left exactly as
    # Session() created it.
    for name in ['auth', 'proxies', 'hooks', 'params', 'verify', 'cert', 'stream', 'trust_env', 'max_redirects']:
        value = getattr(self, name)
        if value:
            setattr(s, name, value)

    return s, h
def from_session(self, session):
def from_session(self, session, headers=None):
"""从Session对象中读取配置
:param session: Session对象
:param headers: headers
:return: 当前对象
"""
self._headers = session.headers
self._headers = CaseInsensitiveDict(**session.headers, **headers) if headers else session.headers
self._cookies = session.cookies
self._auth = session.auth
self._proxies = session.proxies

View File

@ -4,12 +4,13 @@
@Contact : g1879@qq.com
"""
from pathlib import Path
from typing import Any, Union, Tuple
from typing import Any, Union, Tuple, Optional
from requests import Session
from requests.adapters import HTTPAdapter
from requests.auth import HTTPBasicAuth
from requests.cookies import RequestsCookieJar
from requests.structures import CaseInsensitiveDict
class SessionOptions(object):
@ -113,9 +114,9 @@ class SessionOptions(object):
def as_dict(self) -> dict: ...
# Returns the new Session together with the headers kept apart from it;
# the headers mapping is always a CaseInsensitiveDict (possibly empty).
def make_session(self) -> Tuple[Session, CaseInsensitiveDict]: ...
# ``headers`` is optional extra headers combined with the Session's own;
# spelled as Optional because its default is None.
def from_session(self, session: Session, headers: Optional[CaseInsensitiveDict] = None) -> SessionOptions: ...
def session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Union[dict, None]: ...

View File

@ -68,7 +68,8 @@ class WebPageTab(SessionPage, ChromiumTab, BasePage):
self._mode = 'd'
self._has_driver = True
self._has_session = True
super().__init__(session_or_options=SessionOptions(read_file=False).from_session(copy(page.session)))
super().__init__(session_or_options=SessionOptions(read_file=False).from_session(copy(page.session),
page._headers))
super(SessionPage, self).__init__(page=page, tab_id=tab_id)
def __call__(self, loc_or_str, timeout=None):
@ -111,6 +112,14 @@ class WebPageTab(SessionPage, ChromiumTab, BasePage):
elif self._mode == 'd':
return super(SessionPage, self).title
@property
def raw_data(self):
    """Return the raw data of the current page.

    In 's' mode this defers to the next ``raw_data`` in the MRO. In 'd'
    mode it is the ``html`` looked up past ``SessionPage``, or ``''`` when
    ``_has_driver`` is false. Any other mode falls through and yields
    ``None``.
    """
    if self._mode == 's':
        return super().raw_data
    if self._mode == 'd':
        if not self._has_driver:
            return ''
        return super(SessionPage, self).html
@property
def html(self):
"""返回页面html文本"""

View File

@ -65,6 +65,9 @@ class WebPageTab(SessionPage, ChromiumTab):
@property
def title(self) -> str: ...
@property
def raw_data(self) -> Union[str, bytes]: ...
@property
def html(self) -> str: ...

View File

@ -28,6 +28,7 @@ class SessionPage(BasePage):
:param timeout: 连接超时时间为None时从ini文件读取
"""
super(SessionPage, SessionPage).__init__(self)
self._headers = None
self._response = None
self._session = None
self._set = None
@ -58,7 +59,7 @@ class SessionPage(BasePage):
def _create_session(self):
"""创建内建Session对象"""
if not self._session:
self._session = self._session_options.make_session()
self._session, self._headers = self._session_options.make_session()
def __call__(self, loc_or_str, timeout=None):
"""在内部查找元素
@ -86,6 +87,11 @@ class SessionPage(BasePage):
"""返回当前访问url"""
return self._url
@property
def raw_data(self):
    """Return the raw content of the current response, or ``b''`` if there is none."""
    # NOTE(review): if ``response`` is a requests.Response, its truthiness
    # follows ``.ok``, so 4xx/5xx responses would also give b'' here -
    # confirm that is intended.
    if not self.response:
        return b''
    return self.response.content
@property
def html(self):
"""返回页面的html文本"""
@ -102,7 +108,7 @@ class SessionPage(BasePage):
@property
def user_agent(self):
    """Return the user agent string, or ``''`` when none is set.

    The value is read from the page-level ``_headers``. ``_headers`` may
    still be ``None``; in that case the Session's own headers are consulted
    instead of raising ``AttributeError``.
    """
    headers = self._headers
    if headers is None:
        headers = self.session.headers
    return headers.get('user-agent', '')
@property
def session(self):
@ -269,6 +275,8 @@ class SessionPage(BasePage):
if not check_headers(kwargs, self.session.headers, 'timeout'):
kwargs['timeout'] = self.timeout
kwargs['headers'] = {**self._headers, **kwargs['headers']}
r = err = None
retry = retry if retry is not None else self.retry_times
interval = interval if interval is not None else self.retry_interval

View File

@ -3,7 +3,7 @@
@Author : g1879
@Contact : g1879@qq.com
"""
from typing import Any, Union, Tuple, List
from typing import Any, Union, Tuple, List, Optional
from requests import Session, Response
from requests.structures import CaseInsensitiveDict
@ -19,6 +19,7 @@ class SessionPage(BasePage):
def __init__(self,
session_or_options: Union[Session, SessionOptions] = None,
timeout: float = None):
self._headers: Optional[CaseInsensitiveDict] = ...
self._session: Session = ...
self._session_options: SessionOptions = ...
self._url: str = ...
@ -49,6 +50,9 @@ class SessionPage(BasePage):
@property
def _session_url(self) -> str: ...
@property
def raw_data(self) -> Union[str, bytes]: ...
@property
def html(self) -> str: ...

View File

@ -77,6 +77,14 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
elif self._mode == 'd':
return super(SessionPage, self).title
@property
def raw_data(self):
    """Return the raw data of the current page.

    's' mode delegates to the next ``raw_data`` in the MRO. 'd' mode gives
    the ``html`` resolved past ``SessionPage``, or ``''`` when
    ``_has_driver`` is false. For any other mode the property yields
    ``None``.
    """
    if self._mode == 's':
        return super().raw_data
    if self._mode == 'd':
        if self._has_driver:
            return super(SessionPage, self).html
        return ''
@property
def html(self):
"""返回页面html文本"""

View File

@ -47,6 +47,9 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
@property
def title(self) -> str: ...
@property
def raw_data(self) -> Union[str, bytes]: ...
@property
def html(self) -> str: ...

View File

@ -244,7 +244,7 @@ class SessionPageSetter(object):
:param headers: dict形式的headers
:return: None
"""
self._page.session.headers = CaseInsensitiveDict(headers)
self._page._headers = CaseInsensitiveDict(headers)
def header(self, attr, value):
"""设置headers中一个项
@ -252,14 +252,14 @@ class SessionPageSetter(object):
:param value: 设置值
:return: None
"""
self._page.session.headers[attr.lower()] = value
self._page._headers[attr] = value
def user_agent(self, ua):
    """Set the user agent on the page-level headers.

    :param ua: user agent string
    :return: None
    """
    # The page's _headers may not have been created yet; start from an
    # empty mapping instead of failing on None.
    if self._page._headers is None:
        self._page._headers = CaseInsensitiveDict()
    self._page._headers['user-agent'] = ua
def proxies(self, http=None, https=None):
"""设置proxies参数