完善cookies域名相关逻辑,get_cookies()增加all_info参数,待测试

This commit is contained in:
g1879 2023-03-07 23:29:22 +08:00
parent 651a00e666
commit 34296c1aca
13 changed files with 85 additions and 45 deletions

View File

@ -349,7 +349,7 @@ class BasePage(BaseParser):
return
@abstractmethod
def get_cookies(self, as_dict=False):
def get_cookies(self, as_dict=False, all_info=False):
return {}
@abstractmethod

View File

@ -172,7 +172,7 @@ class BasePage(BaseParser):
def json(self) -> dict: ...
@abstractmethod
def get_cookies(self, as_dict: bool = False) -> Union[list, dict]: ...
def get_cookies(self, as_dict: bool = False, all_info: bool = False) -> Union[list, dict]: ...
@abstractmethod
def get(self,

View File

@ -429,16 +429,20 @@ class ChromiumBase(BasePage):
timeout=timeout)
return self._url_available
def get_cookies(self, as_dict=False):
def get_cookies(self, as_dict=False, all_info=False):
"""获取cookies信息
:param as_dict: 为True时返回由{name: value}键值对组成的dict
:param as_dict: 为True时返回由{name: value}键值对组成的dict,此时all_info无效;为False时返回list
:param all_info: 是否返回所有信息,为False时只返回name、value、domain
:return: cookies信息
"""
cookies = self.run_cdp_loaded('Network.getCookies')['cookies']
if as_dict:
return {cookie['name']: cookie['value'] for cookie in cookies}
else:
elif all_info:
return cookies
else:
return [{'name': cookie['name'], 'value': cookie['value'], 'domain': cookie['domain']}
for cookie in cookies]
def ele(self, loc_or_ele, timeout=None):
"""获取第一个符合条件的元素对象

View File

@ -138,7 +138,7 @@ class ChromiumBase(BasePage):
interval: float = None,
timeout: float = None) -> Union[None, bool]: ...
def get_cookies(self, as_dict: bool = False) -> Union[list, dict]: ...
def get_cookies(self, as_dict: bool = False, all_info: bool = False) -> Union[list, dict]: ...
def ele(self,
loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame],

View File

@ -558,7 +558,7 @@ class ChromiumDownloadSetter(DownloadSetter):
"""把driver对象的cookies复制到session对象"""
ua = self._page.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value']
self.session.headers.update({"User-Agent": ua})
set_session_cookies(self.session, self._page.get_cookies(as_dict=True))
set_session_cookies(self.session, self._page.get_cookies(as_dict=False, all_info=False))
def _download_by_DownloadKit(self, **kwargs):
"""拦截浏览器下载并用downloadKit下载"""

View File

@ -282,13 +282,16 @@ class WebPageTab(SessionPage, ChromiumTab):
selenium_user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value']
self.session.headers.update({"User-Agent": selenium_user_agent})
self.set.cookies(self._get_driver_cookies(as_dict=True), set_session=True)
self.set.cookies(self._get_driver_cookies(as_dict=False, all_info=False), set_session=True)
def cookies_to_browser(self):
"""把session对象的cookies复制到浏览器"""
netloc = urlparse(self.url).netloc
u = netloc.split('.')
domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else netloc
if netloc.replace('.', '').isdigit(): # ip
domain = netloc
else: # 域名
u = netloc.split('.')
domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else netloc
cookies = []
for cookie in super().get_cookies():
if not cookie.get('domain', None):
@ -298,27 +301,32 @@ class WebPageTab(SessionPage, ChromiumTab):
cookies.append(cookie)
self.set.cookies(cookies, set_driver=True)
def get_cookies(self, as_dict=False, all_domains=False):
def get_cookies(self, as_dict=False, all_domains=False, all_info=False):
"""返回cookies
:param as_dict: 是否以字典方式返回
:param all_domains: 是否返回所有域的cookies
:param all_info: 是否返回所有信息,False则只返回name、value、domain
:return: cookies信息
"""
if self._mode == 's':
return super().get_cookies(as_dict, all_domains)
return super().get_cookies(as_dict, all_domains, all_info)
elif self._mode == 'd':
return self._get_driver_cookies(as_dict)
return self._get_driver_cookies(as_dict, all_info)
def _get_driver_cookies(self, as_dict=False):
def _get_driver_cookies(self, as_dict=False, all_info=False):
"""获取浏览器cookies
:param as_dict: 以dict形式返回
:param as_dict: 是否以dict形式返回,为True时all_info无效
:param all_info: 是否返回所有信息,为False时只返回name、value、domain
:return: cookies信息
"""
cookies = self.run_cdp('Network.getCookies')['cookies']
if as_dict:
return {cookie['name']: cookie['value'] for cookie in cookies}
else:
elif all_info:
return cookies
else:
return [{'name': cookie['name'], 'value': cookie['value'], 'domain': cookie['domain']}
for cookie in cookies]
def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None):
"""返回页面中符合条件的元素、属性或节点文本,默认返回第一个
@ -385,5 +393,5 @@ class WebPageTabDownloadSetter(DownloadSetter):
if self._page.mode == 'd':
ua = self._page.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value']
self._page.session.headers.update({"User-Agent": ua})
set_session_cookies(self._page.session, self._page.get_cookies(as_dict=True))
set_session_cookies(self._page.session, self._page.get_cookies(as_dict=False, all_domains=False))
return self.DownloadKit

View File

@ -110,9 +110,10 @@ class WebPageTab(SessionPage, ChromiumTab):
def cookies_to_browser(self) -> None: ...
def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]: ...
def get_cookies(self, as_dict: bool = False, all_domains: bool = False,
all_info: bool = False) -> Union[dict, list]: ...
def _get_driver_cookies(self, as_dict: bool = False) -> dict: ...
def _get_driver_cookies(self, as_dict: bool = False, all_info: bool = False) -> dict: ...
# ----------------重写SessionPage的函数-----------------------
def post(self,

View File

@ -268,8 +268,11 @@ class Drission(object):
if not cookie.get('domain', None):
if browser_domain:
u = browser_domain.split('.')
cookie_domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else browser_domain
if browser_domain.replace('.', '').isdigit(): # ip
cookie_domain = browser_domain
else: # 域名
u = browser_domain.split('.')
cookie_domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else browser_domain
else:
raise ValueError('cookie中没有域名或浏览器未访问过URL。')
@ -329,8 +332,11 @@ class Drission(object):
if ex_url not in browser_domain:
self.driver.get(url)
u = ex_url.split('.')
domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else ex_url
if ex_url.replace('.', '').isdigit(): # ip
domain = ex_url
else: # 域名
u = ex_url.split('.')
domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else ex_url
cookies = []
for cookie in self.session.cookies:

View File

@ -211,8 +211,11 @@ class SessionPage(BasePage):
else:
if self.url:
netloc = urlparse(self.url).netloc
u = netloc.split('.')
domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else netloc
if netloc.replace('.', '').isdigit(): # ip
domain = netloc
else: # 域名
u = netloc.split('.')
domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else netloc
cookies = tuple(x for x in self.session.cookies if domain in x.domain or x.domain == '')
else:
cookies = tuple(x for x in self.session.cookies)

View File

@ -179,10 +179,11 @@ class SessionPage(BasePage):
"""
return loc_or_ele if isinstance(loc_or_ele, SessionElement) else make_session_ele(self, loc_or_ele, single)
def get_cookies(self, as_dict=False, all_domains=False):
def get_cookies(self, as_dict=False, all_domains=False, all_info=False):
"""返回cookies
:param as_dict: 是否以字典方式返回
:param as_dict: 是否以字典方式返回,False则以list返回
:param all_domains: 是否返回所有域的cookies
:param all_info: 是否返回所有信息,False则只返回name、value、domain
:return: cookies信息
"""
if all_domains:
@ -190,16 +191,25 @@ class SessionPage(BasePage):
else:
if self.url:
netloc = urlparse(self.url).netloc
u = netloc.split('.')
domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else netloc
if netloc.replace('.', '').isdigit(): # ip
domain = netloc
else: # 域名
u = netloc.split('.')
domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else netloc
cookies = tuple(x for x in self.session.cookies if domain in x.domain or x.domain == '')
else:
cookies = tuple(x for x in self.session.cookies)
if as_dict:
return {x.name: x.value for x in cookies}
else:
elif all_info:
return [cookie_to_dict(cookie) for cookie in cookies]
else:
r = []
for c in cookies:
c = cookie_to_dict(c)
r.append({'name': c['name'], 'value': c['value'], 'domain': c['domain']})
return r
def post(self, url, data=None, show_errmsg=False, retry=None, interval=None, **kwargs):
"""用post方式跳转到url

View File

@ -104,9 +104,8 @@ class SessionPage(BasePage):
timeout: float = None, single: bool = True, raise_err: bool = None) \
-> Union[SessionElement, str, NoneElement, List[Union[SessionElement, str]]]: ...
def get_cookies(self,
as_dict: bool = False,
all_domains: bool = False) -> Union[dict, list]: ...
def get_cookies(self, as_dict: bool = False, all_domains: bool = False,
all_info: bool = False) -> Union[dict, list]: ...
# ----------------session独有属性和方法-----------------------
@property

View File

@ -356,13 +356,16 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
selenium_user_agent = self.run_cdp('Runtime.evaluate', expression='navigator.userAgent;')['result']['value']
self.session.headers.update({"User-Agent": selenium_user_agent})
self.set.cookies(self._get_driver_cookies(as_dict=True), set_session=True)
self.set.cookies(self._get_driver_cookies(as_dict=False, all_info=False), set_session=True)
def cookies_to_browser(self):
"""把session对象的cookies复制到浏览器"""
netloc = urlparse(self.url).netloc
u = netloc.split('.')
domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else netloc
if netloc.replace('.', '').isdigit(): # ip
domain = netloc
else: # 域名
u = netloc.split('.')
domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else netloc
cookies = []
for cookie in super().get_cookies():
if not cookie.get('domain', None):
@ -372,16 +375,17 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
cookies.append(cookie)
self.set.cookies(cookies, set_driver=True)
def get_cookies(self, as_dict=False, all_domains=False):
def get_cookies(self, as_dict=False, all_domains=False, all_info=False):
"""返回cookies
:param as_dict: 是否以字典方式返回
:param as_dict: 是否以字典方式返回,False以list形式返回
:param all_domains: 是否返回所有域的cookies
:param all_info: 是否返回所有信息,False则只返回name、value、domain
:return: cookies信息
"""
if self._mode == 's':
return super().get_cookies(as_dict, all_domains)
return super().get_cookies(as_dict, all_domains, all_info)
elif self._mode == 'd':
return self._get_driver_cookies(as_dict)
return self._get_driver_cookies(as_dict, all_info)
def get_tab(self, tab_id=None):
"""获取一个标签页对象
@ -391,16 +395,20 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
tab_id = tab_id or self.tab_id
return WebPageTab(self, tab_id)
def _get_driver_cookies(self, as_dict=False):
def _get_driver_cookies(self, as_dict=False, all_info=False):
"""获取浏览器cookies
:param as_dict: 以dict形式返回
:param as_dict: 是否以dict形式返回,为True时all_info无效
:param all_info: 是否返回所有信息
:return: cookies信息
"""
cookies = self.run_cdp('Network.getCookies')['cookies']
if as_dict:
return {cookie['name']: cookie['value'] for cookie in cookies}
else:
elif all_info:
return cookies
else:
return [{'name': cookie['name'], 'value': cookie['value'], 'domain': cookie['domain']}
for cookie in cookies]
def close_driver(self):
"""关闭driver及浏览器"""

View File

@ -121,11 +121,12 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
def cookies_to_browser(self) -> None: ...
def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]: ...
def get_cookies(self, as_dict: bool = False, all_domains: bool = False,
all_info: bool = False) -> Union[dict, list]: ...
def get_tab(self, tab_id: str = None) -> WebPageTab: ...
def _get_driver_cookies(self, as_dict: bool = False) -> dict: ...
def _get_driver_cookies(self, as_dict: bool = False, all_info: bool = False) -> dict: ...
def close_driver(self) -> None: ...