修复一个当网站headers不规范时获取不到编码的问题

This commit is contained in:
g1879 2023-07-17 17:12:45 +08:00
parent b46b516b73
commit e1daebd350

View File

@@ -7,7 +7,7 @@ from re import search
from time import sleep from time import sleep
from urllib.parse import urlparse from urllib.parse import urlparse
from requests import Session, Response from requests import Session
from requests.structures import CaseInsensitiveDict from requests.structures import CaseInsensitiveDict
from tldextract import extract from tldextract import extract
@@ -305,15 +305,17 @@ class SessionPage(BasePage):
return r, f'状态码:{r.status_code}' return r, f'状态码:{r.status_code}'
def check_headers(kwargs, headers, arg) -> bool: def check_headers(kwargs, headers, arg):
"""检查kwargs或headers中是否有arg所示属性""" """检查kwargs或headers中是否有arg所示属性"""
return arg in kwargs['headers'] or arg in headers return arg in kwargs['headers'] or arg in headers
def set_charset(response) -> Response: def set_charset(response):
"""设置Response对象的编码""" """设置Response对象的编码"""
# 在headers中获取编码 # 在headers中获取编码
content_type = response.headers.get('content-type', '').lower() content_type = response.headers.get('content-type', '').lower()
if not content_type.endswith(';'):
content_type += ';'
charset = search(r'charset[=: ]*(.*)?;', content_type) charset = search(r'charset[=: ]*(.*)?;', content_type)
if charset: if charset: