修复一个当网站headers不规范时获取不到编码的问题

This commit is contained in:
g1879 2023-07-17 17:12:45 +08:00
parent b46b516b73
commit e1daebd350

View File

@@ -7,7 +7,7 @@ from re import search
from time import sleep
from urllib.parse import urlparse
from requests import Session, Response
from requests import Session
from requests.structures import CaseInsensitiveDict
from tldextract import extract
@@ -305,15 +305,17 @@ class SessionPage(BasePage):
return r, f'状态码:{r.status_code}'
def check_headers(kwargs, headers, arg) -> bool:
def check_headers(kwargs, headers, arg):
"""检查kwargs或headers中是否有arg所示属性"""
return arg in kwargs['headers'] or arg in headers
def set_charset(response) -> Response:
def set_charset(response):
"""设置Response对象的编码"""
# 在headers中获取编码
content_type = response.headers.get('content-type', '').lower()
if not content_type.endswith(';'):
content_type += ';'
charset = search(r'charset[=: ]*(.*)?;', content_type)
if charset: