diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py
index f17e923..c406f93 100644
--- a/DrissionPage/session_page.py
+++ b/DrissionPage/session_page.py
@@ -433,26 +433,34 @@ class SessionPage(object):
return None, e
else:
+ # -------------获取编码开始----------------
headers = dict(r.headers)
content_type = tuple(x for x in headers if x.lower() == 'content-type')
stream = tuple(x for x in kwargs if x.lower() == 'stream')
not_stream = (not stream or not kwargs[stream[0]]) and not self.session.stream
charset = None
+
+ # 若headers中没有编码信息,从页面meta标签提取,若失败,用apparent_encoding
if not content_type or 'charset' not in headers[content_type[0]].lower():
+
+ # 表示是网页,非下载文件
if not_stream:
- re_result = re_SEARCH(r']+).*?>',
- r.iter_content(chunk_size=512).__next__().decode())
+ re_result = re_SEARCH(b']+).*?>', r.content)
+
try:
- charset = re_result.group(1)
+ charset = re_result.group(1).decode()
except:
charset = r.apparent_encoding
+
+ # 在headers中获取编码
else:
charset = headers[content_type[0]].split('=')[1]
+ # -------------获取编码结束----------------
if charset: # 指定网页编码
r.encoding = charset
- if not_stream: # 避免存在退格符导致乱码或解析出错
- r._content = r.content.replace(b'\x08', b'\\b')
+ # if not_stream: # 避免存在退格符导致乱码或解析出错
+ # r._content = r.content.replace(b'\x08', b'\\b')
return r, 'Success'