mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
修复获取编码可能出错的bug
This commit is contained in:
parent
6c35dad79d
commit
798d3e771a
@@ -433,26 +433,34 @@ class SessionPage(object):
|
|||||||
return None, e
|
return None, e
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
# -------------获取编码开始----------------
|
||||||
headers = dict(r.headers)
|
headers = dict(r.headers)
|
||||||
content_type = tuple(x for x in headers if x.lower() == 'content-type')
|
content_type = tuple(x for x in headers if x.lower() == 'content-type')
|
||||||
stream = tuple(x for x in kwargs if x.lower() == 'stream')
|
stream = tuple(x for x in kwargs if x.lower() == 'stream')
|
||||||
not_stream = (not stream or not kwargs[stream[0]]) and not self.session.stream
|
not_stream = (not stream or not kwargs[stream[0]]) and not self.session.stream
|
||||||
charset = None
|
charset = None
|
||||||
|
|
||||||
|
# 若headers中没有编码信息,从页面meta标签提取,若失败,用apparent_encoding
|
||||||
if not content_type or 'charset' not in headers[content_type[0]].lower():
|
if not content_type or 'charset' not in headers[content_type[0]].lower():
|
||||||
|
|
||||||
|
# 表示是网页,非下载文件
|
||||||
if not_stream:
|
if not_stream:
|
||||||
re_result = re_SEARCH(r'<meta.*?charset=[ \'"]*([^"\' />]+).*?>',
|
re_result = re_SEARCH(b'<meta.*?charset=[ \\\'"]*([^"\\\' />]+).*?>', r.content)
|
||||||
r.iter_content(chunk_size=512).__next__().decode())
|
|
||||||
try:
|
try:
|
||||||
charset = re_result.group(1)
|
charset = re_result.group(1).decode()
|
||||||
except:
|
except:
|
||||||
charset = r.apparent_encoding
|
charset = r.apparent_encoding
|
||||||
|
|
||||||
|
# 在headers中获取编码
|
||||||
else:
|
else:
|
||||||
charset = headers[content_type[0]].split('=')[1]
|
charset = headers[content_type[0]].split('=')[1]
|
||||||
|
# -------------获取编码结束----------------
|
||||||
|
|
||||||
if charset: # 指定网页编码
|
if charset: # 指定网页编码
|
||||||
r.encoding = charset
|
r.encoding = charset
|
||||||
|
|
||||||
if not_stream: # 避免存在退格符导致乱码或解析出错
|
# if not_stream: # 避免存在退格符导致乱码或解析出错
|
||||||
r._content = r.content.replace(b'\x08', b'\\b')
|
# r._content = r.content.replace(b'\x08', b'\\b')
|
||||||
|
|
||||||
return r, 'Success'
|
return r, 'Success'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user