mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
当response的header没有charset时,尝试从meta获取
This commit is contained in:
parent
a31ed1d354
commit
1c823470bd
@@ -5,6 +5,7 @@
|
||||
@File : session_page.py
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from random import random
|
||||
from time import time
|
||||
@@ -196,7 +197,7 @@ class SessionPage(object):
|
||||
# 设置referer和host值
|
||||
if self._url:
|
||||
if 'headers' in set(x.lower() for x in kwargs):
|
||||
keys=set(x.lower() for x in kwargs['headers'])
|
||||
keys = set(x.lower() for x in kwargs['headers'])
|
||||
if 'referer' not in keys:
|
||||
kwargs['headers']['Referer'] = self._url
|
||||
if 'host' not in keys:
|
||||
@@ -216,13 +217,14 @@ class SessionPage(object):
|
||||
return_value = False
|
||||
else:
|
||||
headers = dict(r.headers)
|
||||
if 'Content-Type' not in headers:
|
||||
charset = 'utf-8'
|
||||
else:
|
||||
if 'charset' not in headers['Content-Type']:
|
||||
if 'Content-Type' not in headers or 'charset' not in headers['Content-Type']:
|
||||
re_result = re.search(r'<meta.*?charset=([^"\']+)', r.text)
|
||||
try:
|
||||
charset = re_result.group(1)
|
||||
except:
|
||||
charset = 'utf-8'
|
||||
else:
|
||||
charset = headers['Content-Type'].split('=')[1]
|
||||
else:
|
||||
charset = headers['Content-Type'].split('=')[1]
|
||||
r.encoding = charset
|
||||
return_value = r
|
||||
return return_value
|
||||
|
Loading…
x
Reference in New Issue
Block a user