当response的header没有charset时，尝试从meta获取

This commit is contained in:
g1879 2020-06-02 00:10:55 +08:00
parent a31ed1d354
commit 1c823470bd

View File

@ -5,6 +5,7 @@
@File : session_page.py
"""
import os
import re
from pathlib import Path
from random import random
from time import time
@ -216,10 +217,11 @@ class SessionPage(object):
return_value = False
else:
headers = dict(r.headers)
if 'Content-Type' not in headers:
charset = 'utf-8'
else:
if 'charset' not in headers['Content-Type']:
if 'Content-Type' not in headers or 'charset' not in headers['Content-Type']:
re_result = re.search(r'<meta.*?charset=([^"\']+)', r.text)
try:
charset = re_result.group(1)
except:
charset = 'utf-8'
else:
charset = headers['Content-Type'].split('=')[1]