修复一个当网站headers不规范时获取不到编码的问题;修复一个监听时可能出现的问题

This commit is contained in:
g1879 2023-07-27 15:43:46 +08:00
parent 8f33a9241e
commit e46f068218
2 changed files with 7 additions and 5 deletions

View File

@ -1140,7 +1140,8 @@ class NetworkListener(object):
def _loading_finished(self, **kwargs):
"""请求完成时处理方法"""
request_id = kwargs['requestId']
if request_id in self._requests:
request = self._requests.get(request_id)
if request:
try:
r = self._page.run_cdp('Network.getResponseBody', requestId=request_id)
body = r['body']
@ -1149,7 +1150,6 @@ class NetworkListener(object):
body = ''
is_base64 = False
request = self._requests[request_id]
target = request['target']
rd = ResponseData(request_id, request['response'], body, self._page.tab_id, target)
rd.method = request['method']

View File

@ -8,7 +8,7 @@ from time import sleep
from urllib.parse import urlparse
from DownloadKit import DownloadKit
from requests import Session, Response
from requests import Session
from requests.structures import CaseInsensitiveDict
from tldextract import extract
@ -507,15 +507,17 @@ class FileExists(object):
self._setter.DownloadKit._file_exists = 'overwrite'
def check_headers(kwargs, headers, arg) -> bool:
def check_headers(kwargs, headers, arg):
"""检查kwargs或headers中是否有arg所示属性"""
return arg in kwargs['headers'] or arg in headers
def set_charset(response) -> Response:
def set_charset(response):
"""设置Response对象的编码"""
# 在headers中获取编码
content_type = response.headers.get('content-type', '').lower()
if not content_type.endswith(';'):
content_type += ';'
charset = search(r'charset[=: ]*(.*)?;', content_type)
if charset: