修复一个当网站headers不规范时获取不到编码的问题;修复一个监听时可能出现的问题

This commit is contained in:
g1879 2023-07-27 15:43:46 +08:00
parent 8f33a9241e
commit e46f068218
2 changed files with 7 additions and 5 deletions

View File

@@ -1140,7 +1140,8 @@ class NetworkListener(object):
def _loading_finished(self, **kwargs): def _loading_finished(self, **kwargs):
"""请求完成时处理方法""" """请求完成时处理方法"""
request_id = kwargs['requestId'] request_id = kwargs['requestId']
if request_id in self._requests: request = self._requests.get(request_id)
if request:
try: try:
r = self._page.run_cdp('Network.getResponseBody', requestId=request_id) r = self._page.run_cdp('Network.getResponseBody', requestId=request_id)
body = r['body'] body = r['body']
@@ -1149,7 +1150,6 @@ class NetworkListener(object):
body = '' body = ''
is_base64 = False is_base64 = False
request = self._requests[request_id]
target = request['target'] target = request['target']
rd = ResponseData(request_id, request['response'], body, self._page.tab_id, target) rd = ResponseData(request_id, request['response'], body, self._page.tab_id, target)
rd.method = request['method'] rd.method = request['method']

View File

@@ -8,7 +8,7 @@ from time import sleep
from urllib.parse import urlparse from urllib.parse import urlparse
from DownloadKit import DownloadKit from DownloadKit import DownloadKit
from requests import Session, Response from requests import Session
from requests.structures import CaseInsensitiveDict from requests.structures import CaseInsensitiveDict
from tldextract import extract from tldextract import extract
@@ -507,15 +507,17 @@ class FileExists(object):
self._setter.DownloadKit._file_exists = 'overwrite' self._setter.DownloadKit._file_exists = 'overwrite'
def check_headers(kwargs, headers, arg) -> bool: def check_headers(kwargs, headers, arg):
"""检查kwargs或headers中是否有arg所示属性""" """检查kwargs或headers中是否有arg所示属性"""
return arg in kwargs['headers'] or arg in headers return arg in kwargs['headers'] or arg in headers
def set_charset(response) -> Response: def set_charset(response):
"""设置Response对象的编码""" """设置Response对象的编码"""
# 在headers中获取编码 # 在headers中获取编码
content_type = response.headers.get('content-type', '').lower() content_type = response.headers.get('content-type', '').lower()
if not content_type.endswith(';'):
content_type += ';'
charset = search(r'charset[=: ]*(.*)?;', content_type) charset = search(r'charset[=: ]*(.*)?;', content_type)
if charset: if charset: