From fe9b3085622e8d62db643ce6772b1f4ab9921317 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 8 Jun 2020 21:03:55 +0800 Subject: [PATCH] =?UTF-8?q?=E5=9C=A8requests=E7=9A=84=E6=BA=90=E7=A0=81?= =?UTF-8?q?=E5=88=A0=E9=99=A4\x08=EF=BC=88=E9=80=80=E6=A0=BC=EF=BC=89?= =?UTF-8?q?=E5=92=8C\x0D=EF=BC=88=E6=8D=A2=E8=A1=8C=EF=BC=89=E5=AD=97?= =?UTF-8?q?=E7=AC=A6=EF=BC=8C=E4=BB=A5=E4=BF=AE=E6=94=B9=E7=A7=81=E6=9C=89?= =?UTF-8?q?=E5=8F=98=E9=87=8F=E7=9A=84=E6=96=B9=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/session_page.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index a742803..f3b5de3 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -18,6 +18,7 @@ from requests_html import HTMLSession, HTMLResponse from .common import get_loc_from_str, translate_loc_to_xpath, avoid_duplicate_name from .config import OptionsManager from .session_element import SessionElement, execute_session_find +from html import unescape class SessionPage(object): @@ -62,6 +63,7 @@ class SessionPage(object): @property def html(self) -> str: """获取元素innerHTML,如未指定元素则获取所有源代码""" + # return unescape(self.response.html.raw_html.replace(b'\x08', b'').decode()).replace('\xa0', ' ') return self.response.html.html def ele(self, loc_or_ele: Union[tuple, str, SessionElement], mode: str = None, show_errmsg: bool = False) \ @@ -229,6 +231,7 @@ class SessionPage(object): charset = 'utf-8' else: charset = headers['Content-Type'].split('=')[1] + r._content = r.content.replace(b'\x08', b'').replace(b'\x0d', b'') # TODO: 待测试 r.encoding = charset return_value = r return return_value