From 6d5b9b019d72240c5ccb9c5c9457a8e2c5886fb3 Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 2 Jun 2020 23:11:19 +0800 Subject: [PATCH] =?UTF-8?q?attr('href')=E5=A2=9E=E5=8A=A0=E5=A4=84?= =?UTF-8?q?=E7=90=86javascript=E3=80=81mailto=E3=80=81#=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/session_element.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index 18a5bd8..19eb603 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -106,12 +106,20 @@ class SessionElement(DrissionElement): if attr == 'href': # 如直接获取attr只能获取相对地址 link = self._inner_ele.attrs['href'] - if link.startswith('?'): # 避免当相对URL以?开头时requests-html丢失参数的bug + if link.startswith(('javascript:', 'mailto:')): + return link + elif link.startswith('#'): + if '#' in self.inner_ele.url: + return re.sub(r'#.*', link, self.inner_ele.url) + else: + return f'{self.inner_ele.url}{link}' + elif link.startswith('?'): # 避免当相对URL以?开头时requests-html丢失参数的bug if '?' in self.inner_ele.url: return re.sub(r'\?.*', link, self.inner_ele.url) else: return f'{self.inner_ele.url}{link}' else: + self._inner_ele.skip_anchors = False for link in self._inner_ele.absolute_links: return link elif attr == 'src':