attr('href')增加处理javascript、mailto、#功能

This commit is contained in:
g1879 2020-06-02 23:11:19 +08:00
parent 7d02f81d7e
commit 6d5b9b019d

View File

@@ -106,12 +106,20 @@ class SessionElement(DrissionElement):
if attr == 'href':
# 如直接获取attr只能获取相对地址
link = self._inner_ele.attrs['href']
if link.startswith('?'): # 避免当相对URL以?开头时requests-html丢失参数的bug
if link.startswith(('javascript:', 'mailto:')):
return link
elif link.startswith('#'):
if '#' in self.inner_ele.url:
return re.sub(r'#.*', link, self.inner_ele.url)
else:
return f'{self.inner_ele.url}{link}'
elif link.startswith('?'): # 避免当相对URL以?开头时requests-html丢失参数的bug
if '?' in self.inner_ele.url:
return re.sub(r'\?.*', link, self.inner_ele.url)
else:
return f'{self.inner_ele.url}{link}'
else:
self._inner_ele.skip_anchors = False
for link in self._inner_ele.absolute_links:
return link
elif attr == 'src':