From d317e0330e9811f6fccc0a3e8398d8c3f6135335 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 1 Jun 2020 19:48:43 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=8E=B7=E5=8F=96href?= =?UTF-8?q?=E7=9A=84=E4=BB=A3=E7=A0=81=EF=BC=8C=E9=81=BF=E5=85=8Drequests-?= =?UTF-8?q?html=E4=B8=A2=E5=A4=B1=E5=8F=82=E6=95=B0=E7=9A=84bug=EF=BC=8C?= =?UTF-8?q?=E5=BE=85=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/session_element.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index 4444175..d04254f 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -11,10 +11,12 @@ from typing import Union, List from requests_html import Element, BaseParser from .common import DrissionElement, get_loc_from_str, translate_loc_to_xpath +from urllib.parse import urlparse, urljoin class SessionElement(DrissionElement): """session模式的元素对象,包装了一个Element对象,并封装了常用功能""" + def __init__(self, ele: Element): super().__init__(ele) @@ -103,9 +105,15 @@ class SessionElement(DrissionElement): """获取属性值""" try: if attr == 'href': + # TODO: 须测试 # 如直接获取attr只能获取相对地址 - for link in self._inner_ele.absolute_links: - return link + link = self._inner_ele.attrs['href'] + parsed = urlparse(link) + if not parsed.netloc: + return urljoin(self._inner_ele.url, link) + if not parsed.scheme: + return urljoin(urlparse(self._inner_ele.url).scheme, link) + return link elif attr == 'class': class_str = '' for key, i in enumerate(self._inner_ele.attrs['class']):