From e439c1f679011b8c5d4cacd5c15c121f77ec52b2 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sat, 7 Nov 2020 15:46:35 +0800 Subject: [PATCH] =?UTF-8?q?SessionElement=E7=9A=84attr()=E5=88=A0=E9=99=A4?= =?UTF-8?q?=E5=AF=B9#=E5=92=8C=3F=E5=BC=80=E5=A4=B4=E7=9B=B8=E5=AF=B9?= =?UTF-8?q?=E8=B7=AF=E5=BE=84=E7=9A=84=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/session_element.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index d3182e3..94f8084 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -231,23 +231,18 @@ class SessionElement(DrissionElement): :return: 属性值文本,没有该属性返回None """ try: + # 获取href属性时返回绝对url if attr == 'href': - # 如直接获取attr只能获取相对地址 link = self.inner_ele.get('href') + + # 若链接为js或邮件,直接返回 if link.lower().startswith(('javascript:', 'mailto:')): return link - elif link.startswith('#'): - if '#' in self.page.url: - return re.sub(r'#.*', link, self.page.url) - else: - return f'{self.page.url}{link}' - elif link.startswith('?'): # 避免当相对url以?开头时丢失参数的bug TODO:测试是否还存在 - if '?' in self.page.url: - return re.sub(r'\?.*', link, self.page.url) - else: - return f'{self.page.url}{link}' + + # 其它情况直接返回绝对url else: return self._make_absolute(link) + elif attr == 'src': return self._make_absolute(self.inner_ele.get('src')) elif attr == 'text': @@ -293,12 +288,13 @@ def execute_session_find(page_or_ele, if mode not in ['single', 'all']: raise ValueError(f"Argument mode can only be 'single' or 'all', not '{mode}'.") + # 根据传入对象类型获取页面对象和lxml元素对象 if isinstance(page_or_ele, SessionElement): page = page_or_ele.page page_or_ele = page_or_ele.inner_ele else: # 传入的是SessionPage对象 page = page_or_ele - page_or_ele = get_HtmlElement(page_or_ele.response.text) + page_or_ele = HTML(page_or_ele.response.text) try: # 用lxml内置方法获取lxml的元素对象列表 @@ -328,8 +324,3 @@ def execute_session_find(page_or_ele, except SelectorSyntaxError: raise SyntaxError('Invalid css selector syntax.', loc) - - -def get_HtmlElement(html: str) -> _Element: - """从html文本生成lxml的元素对象""" - return HTML(html)