mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
修改获取href的代码,避免requests-html丢失参数的bug,待测试
This commit is contained in:
parent
ec95443182
commit
d317e0330e
@ -11,10 +11,12 @@ from typing import Union, List
|
|||||||
from requests_html import Element, BaseParser
|
from requests_html import Element, BaseParser
|
||||||
|
|
||||||
from .common import DrissionElement, get_loc_from_str, translate_loc_to_xpath
|
from .common import DrissionElement, get_loc_from_str, translate_loc_to_xpath
|
||||||
|
from urllib.parse import urlparse, urljoin
|
||||||
|
|
||||||
|
|
||||||
class SessionElement(DrissionElement):
|
class SessionElement(DrissionElement):
|
||||||
"""session模式的元素对象,包装了一个Element对象,并封装了常用功能"""
|
"""session模式的元素对象,包装了一个Element对象,并封装了常用功能"""
|
||||||
|
|
||||||
def __init__(self, ele: Element):
|
def __init__(self, ele: Element):
|
||||||
super().__init__(ele)
|
super().__init__(ele)
|
||||||
|
|
||||||
@ -103,9 +105,15 @@ class SessionElement(DrissionElement):
|
|||||||
"""获取属性值"""
|
"""获取属性值"""
|
||||||
try:
|
try:
|
||||||
if attr == 'href':
|
if attr == 'href':
|
||||||
|
# TODO: 须测试
|
||||||
# 如直接获取attr只能获取相对地址
|
# 如直接获取attr只能获取相对地址
|
||||||
for link in self._inner_ele.absolute_links:
|
link = self._inner_ele.attrs['href']
|
||||||
return link
|
parsed = urlparse(link)
|
||||||
|
if not parsed.netloc:
|
||||||
|
return urljoin(self._inner_ele.url, link)
|
||||||
|
if not parsed.scheme:
|
||||||
|
return urljoin(urlparse(self._inner_ele.url).scheme, link)
|
||||||
|
return link
|
||||||
elif attr == 'class':
|
elif attr == 'class':
|
||||||
class_str = ''
|
class_str = ''
|
||||||
for key, i in enumerate(self._inner_ele.attrs['class']):
|
for key, i in enumerate(self._inner_ele.attrs['class']):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user