mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
弃用requests_html,未完成
This commit is contained in:
parent
41d700a3d9
commit
c9eae68acb
@ -113,21 +113,45 @@ class SessionElement(DrissionElement):
|
||||
"""
|
||||
return self.ele(f'xpath:..{"/.." * (num - 1)}')
|
||||
|
||||
def nexts(self, num: int = 1, mode: str = 'ele'):
    """Return the num-th sibling that follows this element.

    :param num: position of the wanted sibling on the following-sibling axis
    :param mode: 'ele' to match element siblings only, 'node' to match any node
    :return: whatever ``self.ele`` returns for the final lookup
    :raises ValueError: if mode is neither 'ele' nor 'node'
    """
    if mode == 'ele':
        node_txt = '*'
    elif mode == 'node':
        node_txt = 'node()'
    else:
        raise ValueError("Argument mode can only be 'node' or 'ele'.")

    while True:
        e = self.ele(f'xpath:./following-sibling::{node_txt}[{num}]', show_errmsg=False)
        # A result equal to a bare newline is treated as formatting noise:
        # move one position further and look again.
        if e != '\n':
            return e
        num += 1
|
||||
|
||||
def prevs(self, num: int = 1, mode: str = 'ele'):
    """Return the num-th sibling that precedes this element.

    :param num: position of the wanted sibling on the preceding-sibling axis
    :param mode: 'ele' to match element siblings only, 'node' to match any node
    :return: whatever ``self.ele`` returns for the final lookup
    :raises ValueError: if mode is neither 'ele' nor 'node'
    """
    if mode == 'ele':
        node_txt = '*'
    elif mode == 'node':
        node_txt = 'node()'
    else:
        raise ValueError("Argument mode can only be 'node' or 'ele'.")

    while True:
        e = self.ele(f'xpath:./preceding-sibling::{node_txt}[{num}]', show_errmsg=False)
        # A result equal to a bare newline is treated as formatting noise:
        # move one position further and look again.
        if e != '\n':
            return e
        num += 1
|
||||
|
||||
def ele(self, loc_or_str: Union[Tuple[str, str], str], mode: str = None, show_errmsg: bool = False):
|
||||
"""返回当前元素下级符合条件的子元素,默认返回第一个 \n
|
||||
@ -207,44 +231,45 @@ class SessionElement(DrissionElement):
|
||||
"""
|
||||
return self.ele(loc_or_str, mode='all', show_errmsg=show_errmsg)
|
||||
|
||||
# def attr(self, attr: str) -> Union[str, None]:
|
||||
# """返回属性值 \n
|
||||
# :param attr: 属性名
|
||||
# :return: 属性值文本,没有该属性返回None
|
||||
# """
|
||||
# try:
|
||||
# if attr == 'href':
|
||||
# # 如直接获取attr只能获取相对地址
|
||||
# link = self._inner_ele.attrs['href']
|
||||
# if link.lower().startswith(('javascript:', 'mailto:')):
|
||||
# return link
|
||||
# elif link.startswith('#'):
|
||||
# if '#' in self.inner_ele.url:
|
||||
# return re.sub(r'#.*', link, self.inner_ele.url)
|
||||
# else:
|
||||
# return f'{self.inner_ele.url}{link}'
|
||||
# elif link.startswith('?'): # 避免当相对URL以?开头时requests-html丢失参数的bug
|
||||
# if '?' in self.inner_ele.url:
|
||||
# return re.sub(r'\?.*', link, self.inner_ele.url)
|
||||
# else:
|
||||
# return f'{self.inner_ele.url}{link}'
|
||||
# else:
|
||||
# for link in self._inner_ele.absolute_links:
|
||||
# return link
|
||||
# elif attr == 'src':
|
||||
# return self._inner_ele._make_absolute(self._inner_ele.attrs['src'])
|
||||
# elif attr == 'class':
|
||||
# return ' '.join(self._inner_ele.attrs['class'])
|
||||
# elif attr == 'text':
|
||||
# return self.text
|
||||
# elif attr == 'outerHTML':
|
||||
# return self.inner_ele.html
|
||||
# elif attr == 'innerHTML':
|
||||
# return self.html
|
||||
# else:
|
||||
# return self._inner_ele.attrs[attr]
|
||||
# except:
|
||||
# return None
|
||||
def attr(self, attr: str) -> Union[str, None]:
    """Return the value of one attribute of this element.

    ``href`` is resolved against ``self.url`` so callers get an absolute
    address; ``javascript:`` and ``mailto:`` links are returned unchanged.
    Every other attribute is returned exactly as ``self.inner_ele.get``
    reports it.

    :param attr: attribute name
    :return: attribute value as text, or None if the attribute is missing
    """
    # Function-scope import keeps the method self-contained.
    from urllib.parse import urljoin

    try:
        if attr != 'href':
            # NOTE(review): 'src' is returned as written in the document,
            # not resolved to an absolute address - confirm this is intended.
            return self.inner_ele.get(attr)

        link = self.inner_ele.get('href')
        if link is None:
            return None

        if link.lower().startswith(('javascript:', 'mailto:')):
            return link

        # urljoin handles '#fragment', '?query', relative and absolute links.
        return urljoin(self.url, link)

    except Exception:
        # Best effort: any lookup or resolution failure reads as "no value".
        return None
|
||||
|
||||
|
||||
def execute_session_find(page_or_ele: _Element,
|
||||
|
@ -204,7 +204,7 @@ class SessionPage(object):
|
||||
self._url_available = True
|
||||
else:
|
||||
if show_errmsg:
|
||||
raise ConnectionError(f'Status code: {self._response.status_code}.')
|
||||
raise ConnectionError(f'{to_url}\nStatus code: {self._response.status_code}.')
|
||||
self._url_available = False
|
||||
return self._url_available
|
||||
|
||||
@ -433,9 +433,10 @@ class SessionPage(object):
|
||||
else:
|
||||
charset = headers[content_type[0]].split('=')[1]
|
||||
|
||||
if charset:
|
||||
r.encoding = charset
|
||||
|
||||
if not_stream: # 加载网页时修复编码
|
||||
r._content = r.content.replace(b'\x08', b'\\b') # 避免存在退格符导致乱码或解析出错
|
||||
# r.html.encoding = r.encoding # 修复requests_html丢失编码方式的bug
|
||||
if charset:
|
||||
r.encoding = charset
|
||||
return r, 'Success'
|
||||
|
Loading…
x
Reference in New Issue
Block a user