mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
修改格式
This commit is contained in:
parent
8d49d9accb
commit
9b3158b866
@ -68,7 +68,7 @@ class DriverElement(DrissionElement):
|
|||||||
if text_node_only:
|
if text_node_only:
|
||||||
return self.eles('xpath:./text()')
|
return self.eles('xpath:./text()')
|
||||||
else:
|
else:
|
||||||
return list(map(lambda x: x if isinstance(x, str) else x.text, self.eles('xpath:./node()')))
|
return [x if isinstance(x, str) else x.text for x in self.eles('xpath:./node()')]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def html(self) -> str:
|
def html(self) -> str:
|
||||||
@ -348,8 +348,8 @@ class DriverElement(DrissionElement):
|
|||||||
|
|
||||||
# 等待元素加载完成
|
# 等待元素加载完成
|
||||||
if self.tag == 'img':
|
if self.tag == 'img':
|
||||||
js = 'return arguments[0].complete && typeof arguments[0].naturalWidth != "undefined" ' \
|
js = ('return arguments[0].complete && typeof arguments[0].naturalWidth != "undefined" '
|
||||||
'&& arguments[0].naturalWidth > 0'
|
'&& arguments[0].naturalWidth > 0')
|
||||||
while not self.run_script(js):
|
while not self.run_script(js):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -587,8 +587,8 @@ class ElementsByXpath(object):
|
|||||||
|
|
||||||
def __call__(self, ele_or_driver: Union[WebDriver, WebElement]) \
|
def __call__(self, ele_or_driver: Union[WebDriver, WebElement]) \
|
||||||
-> Union[str, DriverElement, None, List[str or DriverElement]]:
|
-> Union[str, DriverElement, None, List[str or DriverElement]]:
|
||||||
driver, the_node = (ele_or_driver, 'document') if isinstance(ele_or_driver, WebDriver) \
|
driver, the_node = ((ele_or_driver, 'document') if isinstance(ele_or_driver, WebDriver)
|
||||||
else (ele_or_driver.parent, ele_or_driver)
|
else (ele_or_driver.parent, ele_or_driver))
|
||||||
|
|
||||||
def get_nodes(node=None, xpath_txt=None, type_txt='7'):
|
def get_nodes(node=None, xpath_txt=None, type_txt='7'):
|
||||||
"""用js通过xpath获取元素、节点或属性
|
"""用js通过xpath获取元素、节点或属性
|
||||||
@ -637,8 +637,8 @@ class ElementsByXpath(object):
|
|||||||
if self.mode == 'single':
|
if self.mode == 'single':
|
||||||
try:
|
try:
|
||||||
e = get_nodes(the_node, xpath_txt=self.xpath, type_txt='9')
|
e = get_nodes(the_node, xpath_txt=self.xpath, type_txt='9')
|
||||||
return DriverElement(e, self.page, self.timeout) \
|
return (DriverElement(e, self.page, self.timeout)
|
||||||
if isinstance(e, WebElement) else unescape(e).replace('\xa0', ' ')
|
if isinstance(e, WebElement) else unescape(e).replace('\xa0', ' '))
|
||||||
|
|
||||||
# 找不到目标时
|
# 找不到目标时
|
||||||
except JavascriptException:
|
except JavascriptException:
|
||||||
@ -647,10 +647,7 @@ class ElementsByXpath(object):
|
|||||||
elif self.mode == 'all':
|
elif self.mode == 'all':
|
||||||
e = get_nodes(the_node, xpath_txt=self.xpath)
|
e = get_nodes(the_node, xpath_txt=self.xpath)
|
||||||
|
|
||||||
# 去除元素间换行符
|
# 去除元素间换行符并替换空格
|
||||||
e = filter(lambda x: x != '\n', e)
|
e = (unescape(x).replace('\xa0', ' ') if isinstance(x, str) else x for x in e if x != '\n')
|
||||||
|
|
||||||
# 替换空格
|
return [DriverElement(x, self.page, self.timeout) if isinstance(x, WebElement) else x for x in e]
|
||||||
e = map(lambda x: unescape(x).replace('\xa0', ' ') if isinstance(x, str) else x, e)
|
|
||||||
|
|
||||||
return list(map(lambda x: DriverElement(x, self.page, self.timeout) if isinstance(x, WebElement) else x, e))
|
|
||||||
|
@ -192,7 +192,7 @@ class SessionElement(DrissionElement):
|
|||||||
:param attr: 属性名
|
:param attr: 属性名
|
||||||
:return: 属性值文本,没有该属性返回None
|
:return: 属性值文本,没有该属性返回None
|
||||||
"""
|
"""
|
||||||
try:
|
# try:
|
||||||
# 获取href属性时返回绝对url
|
# 获取href属性时返回绝对url
|
||||||
if attr == 'href':
|
if attr == 'href':
|
||||||
link = self.inner_ele.get('href')
|
link = self.inner_ele.get('href')
|
||||||
@ -207,16 +207,21 @@ class SessionElement(DrissionElement):
|
|||||||
|
|
||||||
elif attr == 'src':
|
elif attr == 'src':
|
||||||
return self._make_absolute(self.inner_ele.get('src'))
|
return self._make_absolute(self.inner_ele.get('src'))
|
||||||
|
|
||||||
elif attr == 'text':
|
elif attr == 'text':
|
||||||
return self.text
|
return self.text
|
||||||
|
|
||||||
elif attr == 'outerHTML':
|
elif attr == 'outerHTML':
|
||||||
return unescape(tostring(self._inner_ele).decode()).replace('\xa0', ' ')
|
return unescape(tostring(self._inner_ele).decode()).replace('\xa0', ' ')
|
||||||
|
|
||||||
elif attr == 'innerHTML':
|
elif attr == 'innerHTML':
|
||||||
return self.html
|
return self.html
|
||||||
|
|
||||||
else:
|
else:
|
||||||
return self.inner_ele.get(attr)
|
return self.inner_ele.get(attr)
|
||||||
except:
|
|
||||||
return None
|
# except:
|
||||||
|
# return None
|
||||||
|
|
||||||
# -----------------私有函数-------------------
|
# -----------------私有函数-------------------
|
||||||
def _make_absolute(self, link):
|
def _make_absolute(self, link):
|
||||||
@ -240,18 +245,23 @@ class SessionElement(DrissionElement):
|
|||||||
"""获取css路径或xpath路径"""
|
"""获取css路径或xpath路径"""
|
||||||
path_str = ''
|
path_str = ''
|
||||||
ele = self
|
ele = self
|
||||||
|
|
||||||
while ele:
|
while ele:
|
||||||
ele_id = ele.attr('id')
|
ele_id = ele.attr('id')
|
||||||
|
|
||||||
if ele_id:
|
if ele_id:
|
||||||
return f'#{ele_id}{path_str}' if mode == 'css' else f'//{ele.tag}[@id="{ele_id}"]{path_str}'
|
return f'#{ele_id}{path_str}' if mode == 'css' else f'//{ele.tag}[@id="{ele_id}"]{path_str}'
|
||||||
else:
|
else:
|
||||||
|
|
||||||
if mode == 'css':
|
if mode == 'css':
|
||||||
brothers = len(ele.eles(f'xpath:./preceding-sibling::*'))
|
brothers = len(ele.eles(f'xpath:./preceding-sibling::*'))
|
||||||
path_str = f'>:nth-child({brothers + 1}){path_str}'
|
path_str = f'>:nth-child({brothers + 1}){path_str}'
|
||||||
else:
|
else:
|
||||||
brothers = len(ele.eles(f'xpath:./preceding-sibling::{ele.tag}'))
|
brothers = len(ele.eles(f'xpath:./preceding-sibling::{ele.tag}'))
|
||||||
path_str = f'/{ele.tag}[{brothers + 1}]{path_str}' if brothers > 0 else f'/{ele.tag}{path_str}'
|
path_str = f'/{ele.tag}[{brothers + 1}]{path_str}' if brothers > 0 else f'/{ele.tag}{path_str}'
|
||||||
|
|
||||||
ele = ele.parent
|
ele = ele.parent
|
||||||
|
|
||||||
return path_str[1:] if mode == 'css' else path_str
|
return path_str[1:] if mode == 'css' else path_str
|
||||||
|
|
||||||
def _get_brother(self, num: int = 1, mode: str = 'ele', direction: str = 'next'):
|
def _get_brother(self, num: int = 1, mode: str = 'ele', direction: str = 'next'):
|
||||||
@ -316,23 +326,23 @@ def execute_session_find(page_or_ele,
|
|||||||
# 用lxml内置方法获取lxml的元素对象列表
|
# 用lxml内置方法获取lxml的元素对象列表
|
||||||
if loc[0] == 'xpath':
|
if loc[0] == 'xpath':
|
||||||
ele = page_or_ele.xpath(loc[1])
|
ele = page_or_ele.xpath(loc[1])
|
||||||
else: # 用css selector获取
|
else: # 用css selector获取元素对象列表
|
||||||
ele = page_or_ele.cssselect(loc[1])
|
ele = page_or_ele.cssselect(loc[1])
|
||||||
|
|
||||||
# 把lxml元素对象包装成SessionElement对象并按需要返回第一个或全部
|
# 把lxml元素对象包装成SessionElement对象并按需要返回第一个或全部
|
||||||
if mode == 'single':
|
if mode == 'single':
|
||||||
ele = ele[0] if ele else None
|
ele = ele[0] if ele else None
|
||||||
|
|
||||||
if isinstance(ele, _Element):
|
if isinstance(ele, _Element):
|
||||||
return SessionElement(ele, page)
|
return SessionElement(ele, page)
|
||||||
elif isinstance(ele, str):
|
elif isinstance(ele, str):
|
||||||
return unescape(ele).replace('\xa0', ' ')
|
return unescape(ele).replace('\xa0', ' ')
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
elif mode == 'all':
|
elif mode == 'all':
|
||||||
# 去除元素间换行符
|
# 去除元素间换行符并替换空格
|
||||||
ele = filter(lambda x: x != '\n', ele)
|
ele = (unescape(x).replace('\xa0', ' ') if isinstance(x, str) else x for x in ele if x != '\n')
|
||||||
# 处理空格
|
|
||||||
ele = map(lambda x: unescape(x).replace('\xa0', ' ') if isinstance(x, str) else x, ele)
|
|
||||||
return [SessionElement(e, page) if isinstance(e, _Element) else e for e in ele]
|
return [SessionElement(e, page) if isinstance(e, _Element) else e for e in ele]
|
||||||
|
|
||||||
except XPathEvalError:
|
except XPathEvalError:
|
||||||
|
@ -409,10 +409,12 @@ class SessionPage(object):
|
|||||||
r = self.session.get(url, **kwargs)
|
r = self.session.get(url, **kwargs)
|
||||||
elif mode == 'post':
|
elif mode == 'post':
|
||||||
r = self.session.post(url, data=data, **kwargs)
|
r = self.session.post(url, data=data, **kwargs)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if show_errmsg:
|
if show_errmsg:
|
||||||
raise e
|
raise e
|
||||||
return None, e
|
return None, e
|
||||||
|
|
||||||
else:
|
else:
|
||||||
headers = dict(r.headers)
|
headers = dict(r.headers)
|
||||||
content_type = tuple(x for x in headers if x.lower() == 'content-type')
|
content_type = tuple(x for x in headers if x.lower() == 'content-type')
|
||||||
|
1095
README.zh-cn.md
1095
README.zh-cn.md
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user