修改格式

This commit is contained in:
g1879 2020-11-10 17:57:48 +08:00
parent 8d49d9accb
commit 9b3158b866
5 changed files with 863 additions and 357 deletions

View File

@ -68,7 +68,7 @@ class DriverElement(DrissionElement):
if text_node_only: if text_node_only:
return self.eles('xpath:./text()') return self.eles('xpath:./text()')
else: else:
return list(map(lambda x: x if isinstance(x, str) else x.text, self.eles('xpath:./node()'))) return [x if isinstance(x, str) else x.text for x in self.eles('xpath:./node()')]
@property @property
def html(self) -> str: def html(self) -> str:
@ -139,13 +139,13 @@ class DriverElement(DrissionElement):
loc_or_str: Union[Tuple[str, str], str], loc_or_str: Union[Tuple[str, str], str],
mode: str = None, mode: str = None,
timeout: float = None): timeout: float = None):
"""返回当前元素下级符合条件的子元素,默认返回第一个 \n """返回当前元素下级符合条件的子元素,默认返回第一个 \n
示例 \n 示例 \n
- 用loc元组查找 \n - 用loc元组查找 \n
ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n ele.ele((By.CLASS_NAME, 'ele_class')) - 返回第一个class为ele_class的子元素 \n
- 用查询字符串查找 \n - 用查询字符串查找 \n
查找方式属性tag name和属性文本xpathcss selector \n 查找方式属性tag name和属性文本xpathcss selector \n
其中@表示属性=表示精确匹配:表示模糊匹配无控制字符串时默认搜索该字符串 \n 其中@表示属性=表示精确匹配:表示模糊匹配无控制字符串时默认搜索该字符串 \n
ele.ele('@class:ele_class') - 返回第一个class含有ele_class的子元素 \n ele.ele('@class:ele_class') - 返回第一个class含有ele_class的子元素 \n
ele.ele('@name=ele_name') - 返回第一个name等于ele_name的子元素 \n ele.ele('@name=ele_name') - 返回第一个name等于ele_name的子元素 \n
ele.ele('@placeholder') - 返回第一个带placeholder属性的子元素 \n ele.ele('@placeholder') - 返回第一个带placeholder属性的子元素 \n
@ -196,12 +196,12 @@ class DriverElement(DrissionElement):
loc_or_str: Union[Tuple[str, str], str], loc_or_str: Union[Tuple[str, str], str],
timeout: float = None): timeout: float = None):
"""返回当前元素下级所有符合条件的子元素 \n """返回当前元素下级所有符合条件的子元素 \n
示例 \n 示例 \n
- 用loc元组查找 \n - 用loc元组查找 \n
ele.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n ele.eles((By.CLASS_NAME, 'ele_class')) - 返回所有class为ele_class的子元素 \n
- 用查询字符串查找 \n - 用查询字符串查找 \n
查找方式属性tag name和属性文本xpathcss selector \n 查找方式属性tag name和属性文本xpathcss selector \n
其中@表示属性=表示精确匹配:表示模糊匹配无控制字符串时默认搜索该字符串 \n 其中@表示属性=表示精确匹配:表示模糊匹配无控制字符串时默认搜索该字符串 \n
ele.eles('@class:ele_class') - 返回所有class含有ele_class的子元素 \n ele.eles('@class:ele_class') - 返回所有class含有ele_class的子元素 \n
ele.eles('@name=ele_name') - 返回所有name等于ele_name的子元素 \n ele.eles('@name=ele_name') - 返回所有name等于ele_name的子元素 \n
ele.eles('@placeholder') - 返回所有带placeholder属性的子元素 \n ele.eles('@placeholder') - 返回所有带placeholder属性的子元素 \n
@ -348,8 +348,8 @@ class DriverElement(DrissionElement):
# 等待元素加载完成 # 等待元素加载完成
if self.tag == 'img': if self.tag == 'img':
js = 'return arguments[0].complete && typeof arguments[0].naturalWidth != "undefined" ' \ js = ('return arguments[0].complete && typeof arguments[0].naturalWidth != "undefined" '
'&& arguments[0].naturalWidth > 0' '&& arguments[0].naturalWidth > 0')
while not self.run_script(js): while not self.run_script(js):
pass pass
@ -587,8 +587,8 @@ class ElementsByXpath(object):
def __call__(self, ele_or_driver: Union[WebDriver, WebElement]) \ def __call__(self, ele_or_driver: Union[WebDriver, WebElement]) \
-> Union[str, DriverElement, None, List[str or DriverElement]]: -> Union[str, DriverElement, None, List[str or DriverElement]]:
driver, the_node = (ele_or_driver, 'document') if isinstance(ele_or_driver, WebDriver) \ driver, the_node = ((ele_or_driver, 'document') if isinstance(ele_or_driver, WebDriver)
else (ele_or_driver.parent, ele_or_driver) else (ele_or_driver.parent, ele_or_driver))
def get_nodes(node=None, xpath_txt=None, type_txt='7'): def get_nodes(node=None, xpath_txt=None, type_txt='7'):
"""用js通过xpath获取元素、节点或属性 """用js通过xpath获取元素、节点或属性
@ -628,7 +628,7 @@ class ElementsByXpath(object):
return_txt = 'return e.singleNodeValue;' return_txt = 'return e.singleNodeValue;'
js = """ js = """
var e=document.evaluate('""" + xpath_txt + """', """ + node_txt + """, null, """ + type_txt + """, null); var e=document.evaluate('""" + xpath_txt + """', """ + node_txt + """, null, """ + type_txt + """,null);
""" + for_txt + """ """ + for_txt + """
""" + return_txt + """ """ + return_txt + """
""" """
@ -637,8 +637,8 @@ class ElementsByXpath(object):
if self.mode == 'single': if self.mode == 'single':
try: try:
e = get_nodes(the_node, xpath_txt=self.xpath, type_txt='9') e = get_nodes(the_node, xpath_txt=self.xpath, type_txt='9')
return DriverElement(e, self.page, self.timeout) \ return (DriverElement(e, self.page, self.timeout)
if isinstance(e, WebElement) else unescape(e).replace('\xa0', ' ') if isinstance(e, WebElement) else unescape(e).replace('\xa0', ' '))
# 找不到目标时 # 找不到目标时
except JavascriptException: except JavascriptException:
@ -647,10 +647,7 @@ class ElementsByXpath(object):
elif self.mode == 'all': elif self.mode == 'all':
e = get_nodes(the_node, xpath_txt=self.xpath) e = get_nodes(the_node, xpath_txt=self.xpath)
# 去除元素间换行符 # 去除元素间换行符并替换空格
e = filter(lambda x: x != '\n', e) e = (unescape(x).replace('\xa0', ' ') if isinstance(x, str) else x for x in e if x != '\n')
# 替换空格 return [DriverElement(x, self.page, self.timeout) if isinstance(x, WebElement) else x for x in e]
e = map(lambda x: unescape(x).replace('\xa0', ' ') if isinstance(x, str) else x, e)
return list(map(lambda x: DriverElement(x, self.page, self.timeout) if isinstance(x, WebElement) else x, e))

View File

@ -152,8 +152,8 @@ def check_driver_version(driver_path: str = None, chrome_path: str = None) -> bo
return True return True
except Exception as e: except Exception as e:
info = f''' info = f'''
出现异常 出现异常
{e}chromedriver下载网址https://chromedriver.chromium.org/downloads {e}chromedriver下载网址https://chromedriver.chromium.org/downloads
''' '''
print(info) print(info)
return False return False

View File

@ -192,31 +192,36 @@ class SessionElement(DrissionElement):
:param attr: 属性名 :param attr: 属性名
:return: 属性值文本没有该属性返回None :return: 属性值文本没有该属性返回None
""" """
try: # try:
# 获取href属性时返回绝对url # 获取href属性时返回绝对url
if attr == 'href': if attr == 'href':
link = self.inner_ele.get('href') link = self.inner_ele.get('href')
# 若链接为js或邮件直接返回 # 若链接为js或邮件直接返回
if link.lower().startswith(('javascript:', 'mailto:')): if link.lower().startswith(('javascript:', 'mailto:')):
return link return link
# 其它情况直接返回绝对url # 其它情况直接返回绝对url
else:
return self._make_absolute(link)
elif attr == 'src':
return self._make_absolute(self.inner_ele.get('src'))
elif attr == 'text':
return self.text
elif attr == 'outerHTML':
return unescape(tostring(self._inner_ele).decode()).replace('\xa0', ' ')
elif attr == 'innerHTML':
return self.html
else: else:
return self.inner_ele.get(attr) return self._make_absolute(link)
except:
return None elif attr == 'src':
return self._make_absolute(self.inner_ele.get('src'))
elif attr == 'text':
return self.text
elif attr == 'outerHTML':
return unescape(tostring(self._inner_ele).decode()).replace('\xa0', ' ')
elif attr == 'innerHTML':
return self.html
else:
return self.inner_ele.get(attr)
# except:
# return None
# -----------------私有函数------------------- # -----------------私有函数-------------------
def _make_absolute(self, link): def _make_absolute(self, link):
@ -240,18 +245,23 @@ class SessionElement(DrissionElement):
"""获取css路径或xpath路径""" """获取css路径或xpath路径"""
path_str = '' path_str = ''
ele = self ele = self
while ele: while ele:
ele_id = ele.attr('id') ele_id = ele.attr('id')
if ele_id: if ele_id:
return f'#{ele_id}{path_str}' if mode == 'css' else f'//{ele.tag}[@id="{ele_id}"]{path_str}' return f'#{ele_id}{path_str}' if mode == 'css' else f'//{ele.tag}[@id="{ele_id}"]{path_str}'
else: else:
if mode == 'css': if mode == 'css':
brothers = len(ele.eles(f'xpath:./preceding-sibling::*')) brothers = len(ele.eles(f'xpath:./preceding-sibling::*'))
path_str = f'>:nth-child({brothers + 1}){path_str}' path_str = f'>:nth-child({brothers + 1}){path_str}'
else: else:
brothers = len(ele.eles(f'xpath:./preceding-sibling::{ele.tag}')) brothers = len(ele.eles(f'xpath:./preceding-sibling::{ele.tag}'))
path_str = f'/{ele.tag}[{brothers + 1}]{path_str}' if brothers > 0 else f'/{ele.tag}{path_str}' path_str = f'/{ele.tag}[{brothers + 1}]{path_str}' if brothers > 0 else f'/{ele.tag}{path_str}'
ele = ele.parent ele = ele.parent
return path_str[1:] if mode == 'css' else path_str return path_str[1:] if mode == 'css' else path_str
def _get_brother(self, num: int = 1, mode: str = 'ele', direction: str = 'next'): def _get_brother(self, num: int = 1, mode: str = 'ele', direction: str = 'next'):
@ -316,23 +326,23 @@ def execute_session_find(page_or_ele,
# 用lxml内置方法获取lxml的元素对象列表 # 用lxml内置方法获取lxml的元素对象列表
if loc[0] == 'xpath': if loc[0] == 'xpath':
ele = page_or_ele.xpath(loc[1]) ele = page_or_ele.xpath(loc[1])
else: # 用css selector获取 else: # 用css selector获取元素对象列表
ele = page_or_ele.cssselect(loc[1]) ele = page_or_ele.cssselect(loc[1])
# 把lxml元素对象包装成SessionElement对象并按需要返回第一个或全部 # 把lxml元素对象包装成SessionElement对象并按需要返回第一个或全部
if mode == 'single': if mode == 'single':
ele = ele[0] if ele else None ele = ele[0] if ele else None
if isinstance(ele, _Element): if isinstance(ele, _Element):
return SessionElement(ele, page) return SessionElement(ele, page)
elif isinstance(ele, str): elif isinstance(ele, str):
return unescape(ele).replace('\xa0', ' ') return unescape(ele).replace('\xa0', ' ')
else: else:
return None return None
elif mode == 'all': elif mode == 'all':
# 去除元素间换行符 # 去除元素间换行符并替换空格
ele = filter(lambda x: x != '\n', ele) ele = (unescape(x).replace('\xa0', ' ') if isinstance(x, str) else x for x in ele if x != '\n')
# 处理空格
ele = map(lambda x: unescape(x).replace('\xa0', ' ') if isinstance(x, str) else x, ele)
return [SessionElement(e, page) if isinstance(e, _Element) else e for e in ele] return [SessionElement(e, page) if isinstance(e, _Element) else e for e in ele]
except XPathEvalError: except XPathEvalError:

View File

@ -409,10 +409,12 @@ class SessionPage(object):
r = self.session.get(url, **kwargs) r = self.session.get(url, **kwargs)
elif mode == 'post': elif mode == 'post':
r = self.session.post(url, data=data, **kwargs) r = self.session.post(url, data=data, **kwargs)
except Exception as e: except Exception as e:
if show_errmsg: if show_errmsg:
raise e raise e
return None, e return None, e
else: else:
headers = dict(r.headers) headers = dict(r.headers)
content_type = tuple(x for x in headers if x.lower() == 'content-type') content_type = tuple(x for x in headers if x.lower() == 'content-type')

File diff suppressed because it is too large Load Diff