2.0.0

2024-12-10 04:00:23 +08:00 · 2021-12-02 17:17:10 +08:00 · 2021-12-02 17:17:10 +08:00 · 710a6db736
commit 710a6db736
parent bec93268ba
7 changed files with 35 additions and 28 deletions
--- a/DrissionPage/drission.py
+++ b/DrissionPage/drission.py
@@ -484,7 +484,7 @@ def _create_driver(chrome_path: str, driver_path: str, options: Options) -> WebD
                except:
                    pass

-    print('无法启动，请手动设置chromedriver。\n下载地址：http://npm.taobao.org/mirrors/chromedriver/')
+    print('无法启动，请检查浏览器路径，或手动设置chromedriver。\n下载地址：http://npm.taobao.org/mirrors/chromedriver/')
    exit(0)


--- a/DrissionPage/driver_element.py
+++ b/DrissionPage/driver_element.py
@@ -105,10 +105,16 @@ class DriverElement(DrissionElement):
        :param attr: 属性名
        :return: 属性值文本
        """
-        if attr in ('text', 'innerText'):
+        if attr == 'text':
            return self.text
-
-        return format_html(self.inner_ele.get_attribute(attr))
+        elif attr == 'innerText':
+            return self.raw_text
+        elif attr in ('html', 'outerHTML'):
+            return self.html
+        elif attr == 'innerHTML':
+            return self.inner_html
+        else:
+            return format_html(self.inner_ele.get_attribute(attr))

    def ele(self,
            loc_or_str: Union[Tuple[str, str], str],
--- a/DrissionPage/easy_set.py
+++ b/DrissionPage/easy_set.py
@@ -261,7 +261,8 @@ def _get_chrome_path(ini_path: str = None,
        path = None

    if path and Path(path).is_file():
-        print('ini文件中', end='')
+        if show_msg:
+            print('ini文件中', end='')
        return str(path)

    # -----------从注册表中获取--------------
--- a/DrissionPage/session_element.py
+++ b/DrissionPage/session_element.py
@@ -85,22 +85,23 @@ class SessionElement(DrissionElement):
        # 获取href属性时返回绝对url
        if attr == 'href':
            link = self.inner_ele.get('href')
-
            # 若为链接为None、js或邮件，直接返回
            if not link or link.lower().startswith(('javascript:', 'mailto:')):
                return link

-            # 其它情况直接返回绝对url
-            else:
+            else:  # 其它情况直接返回绝对url
                return self._make_absolute(link)

        elif attr == 'src':
            return self._make_absolute(self.inner_ele.get('src'))

-        elif attr in ('text', 'innerText'):
+        elif attr == 'text':
            return self.text

-        elif attr == 'outerHTML':
+        elif attr == 'innerText':
+            return self.raw_text
+
+        elif attr in ('html', 'outerHTML'):
            return self.html

        elif attr == 'innerHTML':
--- a/DrissionPage/session_page.py
+++ b/DrissionPage/session_page.py
@@ -279,16 +279,16 @@ class SessionPage(BasePage):
        """
        if file_exists == 'skip' and Path(f'{goal_path}{sep}{rename}').exists():
            if show_msg:
-                print(f'{file_url}\n{goal_path}{sep}{rename}\n已跳过。\n')
-            return None, 'Skipped because a file with the same name already exists.'
+                print(f'{file_url}\n{goal_path}{sep}{rename}\n存在同名文件，已跳过。\n')
+            return None, '已跳过，因存在同名文件。'

        def do() -> tuple:
            kwargs['stream'] = True
            if 'timeout' not in kwargs:
                kwargs['timeout'] = 20

+            # 生成临时的response
            mode = 'post' if post_data is not None else 'get'
-            # 生成的response不写入self._response，是临时的
            r, info = self._make_response(file_url, mode=mode, data=post_data, show_errmsg=show_errmsg, **kwargs)

            if r is None:
@@ -298,8 +298,8 @@ class SessionPage(BasePage):

            if not r.ok:
                if show_errmsg:
-                    raise ConnectionError(f'连接状态码：{r.status_code}.')
-                return False, f'Status code: {r.status_code}.'
+                    raise ConnectionError(f'连接状态码：{r.status_code}')
+                return False, f'状态码：{r.status_code}'

            # -------------------获取文件名-------------------
            file_name = _get_download_file_name(file_url, r)
@@ -345,11 +345,11 @@ class SessionPage(BasePage):
                print(full_name if file_name == full_name else f'{file_name} -> {full_name}')
                print(f'正在下载到：{goal}')
                if skip:
-                    print('已跳过。\n')
+                    print('存在同名文件，已跳过。\n')

            # -------------------开始下载-------------------
            if skip:
-                return None, 'Skipped because a file with the same name already exists.'
+                return None, '已跳过，因存在同名文件。'

            # 获取远程文件大小
            content_length = r.headers.get('content-length')
@@ -373,23 +373,20 @@ class SessionPage(BasePage):
            except Exception as e:
                if show_errmsg:
                    raise ConnectionError(e)
-
-                download_status, info = False, f'Download failed.\n{e}'
+                download_status, info = False, f'下载失败。\n{e}'

            else:
                if full_path.stat().st_size == 0:
                    if show_errmsg:
                        raise ValueError('文件大小为0。')
-
-                    download_status, info = False, 'File size is 0.'
+                    download_status, info = False, '文件大小为0。'

                else:
                    download_status, info = True, str(full_path)

            finally:
-                if not download_status and full_path.exists():
+                if download_status is False and full_path.exists():
                    full_path.unlink()  # 删除下载出错文件
-
                r.close()

            # -------------------显示并返回值-------------------
@@ -406,7 +403,8 @@ class SessionPage(BasePage):
        if result[0] is False:  # 第一位为None表示跳过的情况
            for i in range(retry_times):
                sleep(retry_interval)
-                print(f'重试 {file_url}')
+                if show_msg:
+                    print(f'\n重试 {file_url}')

                result = do()
                if result[0] is not False:
@@ -431,7 +429,7 @@ class SessionPage(BasePage):
        if not url:
            if show_errmsg:
                raise ValueError('URL为空。')
-            return None, 'url is empty.'
+            return None, 'URL为空。'

        if mode not in ('get', 'post'):
            raise ValueError("mode参数只能是'get'或'post'。")
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@ requests 爬虫面对要登录的网站时，要分析数据包、JS 源码，

 - 代码高度集成，以简洁的代码为第一追求。
 - 页面对象可在 selenium 和 requests 模式间任意切换，保留登录状态。
-- 极简单但强大的元素查找功能，支持链式操作，代码极其简洁。
+- 极简单但强大的元素定位语法，支持链式操作，代码极其简洁。
 - 两种模式提供一致的 API，使用体验一致。
 - 人性化设计，集成众多实用功能，大大降低开发工作量。

@@ -41,6 +41,7 @@ requests 爬虫面对要登录的网站时，要分析数据包、JS 源码，

 - 每次运行程序可以反复使用已经打开的浏览器。如手动设置网页到某个状态，再用程序接管，或手动处理登录，再用程序爬内容。无须每次运行从头启动浏览器，超级方便。
 - 使用 ini 文件保存常用配置，自动调用，也提供便捷的设置api，远离繁杂的配置项。
+- 极致简明的定位语法，支持直接按文本定位元素，支持直接获取前后兄弟元素和父元素等。
 - 强大的下载工具，操作浏览器时也能享受快捷可靠的下载功能。
 - 下载工具支持多种方式处理文件名冲突、自动创建目标路径、断链重试等。
 - 访问网址带自动重试功能，可设置间隔和超时时间。
@@ -70,7 +71,7 @@ requests 爬虫面对要登录的网站时，要分析数据包、JS 源码，

 以下代码实现一模一样的功能，对比两者的代码量：

-- 用显性等待方式查找第一个文本包含 some text 的元素
+- 用显性等待方式定位第一个文本包含 some text 的元素

 ```python
 # 使用 selenium：
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh:

 setup(
    name="DrissionPage",
-    version="1.11.7",
+    version="2.0.0",
    author="g1879",
    author_email="g1879@qq.com",
    description="A module that integrates selenium and requests session, encapsulates common page operations.",