diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index 3106d02..339da01 100644 --- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -1251,7 +1251,7 @@ def run_script(page_or_ele, script, as_expr=False, timeout=None, args=None, not_ :param script: js文本 :param as_expr: 是否作为表达式运行,为True时args无效 :param timeout: 超时时间 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[2]... + :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... :param not_change: 执行时是否切换页面对象模式 :return: js执行结果 """ diff --git a/DrissionPage/chromium_frame.py b/DrissionPage/chromium_frame.py index 70ed271..b53ae36 100644 --- a/DrissionPage/chromium_frame.py +++ b/DrissionPage/chromium_frame.py @@ -13,6 +13,7 @@ from .chromium_element import ChromiumElement class ChromiumFrame(ChromiumBase): def __init__(self, page, ele): self.page = page + self.address = self.page.address node = page.run_cdp('DOM.describeNode', nodeId=ele.node_id, not_change=True)['node'] self.frame_id = node['frameId'] self._backend_id = ele.backend_id @@ -42,20 +43,23 @@ class ChromiumFrame(ChromiumBase): attrs = [f"{attr}='{attrs[attr]}'" for attr in attrs] return f'' - # def _reload_document(self): - # self._frame_ele = ChromiumElement(self.page, backend_id=self._backend_id) - # node = self.page.run_cdp('DOM.describeNode', nodeId=self._frame_ele.node_id, not_change=True)['node'] - # - # if self._is_inner_frame(): - # self._is_diff_domain = False - # self.doc_ele = ChromiumElement(self.page, backend_id=node['contentDocument']['backendNodeId']) - # super().__init__(self.page.address, self.page.tab_id, self.page.timeout) - # else: - # self._is_diff_domain = True - # self._tab_obj.stop() - # super().__init__(self.page.address, self.frame_id, self.page.timeout) - # obj_id = super().run_script('document;', as_expr=True)['objectId'] - # self.doc_ele = ChromiumElement(self, obj_id=obj_id) + def _reload(self): + self._frame_ele = ChromiumElement(self.page, backend_id=self._backend_id) + node = self.page.run_cdp('DOM.describeNode', nodeId=self._frame_ele.node_id, not_change=True)['node'] + + if self._is_inner_frame(): + print('-111') + self._is_diff_domain = False + self.doc_ele = ChromiumElement(self.page, backend_id=node['contentDocument']['backendNodeId']) + super().__init__(self.address, self.page.tab_id, self.page.timeout) + print('+111') + else: + print('-222') + self._is_diff_domain = True + super().__init__(self.address, self.frame_id, self.page.timeout) + obj_id = super().run_script('document;', as_expr=True)['objectId'] + self.doc_ele = ChromiumElement(self, obj_id=obj_id) + print('+222') def _get_new_document(self): """刷新cdp使用的document数据""" @@ -156,7 +160,11 @@ class ChromiumFrame(ChromiumBase): @property def title(self): """返回页面title""" - return self.ele('t:title').text + while True: + try: + return self.ele('t:title').text + except: + self._reload() @property def cookies(self): @@ -251,7 +259,7 @@ class ChromiumFrame(ChromiumBase): """运行javascript代码 \n :param script: js文本 :param as_expr: 是否作为表达式运行,为True时args无效 - :param args: 参数,按顺序在js文本中对应argument[0]、argument[2]... + :param args: 参数,按顺序在js文本中对应argument[0]、argument[1]... :return: 运行的结果 """ return self.doc_ele.run_script(script, as_expr=as_expr, *args) diff --git a/DrissionPage/chromium_frame.pyi b/DrissionPage/chromium_frame.pyi index 2c82a67..fb668a8 100644 --- a/DrissionPage/chromium_frame.pyi +++ b/DrissionPage/chromium_frame.pyi @@ -28,6 +28,8 @@ class ChromiumFrame(ChromiumBase): def __repr__(self) -> str: ... + def _reload(self) -> None: ... + def _get_new_document(self) -> None: ... def _onFrameAttached(self, **kwargs): ... diff --git a/DrissionPage/common.py b/DrissionPage/common.py index 5245197..b4d29eb 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -113,8 +113,8 @@ def get_loc(loc: Union[tuple, str], translate_css: bool = False) -> tuple: def str_to_loc(loc: str) -> tuple: - """处理元素查找语句 \n - 查找方式:属性、tag name及属性、文本、xpath、css selector、id、class \n + """处理元素查找语句 \n + 查找方式:属性、tag name及属性、文本、xpath、css selector、id、class \n @表示属性,.表示class,#表示id,=表示精确匹配,:表示模糊匹配,无控制字符串时默认搜索该字符串 \n """ loc_by = 'xpath' @@ -159,7 +159,7 @@ def str_to_loc(loc: str) -> tuple: # 根据文本查找 elif loc.startswith('text='): - loc_str = f'//*[.={_make_search_str(loc[5:])}]' + loc_str = f'//*[text()={_make_search_str(loc[5:])}]' elif loc.startswith('text:') and loc != 'text:': loc_str = f'//*/text()[contains(., {_make_search_str(loc[5:])})]/..' diff --git a/docs/WebPage使用方法/3.8标签页操作.md b/docs/WebPage使用方法/3.8标签页操作.md index c5d6120..e430d24 100644 --- a/docs/WebPage使用方法/3.8标签页操作.md +++ b/docs/WebPage使用方法/3.8标签页操作.md @@ -164,4 +164,79 @@ page.to_tab('0B300BEA6F1F1F4D5DE406872B79B1AD') ## 📍 获取标签页对象 -# 未完待续 +可以用`WebPage`或`ChromiumPage`的`get_tab()`方法获取标签页对象,然后可以使用这个对象对标签页进行操作。 + +`get_tab()` + +**参数:** + +- `tab_id`:要获取的标签页 id,为`None`时获取当前标签页 + +**返回:**`ChromiumTab`对象 + +```python +tab = page.get_tab() # 获取当前标签页对象 +``` + +## 📍 使用标签页对象 + +每个`ChromiumTab`对象控制一个浏览器标签页,方法和直接使用`ChromiumPage`一致,只比`ChromiumPage`少了控制标签页功能。 + +```python +tab.get('https://www.baidu.com') # 使用标签页对象 +``` + +## 📍 同时控制多标签页示例 + +下面的例子演示多个线程控制多个标签页进行内容采集。 + +```python +from threading import Thread + +from DrissionPage import ChromiumPage + + +def 采集(tab): + # 采集 4 页 + for _ in range(4): + # 获取某页所有库名称并打印 + for i in tab.eles('.title project-namespace-path'): + print(i.text) + + # 点击翻页 + tab('@rel=next').click() + # 等待页面进入加载 + tab.wait_loading() + + +if __name__ == '__main__': + # 新建页面对象 + page = ChromiumPage() + # 第一个标签页访问网址 + page.get('https://gitee.com/explore/ai') + # 获取第一个标签页对象 + tab1 = page.get_tab() + # 新建一个标签页并访问另一个网址 + page.new_tab('https://gitee.com/explore/machine-learning') + # 获取第二个标签页对象 + tab2 = page.get_tab() + + # 多线程同时处理多个页面 + Thread(target=采集, args=(tab1,)).start() + Thread(target=采集, args=(tab2,)).start() +``` + +输出: + +```console +MindSpore/mindspore +PaddlePaddle/Paddle +MindSpore/docs +scruel/Notes-ML-AndrewNg +MindSpore/graphengine +inspur-inna/inna1.0 +MindSpore/course +MindSpore/community + +后面省略。。。 +``` diff --git a/docs/WebPage使用方法/3.9iframe操作.md b/docs/WebPage使用方法/3.9iframe操作.md new file mode 100644 index 0000000..df9341d --- /dev/null +++ b/docs/WebPage使用方法/3.9iframe操作.md @@ -0,0 +1,258 @@ +` + + +``` + +按`F12`,可以看到网页右侧是一个两层`