From 1550e8d673aec0645b27eee370f52e00395885d5 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sun, 5 Mar 2023 22:59:41 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8C=81=E5=BC=82=E5=9F=9Fiframe?= =?UTF-8?q?=E5=86=85=E5=85=83=E7=B4=A0=E6=88=AA=E5=9B=BE=EF=BC=8C=E5=9F=BA?= =?UTF-8?q?=E6=9C=AC=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_base.py | 123 +++++++++++++++++-------------- DrissionPage/chromium_base.pyi | 7 ++ DrissionPage/chromium_element.py | 4 +- DrissionPage/chromium_frame.py | 65 ++++++++++++++-- DrissionPage/chromium_frame.pyi | 13 +++- 5 files changed, 145 insertions(+), 67 deletions(-) diff --git a/DrissionPage/chromium_base.py b/DrissionPage/chromium_base.py index 387a4ab..b20a277 100644 --- a/DrissionPage/chromium_base.py +++ b/DrissionPage/chromium_base.py @@ -631,60 +631,8 @@ class ChromiumBase(BasePage): :param right_bottom: 截取范围右下角角坐标 :return: 图片完整路径或字节文本 """ - if as_bytes: - if as_bytes is True: - pic_type = 'png' - else: - if as_bytes not in ('jpg', 'jpeg', 'png', 'webp'): - raise ValueError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") - pic_type = 'jpeg' if as_bytes == 'jpg' else as_bytes - - elif as_base64: - if as_base64 is True: - pic_type = 'png' - else: - if as_base64 not in ('jpg', 'jpeg', 'png', 'webp'): - raise ValueError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") - pic_type = 'jpeg' if as_base64 == 'jpg' else as_base64 - - else: - if not path: - path = f'{self.title}.jpg' - path = get_usable_path(path) - pic_type = path.suffix.lower() - if pic_type not in ('.jpg', '.jpeg', '.png', '.webp'): - raise TypeError(f'不支持的文件格式:{pic_type}。') - pic_type = 'jpeg' if pic_type == '.jpg' else pic_type[1:] - - width, height = self.size - if full_page: - vp = {'x': 0, 'y': 0, 'width': width, 'height': height, 'scale': 1} - png = self.run_cdp_loaded('Page.captureScreenshot', format=pic_type, - captureBeyondViewport=True, clip=vp)['data'] - else: - if left_top and right_bottom: - x, y = left_top - w = right_bottom[0] - x - h = right_bottom[1] - y - vp = {'x': x, 'y': y, 'width': w, 'height': h, 'scale': 1} - png = self.run_cdp_loaded('Page.captureScreenshot', format=pic_type, - captureBeyondViewport=False, clip=vp)['data'] - else: - png = self.run_cdp_loaded('Page.captureScreenshot', format=pic_type)['data'] - - if as_base64: - return png - - from base64 import b64decode - png = b64decode(png) - - if as_bytes: - return png - - path.parent.mkdir(parents=True, exist_ok=True) - with open(path, 'wb') as f: - f.write(png) - return str(path.absolute()) + return self._get_screenshot(path=path, as_bytes=as_bytes, as_base64=as_base64, + full_page=full_page, left_top=left_top, right_bottom=right_bottom) def clear_cache(self, session_storage=True, local_storage=True, cache=True, cookies=True): """清除缓存,可选要清除的项 @@ -749,6 +697,73 @@ class ChromiumBase(BasePage): return True + def _get_screenshot(self, path=None, as_bytes=None, as_base64=None, + full_page=False, left_top=None, right_bottom=None, ele=None): + """对页面进行截图,可对整个网页、可见网页、指定范围截图。对可视范围外截图需要90以上版本浏览器支持 + :param path: 完整路径,后缀可选 'jpg','jpeg','png','webp' + :param as_bytes: 是否以字节形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数和as_base64参数无效 + :param as_base64: 是否以base64字符串形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数无效 + :param full_page: 是否整页截图,为True截取整个网页,为False截取可视窗口 + :param left_top: 截取范围左上角坐标 + :param right_bottom: 截取范围右下角角坐标 + :param ele: 为异域iframe内元素截图设置 + :return: 图片完整路径或字节文本 + """ + if as_bytes: + if as_bytes is True: + pic_type = 'png' + else: + if as_bytes not in ('jpg', 'jpeg', 'png', 'webp'): + raise ValueError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") + pic_type = 'jpeg' if as_bytes == 'jpg' else as_bytes + + elif as_base64: + if as_base64 is True: + pic_type = 'png' + else: + if as_base64 not in ('jpg', 'jpeg', 'png', 'webp'): + raise ValueError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") + pic_type = 'jpeg' if as_base64 == 'jpg' else as_base64 + + else: + if not path: + path = f'{self.title}.jpg' + path = get_usable_path(path) + pic_type = path.suffix.lower() + if pic_type not in ('.jpg', '.jpeg', '.png', '.webp'): + raise TypeError(f'不支持的文件格式:{pic_type}。') + pic_type = 'jpeg' if pic_type == '.jpg' else pic_type[1:] + + width, height = self.size + if full_page: + vp = {'x': 0, 'y': 0, 'width': width, 'height': height, 'scale': 1} + png = self.run_cdp_loaded('Page.captureScreenshot', format=pic_type, + captureBeyondViewport=True, clip=vp)['data'] + else: + if left_top and right_bottom: + x, y = left_top + w = right_bottom[0] - x + h = right_bottom[1] - y + vp = {'x': x, 'y': y, 'width': w, 'height': h, 'scale': 1} + png = self.run_cdp_loaded('Page.captureScreenshot', format=pic_type, + captureBeyondViewport=False, clip=vp)['data'] + else: + png = self.run_cdp_loaded('Page.captureScreenshot', format=pic_type)['data'] + + if as_base64: + return png + + from base64 import b64decode + png = b64decode(png) + + if as_bytes: + return png + + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, 'wb') as f: + f.write(png) + return str(path.absolute()) + # ------------------准备废弃---------------------- def wait_loading(self, timeout=None): """阻塞程序,等待页面进入加载状态 diff --git a/DrissionPage/chromium_base.pyi b/DrissionPage/chromium_base.pyi index e941162..d6341b5 100644 --- a/DrissionPage/chromium_base.pyi +++ b/DrissionPage/chromium_base.pyi @@ -183,6 +183,13 @@ class ChromiumBase(BasePage): left_top: Tuple[int, int] = None, right_bottom: Tuple[int, int] = None) -> Union[str, bytes]: ... + def _get_screenshot(self, path: [str, Path] = None, + as_bytes: [bool, str] = None, as_base64: [bool, str] = None, + full_page: bool = False, + left_top: Tuple[int, int] = None, + right_bottom: Tuple[int, int] = None, + ele: ChromiumElement = None) -> Union[str, bytes]: ... + def clear_cache(self, session_storage: bool = True, local_storage: bool = True, diff --git a/DrissionPage/chromium_element.py b/DrissionPage/chromium_element.py index e2c3842..1743eb2 100644 --- a/DrissionPage/chromium_element.py +++ b/DrissionPage/chromium_element.py @@ -485,8 +485,8 @@ class ChromiumElement(DrissionElement): right_bottom = (left + width, top + height) if not path: path = f'{self.tag}.jpg' - return self.page.get_screenshot(path, as_bytes=as_bytes, as_base64=as_base64, full_page=False, - left_top=left_top, right_bottom=right_bottom) + return self.page._get_screenshot(path, as_bytes=as_bytes, as_base64=as_base64, full_page=False, + left_top=left_top, right_bottom=right_bottom, ele=self) def input(self, vals, clear=True): """输入文本或组合键,也可用于输入文件路径到input元素(路径间用\n间隔) diff --git a/DrissionPage/chromium_frame.py b/DrissionPage/chromium_frame.py index b68b605..80ca28e 100644 --- a/DrissionPage/chromium_frame.py +++ b/DrissionPage/chromium_frame.py @@ -7,6 +7,7 @@ from re import search from time import sleep, perf_counter from warnings import warn +from .commons.tools import get_usable_path from .chromium_base import ChromiumBase, ChromiumPageScroll, ChromiumBaseSetter from .chromium_element import ChromiumElement @@ -402,23 +403,73 @@ class ChromiumFrame(ChromiumBase): self._check_ok() return self.frame_ele.afters(filter_loc, timeout) - def get_screenshot(self, path=None, as_bytes=None, as_base64=None, - full_page=False, left_top=None, right_bottom=None): + def get_screenshot(self, path=None, as_bytes=None, as_base64=None): """对页面进行截图,可对整个网页、可见网页、指定范围截图。对可视范围外截图需要90以上版本浏览器支持 :param path: 完整路径,后缀可选 'jpg','jpeg','png','webp' :param as_bytes: 是否以字节形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数和as_base64参数无效 :param as_base64: 是否以base64字符串形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数无效 + :return: 图片完整路径或字节文本 + """ + return self.frame_ele.get_screenshot(path=path, as_bytes=as_bytes, as_base64=as_base64) + + def _get_screenshot(self, path=None, as_bytes: [bool, str] = None, as_base64: [bool, str] = None, + full_page=False, left_top=None, right_bottom=None, ele=None): + """实现对元素截图 + :param path: 完整路径,后缀可选 'jpg','jpeg','png','webp' + :param as_bytes: 是否以字节形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数和as_base64参数无效 + :param as_base64: 是否以base64字符串形式返回图片,可选 'jpg','jpeg','png','webp',生效时path参数无效 :param full_page: 是否整页截图,为True截取整个网页,为False截取可视窗口 :param left_top: 截取范围左上角坐标 :param right_bottom: 截取范围右下角角坐标 + :param ele: 为异域iframe内元素截图设置 :return: 图片完整路径或字节文本 """ - if full_page: - raise RuntimeError('暂未实现对iframe全页截图功能。') - if left_top is None and right_bottom is None: - return self.frame_ele.get_screenshot(path=path, as_bytes=as_bytes, as_base64=as_base64) + if not self._is_diff_domain: + return super().get_screenshot(path=path, as_bytes=as_bytes, as_base64=as_base64, + full_page=full_page, left_top=left_top, right_bottom=right_bottom) + else: - raise RuntimeError('暂未实现对异域iframe内元素截图功能。') + if as_bytes: + if as_bytes is True: + pic_type = 'png' + else: + if as_bytes not in ('jpg', 'jpeg', 'png', 'webp'): + raise ValueError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") + pic_type = 'jpeg' if as_bytes == 'jpg' else as_bytes + + elif as_base64: + if as_base64 is True: + pic_type = 'png' + else: + if as_base64 not in ('jpg', 'jpeg', 'png', 'webp'): + raise ValueError("只能接收 'jpg', 'jpeg', 'png', 'webp' 四种格式。") + pic_type = 'jpeg' if as_base64 == 'jpg' else as_base64 + + else: + if not path: + path = f'{self.title}.jpg' + path = get_usable_path(path) + pic_type = path.suffix.lower() + if pic_type not in ('.jpg', '.jpeg', '.png', '.webp'): + raise TypeError(f'不支持的文件格式:{pic_type}。') + pic_type = 'jpeg' if pic_type == '.jpg' else pic_type[1:] + + self.scroll.to_see(ele) + cx, cy = ele.locations.viewport_location + w, h = ele.size + img_data = f'data:image/{pic_type};base64,{self.frame_ele.get_screenshot(as_base64=True)}' + body = self.page('t:body') + first_child = body('c::first-child') + js = f''' + haskell = document.createElement('img'); + haskell.src = "{img_data}"; + arguments[0].insertBefore(haskell, this); + return haskell;''' + new_ele = first_child.run_js(js, body) + r = self.page.get_screenshot(path=path, as_bytes=as_bytes, as_base64=as_base64, + left_top=(cx, cy), right_bottom=(cx + w, cy + h)) + self.page.remove_ele(new_ele) + return r def _find_elements(self, loc_or_ele, timeout=None, single=True, relative=False, raise_err=None): """在frame内查找单个元素 diff --git a/DrissionPage/chromium_frame.pyi b/DrissionPage/chromium_frame.pyi index f1d9440..080571d 100644 --- a/DrissionPage/chromium_frame.pyi +++ b/DrissionPage/chromium_frame.pyi @@ -152,10 +152,15 @@ class ChromiumFrame(ChromiumBase): timeout: float = ...) -> List[Union[ChromiumElement, ChromiumFrame, str]]: ... def get_screenshot(self, path: [str, Path] = None, - as_bytes: [bool, str] = None, as_base64: [bool, str] = None, - full_page: bool = False, - left_top: Tuple[int, int] = None, - right_bottom: Tuple[int, int] = None) -> Union[str, bytes]: ... + as_bytes: [bool, str] = None, + as_base64: [bool, str] = None) -> Union[str, bytes]: ... + + def _get_screenshot(self, path: [str, Path] = None, + as_bytes: [bool, str] = None, as_base64: [bool, str] = None, + full_page: bool = False, + left_top: Tuple[int, int] = None, + right_bottom: Tuple[int, int] = None, + ele: ChromiumElement = None) -> Union[str, bytes]: ... def _find_elements(self, loc_or_ele: Union[Tuple[str, str], str, ChromiumElement, ChromiumFrame], timeout: float = None, single: bool = True, relative: bool = False, raise_err: bool = None) \