下载地址:
我下载的是 106.0.5249.61。
下载的 Chrome Driver 版本需要与自己电脑上安装的 Chrome 版本匹配,否则无法启动。
验证是否安装成功:
% ./chromedriver
Starting ChromeDriver 106.0.5249.61 (511755355844955cd3e264779baf0dd38212a4d0-refs/branch-heads/5249@{#569}) on port 9515
Only local connections are allowed.
Please see https://chromedriver.chromium.org/security-considerations for suggestions on keeping ChromeDriver safe.
ChromeDriver was started successfully.
创建项目、安装依赖:
mkdir selenium-demo
cd selenium-demo
python3 -m venv venv
venv/bin/pip3 install selenium-wire==5.1.0
将 Chrome Driver 移动到项目目录下。
项目结构:
.
├── chromedriver
├── configuration.py
├── main.py
└── selenium_utility
    ├── __init__.py
    ├── chrome_driver_builder.py
    └── local_storage.py
其中:
configuration.py:
x
from typing import Tuple, Mapping
# Browser window size as (width, height), applied after the driver starts.
CHROME_WINDOW_SIZE: Tuple[int, int] = (2304, 1440)
# Path to the ChromeDriver executable (relative to the working directory).
CHROME_EXECUTABLE_PATH: str = "./chromedriver"
CHROME_LOCAL_STORAGE: Mapping[str, str] = {
    # Put the Local Storage entries to pre-populate here
}

# Time to wait for an element to be found or a command to complete, in seconds
IMPLICITLY_WAIT_SECOND: float = 20
# Fixed pause after each navigation so the page can finish loading, in seconds
PAGE_WAIT_FOR_SECOND: float = 10

# Set the entry page URL here (the `...` placeholder must be replaced)
ENTRY_PAGE_URL: str = ...
# Set the entry page's name here (the `...` placeholder must be replaced)
ENTRY_PAGE_NAME: str = ...

# main.py:
x
import re
import time
import urllib.parse

from seleniumwire import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webelement import WebElement

from selenium_utility import (
    LocalStorage,
    ChromeDriverBuilder
)
from configuration import *
# Paths ending in one of these extensions are treated as static assets and
# are skipped when verifying captured requests.
_STATIC_ASSET_PATTERN = re.compile(r"\.(js|css|png|jpg|jpeg|svg)$")


def _verify_requests(requests) -> None:
    """Check that every captured non-static request got a 2XX/3XX response.

    Raises:
        RuntimeError: if a request has no response, or its status code is
            outside the range [200, 400).
    """
    for request in requests:
        path: str = urllib.parse.urlparse(request.url).path.lower()
        if _STATIC_ASSET_PATTERN.search(path):
            continue
        if not request.response:
            raise RuntimeError(f"no response for {request.url}")
        status_code: int = request.response.status_code
        if not 200 <= status_code < 400:
            raise RuntimeError(f"status code of {request.url} is {status_code}, 2XX or 3XX expected")
        print(f"{request.url} success")


def main() -> None:
    """Open the entry page, click through to a second page, and verify requests.

    The browser is started headless, Local Storage is pre-populated from
    ``CHROME_LOCAL_STORAGE``, screenshots are taken of both pages, and every
    request captured by Selenium Wire is checked for a 2XX/3XX response.

    Raises:
        RuntimeError: if a captured request failed verification.
    """
    # When debugging, the "headless" argument can be omitted
    driver: webdriver.Chrome = ChromeDriverBuilder() \
        .with_argument("no-sandbox") \
        .with_argument("headless") \
        .with_window_size(*CHROME_WINDOW_SIZE) \
        .with_executable_path(CHROME_EXECUTABLE_PATH) \
        .build()

    # Always shut the browser down, even when a step below raises; quit()
    # ends the whole session, whereas close() only closes the current window.
    try:
        local_storage: LocalStorage = LocalStorage(driver)

        driver.implicitly_wait(IMPLICITLY_WAIT_SECOND)

        # Visit the entry page first; otherwise LocalStorage cannot be set
        driver.get(ENTRY_PAGE_URL)
        # Populate LocalStorage
        for k, v in CHROME_LOCAL_STORAGE.items():
            local_storage.set(k, v)
        # Then visit the entry page again
        print(f"准备访问入口页 {ENTRY_PAGE_URL}")
        driver.get(ENTRY_PAGE_URL)
        print("已访问入口页,等待页面加载")
        # Wait a while so the page can finish loading
        time.sleep(PAGE_WAIT_FOR_SECOND)
        # Take a screenshot
        driver.get_screenshot_as_file(f"{ENTRY_PAGE_NAME}.png")
        print("截屏入口页完成")
        # Example of extracting an element:
        # element: WebElement = driver.find_element(By.CLASS_NAME, "XXX")

        print("准备点击")
        clicked_element: WebElement = driver.find_element(By.XPATH, "//a[@href='XXX']")
        clicked_element.click()
        print("点击完成,等待页面加载")
        time.sleep(PAGE_WAIT_FOR_SECOND)
        driver.get_screenshot_as_file("XXX.png")
        print("截屏页面完成")
        # Example of extracting an element by XPath:
        # element: WebElement = driver.find_element(By.XPATH, "//*[@class='XXX']")

        print("开始验证请求")
        # Verify the captured requests
        _verify_requests(driver.requests)
    finally:
        driver.quit()


if __name__ == "__main__":
    main()

# selenium_utility/local_storage.py:
from typing import Any, Optional
class LocalStorage:
    """Read and write the browser's ``window.localStorage`` through a driver.

    The driver only needs an ``execute_script(script, *args)`` method.
    """

    def __init__(self, driver: Any) -> None:
        """Store the driver used to run the JavaScript snippets."""
        self._driver: Any = driver

    def set(self, key: str, value: str) -> None:
        """Store ``value`` under ``key`` in Local Storage.

        Key and value are handed over as script arguments rather than being
        spliced into the JavaScript source, so characters such as backticks,
        backslashes, or ``${...}`` are stored verbatim.
        """
        self._driver.execute_script(
            "window.localStorage.setItem(arguments[0], arguments[1]);",
            key,
            value,
        )

    def get(self, key: str) -> Optional[str]:
        """Return whatever the script yields for ``localStorage.getItem(key)``."""
        return self._driver.execute_script(
            "return window.localStorage.getItem(arguments[0]);",
            key,
        )

# selenium_utility/chrome_driver_builder.py:
from typing import Optional, Tuple

from seleniumwire import webdriver
from selenium.webdriver.chrome.service import Service
class ChromeDriverBuilder:
    """Fluent builder for a Selenium Wire Chrome driver.

    Each ``with_*`` method records one setting and returns the builder so
    calls can be chained; ``build()`` starts the browser.
    """

    def __init__(self) -> None:
        """Start with default Chrome options and nothing else configured."""
        self._options = webdriver.ChromeOptions()
        self._executable_path: Optional[str] = None
        self._window_size: Optional[Tuple[int, int]] = None

    def with_argument(self, argument: str) -> "ChromeDriverBuilder":
        """Add a Chrome command-line argument (e.g. ``"headless"``)."""
        self._options.add_argument(argument)
        return self

    def with_executable_path(self, executable_path: str) -> "ChromeDriverBuilder":
        """Set the path of the ChromeDriver executable to launch."""
        self._executable_path = executable_path
        return self

    def with_window_size(self, width: int, height: int) -> "ChromeDriverBuilder":
        """Set the window size to apply once the driver has started."""
        self._window_size = (width, height)
        return self

    # The return annotation is quoted so it is not evaluated when the class
    # is defined.
    def build(self) -> "webdriver.Chrome":
        """Start Chrome with the recorded settings and return the driver.

        If resizing the window fails, the freshly started driver is shut
        down before the exception propagates, so no browser is left behind.
        """
        if self._executable_path is not None:
            service = Service(executable_path=self._executable_path)
        else:
            service = Service()
        driver = webdriver.Chrome(service=service, options=self._options)
        if self._window_size is not None:
            try:
                driver.set_window_size(*self._window_size)
            except Exception:
                driver.quit()
                raise
        return driver

# selenium_utility/__init__.py:
x
# Re-export the helper classes so callers can import them directly from
# the `selenium_utility` package.
from .chrome_driver_builder import ChromeDriverBuilder
from .local_storage import LocalStorage