下载地址:https://chromedriver.chromium.org/downloads
我下载的是 106.0.5249.61。
下载的 Chrome Driver 版本需要与本机安装的 Chrome 版本匹配,否则无法启动。
验证是否安装成功:
% ./chromedriver
Starting ChromeDriver 106.0.5249.61 (511755355844955cd3e264779baf0dd38212a4d0-refs/branch-heads/5249@{#569}) on port 9515
Only local connections are allowed.
Please see https://chromedriver.chromium.org/security-considerations for suggestions on keeping ChromeDriver safe.
ChromeDriver was started successfully.
创建项目、安装依赖:
mkdir selenium-demo
cd selenium-demo
python3 -m venv venv
venv/bin/pip3 install selenium-wire==5.1.0
将 Chrome Driver 移动到项目目录下。
项目结构:
.
├── chromedriver
├── configuration.py
├── main.py
└── selenium_utility
    ├── __init__.py
    ├── chrome_driver_builder.py
    └── local_storage.py
其中:
configuration.py:
from typing import Tuple, Mapping
# Browser window size as (width, height); main.py unpacks it into
# ChromeDriverBuilder.with_window_size().
CHROME_WINDOW_SIZE: Tuple[int, int] = (2304, 1440)
# Path to the chromedriver executable, relative to the project directory.
CHROME_EXECUTABLE_PATH: str = "./chromedriver"
# Key/value pairs that main.py writes into the browser's Local Storage
# after the first visit to the entry page.
CHROME_LOCAL_STORAGE: Mapping[str, str] = {
    # Put the Local Storage entries here.
}
# Time to wait for an element to be found or a command to complete, in seconds.
IMPLICITLY_WAIT_SECOND: float = 20
# Fixed pause, in seconds, that main.py sleeps after a navigation or click
# so the page can finish loading.
PAGE_WAIT_FOR_SECOND: float = 10
# Set the entry page URL here (the `...` is a placeholder to be replaced).
ENTRY_PAGE_URL: str = ...
# Set the entry page name here; main.py uses it as the screenshot file name
# (the `...` is a placeholder to be replaced).
ENTRY_PAGE_NAME: str = ...
main.py:
import re
import time
import urllib.parse
from seleniumwire import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webelement import WebElement
from selenium_utility import (
LocalStorage,
ChromeDriverBuilder
)
from configuration import *
# Requests for static assets are skipped during verification. The pattern is
# compiled once here instead of being re-evaluated for every captured request.
_STATIC_ASSET_PATTERN = re.compile(r"\.(js|css|png|jpg|jpeg|svg)$")


def _verify_requests(driver) -> None:
    """Check that every captured non-static request got a 2XX/3XX response.

    Args:
        driver: A selenium-wire driver whose ``requests`` attribute lists the
            requests captured so far.

    Raises:
        RuntimeError: If a request has no response, or its status code is
            outside the range [200, 400).
    """
    for request in driver.requests:
        path: str = urllib.parse.urlparse(request.url).path.lower()
        if _STATIC_ASSET_PATTERN.search(path):
            continue
        if not request.response:
            raise RuntimeError(f"no response for {request.url}")
        status_code: int = request.response.status_code
        if not 200 <= status_code < 400:
            raise RuntimeError(f"status code of {request.url} is {status_code}, 2XX or 3XX expected")
        print(f"{request.url} success")


def main():
    """Run the smoke test: open the entry page, click a link, verify requests.

    The browser is always shut down on exit, including when a step raises.

    Raises:
        RuntimeError: If a captured request has no response or a status code
            outside 2XX/3XX.
    """
    # While debugging, the "headless" argument can be omitted.
    driver: webdriver.Chrome = (
        ChromeDriverBuilder()
        .with_argument("no-sandbox")
        .with_argument("headless")
        .with_window_size(*CHROME_WINDOW_SIZE)
        .with_executable_path(CHROME_EXECUTABLE_PATH)
        .build()
    )
    try:
        local_storage: LocalStorage = LocalStorage(driver)
        driver.implicitly_wait(IMPLICITLY_WAIT_SECOND)

        # Visit the entry page first; Local Storage cannot be set before a
        # page of the target origin has been loaded.
        driver.get(ENTRY_PAGE_URL)
        for k, v in CHROME_LOCAL_STORAGE.items():
            local_storage.set(k, v)

        # Visit the entry page again, now with Local Storage populated.
        print(f"准备访问入口页 {ENTRY_PAGE_URL}")
        driver.get(ENTRY_PAGE_URL)
        print("已访问入口页,等待页面加载")
        # Pause for a while so the page can finish loading.
        time.sleep(PAGE_WAIT_FOR_SECOND)

        # Take a screenshot of the entry page.
        driver.get_screenshot_as_file(f"{ENTRY_PAGE_NAME}.png")
        print("截屏入口页完成")

        # Example of extracting an element:
        # element: WebElement = driver.find_element(By.CLASS_NAME, "XXX")
        print("准备点击")
        clicked_element: WebElement = driver.find_element(By.XPATH, "//a[@href='XXX']")
        clicked_element.click()
        print("点击完成,等待页面加载")
        time.sleep(PAGE_WAIT_FOR_SECOND)
        driver.get_screenshot_as_file("XXX.png")
        print("截屏页面完成")
        # Another example of extracting an element:
        # element: WebElement = driver.find_element(By.XPATH, "//*[@class='XXX']")

        print("开始验证请求")
        _verify_requests(driver)
    finally:
        # quit() rather than close(): close() only closes the current window,
        # while quit() ends the whole driver session. Doing it in `finally`
        # prevents a leaked browser when any step above raises.
        driver.quit()


if __name__ == "__main__":
    main()
selenium_utility/local_storage.py:
from typing import Any, Optional
class LocalStorage:
    """Read and write the browser's ``window.localStorage`` through a driver.

    Keys and values are handed to the page as script arguments rather than
    being spliced into the JavaScript source, so characters such as backticks,
    backslashes or ``${...}`` are stored verbatim.
    """

    def __init__(self, driver: Any) -> None:
        """Wrap *driver*, an object exposing ``execute_script(script, *args)``."""
        self._driver: Any = driver

    def set(self, key: str, value: str) -> None:
        """Store *value* under *key* in the current page's Local Storage."""
        # arguments[0] / arguments[1] are the extra positional arguments
        # passed to execute_script; no string escaping is needed.
        self._driver.execute_script(
            "window.localStorage.setItem(arguments[0], arguments[1]);",
            key,
            value,
        )

    def get(self, key: str) -> Optional[str]:
        """Return whatever the page's ``localStorage.getItem`` yields for *key*."""
        return self._driver.execute_script(
            "return window.localStorage.getItem(arguments[0]);",
            key,
        )
selenium_utility/chrome_driver_builder.py:
from typing import Optional, Tuple
from seleniumwire import webdriver
from selenium.webdriver.chrome.service import Service
class ChromeDriverBuilder:
    """Fluent builder that collects Chrome settings and creates the driver.

    Each ``with_*`` method records one setting and returns the builder so the
    calls can be chained; ``build()`` creates the selenium-wire Chrome driver.
    """

    def __init__(self) -> None:
        """Start with empty Chrome options, no driver path and no window size."""
        self._window_size: Optional[Tuple[int, int]] = None
        self._executable_path: Optional[str] = None
        self._options = webdriver.ChromeOptions()

    def with_argument(self, argument: str) -> "ChromeDriverBuilder":
        """Add one command-line *argument* to the Chrome options."""
        self._options.add_argument(argument)
        return self

    def with_executable_path(self, executable_path: str) -> "ChromeDriverBuilder":
        """Record the path handed to the driver ``Service`` as its executable."""
        self._executable_path = executable_path
        return self

    def with_window_size(self, width: int, height: int) -> "ChromeDriverBuilder":
        """Record the window size to apply once the driver has been created."""
        self._window_size = (width, height)
        return self

    def build(self) -> webdriver.Chrome:
        """Create the Chrome driver from the recorded settings and return it."""
        chromedriver_path = self._executable_path
        # Without an explicit path, Service() is created with its defaults.
        service: Service = (
            Service()
            if chromedriver_path is None
            else Service(executable_path=chromedriver_path)
        )
        browser: webdriver.Chrome = webdriver.Chrome(service=service, options=self._options)

        size = self._window_size
        if size is None:
            return browser
        browser_width, browser_height = size
        browser.set_window_size(browser_width, browser_height)
        return browser
selenium_utility/__init__.py:
from .chrome_driver_builder import ChromeDriverBuilder
from .local_storage import LocalStorage