创建工作目录和虚拟环境,并安装 Playwright:
mkdir playwright-demo
cd playwright-demo
python3 -m venv venv
venv/bin/pip3 install pytest-playwright playwright
venv/bin/playwright install
创建 auth.py,其内容如下:
import optparse
from playwright.sync_api import sync_playwright, expect, Browser, BrowserContext, Page
def run(output: str, login_url: str, selector: str, timeout: float) -> None:
    """Open a headed browser, wait for a login to complete, and save the session.

    Navigates to ``login_url``, waits until the element matched by ``selector``
    becomes visible, then writes the browser context's storage state to
    ``output``.

    :param output: path of the file the storage state is written to
    :param login_url: URL of the page to open
    :param selector: selector of the element whose visibility is awaited
    :param timeout: maximum time to wait for ``selector``, in milliseconds
    """
    with sync_playwright() as p:
        # A visible (non-headless) window is launched so the page can be
        # interacted with while the script waits for ``selector``.
        browser: Browser = p.chromium.launch(headless=False)
        try:
            ctx: BrowserContext = browser.new_context()
            page: Page = ctx.new_page()
            page.goto(login_url)
            expect(page.locator(selector)).to_be_visible(timeout=timeout)
            ctx.storage_state(path=output)
        finally:
            # Closing the browser also closes its contexts; doing it in
            # ``finally`` releases the window even when the wait fails.
            browser.close()
if __name__ == "__main__":
    # Command-line entry point: parse the options, run the login flow, and
    # report where the storage state was written.
    parser: optparse.OptionParser = optparse.OptionParser(usage="python3 %prog <options>")
    parser.add_option("-o", "--output", type=str, default="state.json", help="output file. default: state.json")
    parser.add_option("-l", "--login-url", type=str, default="https://bilibili.com/", help="login url")
    parser.add_option("-s", "--selector", type=str,
                      default='//img[@class="bili-avatar-img bili-avatar-face bili-avatar-img-radius"]',
                      help="the selector to wait for")
    parser.add_option("-t", "--timeout", type=float, default=10 * 60 * 1000,
                      help="the timeout to wait for. default: 10min. unit: millisecond")
    options, _ = parser.parse_args()
    # Both options have non-None defaults, so an ``is None`` test can never
    # fire; check for emptiness so an explicit empty value is rejected.
    if not options.login_url:
        parser.error("no Login URL specified")
    if not options.selector:
        parser.error("no Selector specified")
    run(options.output, options.login_url, options.selector, options.timeout)
    print(f"Authentication state was store in {options.output}")
创建 main.py,其内容如下:
import optparse
import re
import typing
import urllib.parse
from playwright.sync_api import (
sync_playwright,
Browser,
BrowserContext,
Page,
Request
)
def on_request(request: Request) -> None:
    """Print the method and URL of a request, skipping static assets.

    Nothing is printed when the URL path ends in one of the image,
    JavaScript or CSS extensions listed below, or when the URL uses the
    ``blob:`` scheme.

    :param request: the request reported by the page's ``request`` event
    """
    path: str = urllib.parse.urlparse(request.url).path
    # Ignore images, JS and CSS. Only the path is matched, so a query string
    # after the extension does not defeat the filter.
    if re.search(r"\.(jpg|jpeg|svg|png|js|css)$", path.lower()) or request.url.startswith("blob:"):
        return
    print(f"{request.method} {request.url}")
def on_request_failure(request: Request) -> None:
    """Print the URL of a failed request followed by its failure value.

    :param request: the request reported by the page's ``requestfailed`` event
    """
    failed_url = request.url
    reason = request.failure
    print(f"{failed_url} {reason}")
def on_request_finished(request: Request) -> None:
    """Print the method, URL and response status of a finished request.

    Nothing is printed when the URL path ends in one of the image,
    JavaScript or CSS extensions listed below, or when the URL uses the
    ``blob:`` scheme.

    :param request: the request reported by the page's ``requestfinished`` event
    """
    path: str = urllib.parse.urlparse(request.url).path
    # Ignore images, JS and CSS.
    if re.search(r"\.(jpg|jpeg|svg|png|js|css)$", path.lower()) or request.url.startswith("blob:"):
        return
    response = request.response()
    # ``response()`` can return None; report that instead of failing on
    # ``.status`` inside the event handler.
    status = response.status if response is not None else "no response"
    print(f"{request.method} {request.url} got {status}")
def main(storage_state: str,
         tracing_path: str,
         record_video_dir: typing.Optional[str] = None) -> None:
    """Replay a saved session on bilibili.com while recording a trace.

    Opens the home page with the given storage state, clicks the avatar
    entry in the header, follows the page that opens as a result, clicks a
    navigator entry there and saves a screenshot to ``myspace.jpg``.
    Requests made by the first page are logged through the ``on_request*``
    handlers.

    :param storage_state: file holding the saved authentication state
    :param tracing_path: file the trace is written to
    :param record_video_dir: directory the recorded videos are saved in
    """
    with sync_playwright() as p:
        browser: Browser = p.chromium.launch()
        context: BrowserContext = browser.new_context(
            storage_state=storage_state,
            record_video_dir=record_video_dir
        )
        context.tracing.start(screenshots=True, snapshots=True, sources=True)
        try:
            page: Page = context.new_page()
            page.on("request", on_request)
            page.on("requestfailed", on_request_failure)
            page.on("requestfinished", on_request_finished)
            # Start from the home page.
            print("开始请求首页")
            page.goto("https://bilibili.com/")
            print("点击头像,进入我的空间")
            # Start listening for the new page *before* clicking: waiting for
            # the "page" event only after the click can miss an event that
            # has already fired and then time out.
            with context.expect_page(timeout=5000) as new_page_info:
                page.locator("//li[@class='v-popover-wrap header-avatar-wrap']").click(timeout=5000)
                print("等待新页面被创建")
            last_page: Page = new_page_info.value
            print(f"新页面地址:{last_page.url}")
            last_page.locator('//*[@id="navigator"]/div/div[1]/div[1]/a[2]/span[2]').click()
            # Fixed pause before the screenshot is taken.
            last_page.wait_for_timeout(2000)
            last_page.screenshot(timeout=5000, path="myspace.jpg")
        finally:
            # Always finalize the trace and release the browser so that a
            # failed step still leaves a trace file behind for debugging.
            context.tracing.stop(path=tracing_path)
            context.close()
            browser.close()
if __name__ == "__main__":
    # Command-line entry point: collect the three file/directory options and
    # hand them to main().
    cli = optparse.OptionParser(usage="python3 %prog options")
    cli.add_option(
        "-s", "--storage-state",
        type=str, default="state.json", help="storage state file",
    )
    cli.add_option(
        "-t", "--tracing-path",
        type=str, default="tracing.zip", help="tracing path",
    )
    cli.add_option(
        "-r", "--record-video-dir",
        type=str, default=".",
        help="Enables video recording for all pages into the specified directory",
    )
    opts, _positional = cli.parse_args()
    main(
        storage_state=opts.storage_state,
        tracing_path=opts.tracing_path,
        record_video_dir=opts.record_video_dir,
    )
生成认证状态文件:
venv/bin/python auth.py
# 在浏览器中,手工登录
# 登录成功后,终端中输出:
# Authentication state was store in state.json
运行 main.py:
venv/bin/python main.py
# 执行完成后,将在当前目录下得到:
# 1. tracing.zip - 追踪文件
# 2. *.webm - 测试过程的录像(每个 Tab 一个)