好的,下面给出完整的技术方案 + 可运行代码,技术栈选择:
Python + OpenAI SDK(调 LLM)
Playwright MCP(执行测试)
DSL 作为中间层(稳定性保障)
架构确认
自然语言
↓
Python (OpenAI SDK) → DSL JSON(带 JSON Schema 校验 + 重试)
↓
Playwright MCP Server(接收 DSL 指令逐步执行)

项目结构
testgen/
├── config.py # 配置(模型、MCP地址等)
├── prompt.py # Prompt 模板
├── schema.py # DSL JSON Schema 定义
├── llm.py # LLM 调用 + 校验 + 重试
├── executor.py # Playwright MCP 执行器
├── runner.py # 主入口
├── selector_dict.json # 业务元素 → selector 映射
└── cases/ # 生成的 DSL 用例存储
    └── login_success.json

完整代码
config.py
import os
# Model name passed to the chat-completions call in llm.py.
LLM_MODEL = "gpt-4o"
LLM_TEMPERATURE = 0.2  # low temperature keeps the generated DSL stable
# Upper bound of the generate/validate loop in llm.py (total attempts,
# including the first one).
LLM_MAX_RETRIES = 3
# Read eagerly: importing this module raises KeyError when the variable is unset.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
# Address of a locally started Playwright MCP server.
# NOTE(review): the executor shown in this document talks to the server over
# stdio and does not appear to read this value — confirm whether it is still needed.
MCP_WS_URL = "ws://localhost:8931"
# Path of the business-name -> selector mapping file.
SELECTOR_DICT_PATH = "selector_dict.json"
CASES_DIR = "cases"

schema.py — DSL 结构定义
from typing import Literal, Optional
from pydantic import BaseModel, field_validator
# ── Single step ───────────────────────────────────────────
# Closed set of action names a DSL step may carry. The same names are listed
# for the model in the system prompt, so the two must be kept in sync.
ActionType = Literal[
    "navigate",
    "input",
    "click",
    "assert_url",
    "assert_text",
    "assert_visible",
    "wait_for",
    "select",
    "hover",
]
class Step(BaseModel):
    """One step of a test case in the DSL.

    Which optional fields are required depends on ``action``:

    * ``url`` is required for ``navigate``.
    * ``target`` is required for every action except ``navigate`` and
      ``assert_url``.
    * ``value`` is required for ``input``, ``select``, ``assert_url`` and
      ``assert_text`` (an empty string is accepted, ``None`` is not).

    Raises:
        pydantic.ValidationError: when a field required by ``action`` is
            missing.
    """

    # Pydantic v2 does not run field validators for fields that are left at
    # their default. Without this setting a step such as {"action": "click"}
    # (no target at all) would pass validation, because the "target"
    # validator would never be invoked.
    model_config = {"validate_default": True}

    action: ActionType
    target: Optional[str] = None  # business-level element name (key of selector_dict)
    value: Optional[str] = None  # text to input / option to select / expected value
    url: Optional[str] = None  # used by navigate only

    @field_validator("target")
    @classmethod
    def target_required_for_non_navigate(cls, v, info):
        """Reject a missing target for actions that operate on an element."""
        action = info.data.get("action")
        if action is None:
            # "action" itself failed validation; that error is reported on
            # its own, so do not pile a misleading second error on top.
            return v
        if action not in ("navigate", "assert_url") and not v:
            raise ValueError(f"action={action} 必须提供 target")
        return v

    @field_validator("value")
    @classmethod
    def value_required_for_value_actions(cls, v, info):
        """Reject a missing value for actions that cannot run without one."""
        action = info.data.get("action")
        if action in ("input", "select", "assert_url", "assert_text") and v is None:
            raise ValueError(f"action={action} 必须提供 value")
        return v

    @field_validator("url")
    @classmethod
    def url_required_for_navigate(cls, v, info):
        """Reject a navigate step that carries no url."""
        if info.data.get("action") == "navigate" and not v:
            raise ValueError("navigate 必须提供 url")
        return v
# ── Whole test case ───────────────────────────────────────
class TestCase(BaseModel):
    """A named test case: a description plus an ordered list of steps.

    Validation fails unless at least one step is an ``assert_*`` action.
    """

    test_name: str
    description: str
    steps: list[Step]

    @field_validator("steps")
    @classmethod
    def must_have_assertion(cls, steps):
        """Return ``steps`` unchanged if any of them is an assertion step."""
        for candidate in steps:
            if candidate.action.startswith("assert"):
                return steps
        raise ValueError("steps 中至少需要一个 assert_* 断言步骤")


# prompt.py — prompt templates
import json
# System prompt: tells the model to emit only a JSON test DSL, lists the
# allowed action names and the rules for target/url/value, and shows the
# expected output shape by example.
SYSTEM_PROMPT = """\
你是一个 Web 自动化测试用例生成器。
你的任务是将用户的自然语言操作描述,转换为结构化的测试 DSL(JSON 格式)。
规则(必须严格遵守):
1. 只输出合法 JSON,不输出任何解释、注释或 markdown 代码块
2. action 只能使用以下值之一:
navigate / input / click / assert_url / assert_text / assert_visible / wait_for / select / hover
3. target 字段必须使用"页面元素字典"中已有的业务名称,不允许自造
4. navigate 和 assert_url 不需要 target,只需要 url 或 value
5. 必须包含至少一个 assert_* 断言步骤
6. 描述中提到的每一个操作,都需要对应一个 step
输出 JSON 结构:
{
"test_name": "用例名称",
"description": "一句话描述该用例的测试目标",
"steps": [
{"action": "navigate", "url": "/login"},
{"action": "input", "target": "用户名输入框", "value": "test@example.com"},
{"action": "click", "target": "登录按钮"},
{"action": "assert_url", "value": "/dashboard"}
]
}
"""
# Template of the repair request sent after a failed validation. The
# {errors} and {previous_output} placeholders are filled in with str.format
# by build_fix_prompt.
FIX_PROMPT_TEMPLATE = """\
你之前生成的 JSON 有以下错误,请修复并重新输出合法的 JSON:
错误信息:
{errors}
你之前输出的内容:
{previous_output}
规则同上,只输出 JSON,不输出解释。
"""
def build_user_prompt(description: str, selector_dict: dict) -> str:
    """Build the user message for the first generation request.

    Args:
        description: Natural-language description of the scenario.
        selector_dict: Mapping of business element names to selectors; it is
            rendered as pretty-printed JSON (non-ASCII kept as-is).

    Returns:
        The element dictionary followed by the description, each under its
        own heading line, terminated by a newline.
    """
    rendered_dict = json.dumps(selector_dict, ensure_ascii=False, indent=2)
    sections = [
        "页面元素字典(target 只能使用这里的名称):",
        rendered_dict,
        "用户描述:",
        f"{description}",
    ]
    return "\n".join(sections) + "\n"
def build_fix_prompt(errors: str, previous_output: str) -> str:
    """Fill FIX_PROMPT_TEMPLATE with the validation errors and the rejected output.

    Args:
        errors: Text describing why the previous output was rejected.
        previous_output: The raw text the model produced last time.

    Returns:
        The formatted repair request.
    """
    substitutions = {"errors": errors, "previous_output": previous_output}
    return FIX_PROMPT_TEMPLATE.format(**substitutions)


# llm.py — LLM call + validation + automatic retry
import json
import re
from openai import OpenAI
from pydantic import ValidationError
from config import LLM_MODEL, LLM_TEMPERATURE, LLM_MAX_RETRIES, OPENAI_API_KEY
from schema import TestCase
from prompt import SYSTEM_PROMPT, build_user_prompt, build_fix_prompt
# Module-level OpenAI client, created once at import time and used by _call_llm.
client = OpenAI(api_key=OPENAI_API_KEY)
def _extract_json(raw: str) -> str:
    """Return the JSON payload of ``raw``, unwrapping a markdown code fence.

    If ``raw`` contains a triple-backtick fence (optionally tagged ``json``),
    the stripped content of the first fence is returned; otherwise the
    stripped input is returned as-is.
    """
    fenced = re.search(r"```(?:json)?\s*([\s\S]+?)```", raw)
    if fenced is None:
        return raw.strip()
    return fenced.group(1).strip()
def _call_llm(messages: list[dict]) -> str:
    """Send ``messages`` to the chat-completions endpoint and return the reply text.

    The request uses the configured model and temperature and asks for JSON
    mode via ``response_format``.

    Args:
        messages: Chat messages in the ``{"role": ..., "content": ...}`` form.

    Returns:
        The text of the first choice. An empty string is returned when the
        message has no content, so callers that treat the reply as text (and
        report it back as a validation failure) never receive ``None``.
    """
    resp = client.chat.completions.create(
        model=LLM_MODEL,
        temperature=LLM_TEMPERATURE,
        response_format={"type": "json_object"},  # force JSON mode
        messages=messages,
    )
    # message.content is nullable in the API response.
    return resp.choices[0].message.content or ""
def generate_test_case(
    description: str,
    selector_dict: dict,
) -> TestCase:
    """Turn a natural-language description into a validated ``TestCase``.

    The model is called up to ``LLM_MAX_RETRIES`` times. After a failed
    attempt the rejected output and the error text are appended to the
    conversation so the model can repair its own answer.

    An attempt is rejected when the reply is not valid JSON, does not satisfy
    the ``TestCase`` schema, or uses a ``target`` that is not a key of
    ``selector_dict`` (such a case could never be executed, so it is sent
    back for repair instead of failing later in the executor).

    Args:
        description: Natural-language description of the scenario.
        selector_dict: Mapping of business element names to selectors.

    Returns:
        The first test case that passes every check.

    Raises:
        RuntimeError: when no attempt produced a valid test case; the last
            validation error is attached as ``__cause__``.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": build_user_prompt(description, selector_dict)},
    ]
    last_raw = ""
    last_error: Exception | None = None

    for attempt in range(1, LLM_MAX_RETRIES + 1):
        print(f"[LLM] 第 {attempt} 次生成...")
        if last_error is not None:
            # Feed the previous failure back to the model and ask for a fix.
            messages.append({"role": "assistant", "content": last_raw})
            messages.append(
                {"role": "user", "content": build_fix_prompt(str(last_error), last_raw)}
            )

        last_raw = _call_llm(messages) or ""
        try:
            data = json.loads(_extract_json(last_raw))
            # model_validate reports non-object JSON (e.g. a list) as a
            # ValidationError, whereas TestCase(**data) would raise TypeError.
            test_case = TestCase.model_validate(data)
            unknown = sorted(
                {
                    step.target
                    for step in test_case.steps
                    if step.target and step.target not in selector_dict
                }
            )
            if unknown:
                raise ValueError("target 不在页面元素字典中: " + ", ".join(unknown))
        except (json.JSONDecodeError, ValidationError, KeyError, ValueError) as e:
            last_error = e
            print(f"[LLM] 校验失败(attempt {attempt}): {e}")
        else:
            print(f"[LLM] 校验通过 ✓({len(test_case.steps)} 个步骤)")
            return test_case

    raise RuntimeError(
        f"LLM 连续 {LLM_MAX_RETRIES} 次生成无效 DSL,最后一次输出:\n{last_raw}"
    ) from last_error


# executor.py — Playwright MCP executor
"""
通过 Playwright MCP(stdio 模式)执行 DSL TestCase。
本地启动 MCP Server:
npx @playwright/mcp@latest --port 8931
或 stdio 模式(推荐,无需单独启动服务):
直接在代码中 subprocess 启动
"""
import asyncio
import json
import subprocess
import sys
from schema import TestCase, Step
# ── MCP 通信层(stdio JSON-RPC) ──────────────────────────
class MCPClient:
    """Minimal JSON-RPC 2.0 client for a Playwright MCP server over stdio.

    Messages are exchanged as one JSON document per line on the child
    process's stdin/stdout. Requests carry an ``id`` and wait for the reply
    with the same ``id``; notifications carry no ``id`` and are never
    answered.
    """

    # Seconds to wait for the server to exit after stdin is closed.
    _STOP_TIMEOUT_S = 10

    def __init__(self):
        self._proc: subprocess.Popen | None = None
        self._req_id = 0

    def start(self):
        """Spawn the server process and perform the MCP handshake.

        The process is shut down again if the handshake fails.
        """
        self._proc = subprocess.Popen(
            ["npx", "@playwright/mcp@latest", "--headless"],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=sys.stderr,
            text=True,
            bufsize=1,
        )
        try:
            self._initialize()
        except Exception:
            self.stop()
            raise

    def stop(self):
        """Close the pipes and wait for the server to exit; safe to call twice."""
        proc, self._proc = self._proc, None
        if proc is None:
            return
        try:
            if proc.stdin is not None and not proc.stdin.closed:
                proc.stdin.close()
        except OSError:
            # The pipe is already broken (server died); nothing left to flush.
            pass
        try:
            proc.wait(timeout=self._STOP_TIMEOUT_S)
        except subprocess.TimeoutExpired:
            # The server ignored EOF on stdin; do not hang the caller forever.
            proc.kill()
            proc.wait()
        finally:
            if proc.stdout is not None:
                proc.stdout.close()

    def _next_id(self) -> int:
        """Return a fresh request id (monotonically increasing)."""
        self._req_id += 1
        return self._req_id

    def _write(self, message: dict) -> None:
        """Write one JSON-RPC message as a single line to the server."""
        if self._proc is None or self._proc.stdin is None:
            raise RuntimeError("MCP server is not running; call start() first")
        self._proc.stdin.write(json.dumps(message) + "\n")
        self._proc.stdin.flush()

    def _notify(self, method: str, params: dict) -> None:
        """Send a notification: no ``id`` and no reply to wait for."""
        self._write({"jsonrpc": "2.0", "method": method, "params": params})

    def _send(self, method: str, params: dict) -> dict:
        """Send a request and return the ``result`` of the matching reply.

        Lines that are blank, or that belong to server-initiated messages
        (no ``id`` / a different ``id``), are skipped.

        Raises:
            RuntimeError: if the reply carries an ``error`` member, or the
                server closes stdout before replying.
        """
        req_id = self._next_id()
        self._write({
            "jsonrpc": "2.0",
            "id": req_id,
            "method": method,
            "params": params,
        })
        while True:
            raw = self._proc.stdout.readline()
            if not raw:
                # readline() returns "" only at EOF.
                raise RuntimeError(
                    f"MCP server closed its output before answering '{method}'"
                )
            if not raw.strip():
                continue
            resp = json.loads(raw)
            if not isinstance(resp, dict):
                continue
            if "error" in resp and resp.get("id") in (req_id, None):
                # id is null when the server could not parse the request.
                raise RuntimeError(f"MCP error: {resp['error']}")
            if resp.get("id") != req_id:
                continue
            return resp.get("result", {})

    def _initialize(self):
        """Run the MCP handshake: ``initialize`` request, then the
        ``notifications/initialized`` notification."""
        self._send("initialize", {
            "protocolVersion": "2024-11-05",
            "clientInfo": {"name": "testgen", "version": "1.0"},
            "capabilities": {},
        })
        # This is a notification: sending it as a request would block
        # forever waiting for a reply that is never sent.
        self._notify("notifications/initialized", {})

    def call_tool(self, tool: str, arguments: dict) -> dict:
        """Invoke an MCP tool and return its result object.

        Raises:
            RuntimeError: on a protocol-level error, or when the tool result
                is flagged with ``isError``.
        """
        result = self._send("tools/call", {
            "name": tool,
            "arguments": arguments,
        })
        if isinstance(result, dict) and result.get("isError"):
            raise RuntimeError(f"MCP tool '{tool}' failed: {result.get('content')}")
        return result
# ── DSL → MCP 工具映射 ────────────────────────────────────
class PlaywrightMCPExecutor:
    """Translate a TestCase DSL into a sequence of Playwright MCP tool calls.

    NOTE(review): the tool names and argument keys used below
    (``browser_fill``, ``selector``, ``expression``, ...) and the ``result``
    key read from ``browser_evaluate`` replies cannot be verified from this
    file. Confirm them against the tool list the server actually exposes
    (``tools/list``) before relying on this executor.
    """

    # DSL action -> (MCP tool name, whether step.value is forwarded as "value")
    _SELECTOR_TOOLS = {
        "input": ("browser_fill", True),
        "click": ("browser_click", False),
        "hover": ("browser_hover", False),
        "select": ("browser_select_option", True),
        "wait_for": ("browser_wait_for_selector", False),
    }

    def __init__(self, selector_dict: dict):
        self.selector_dict = selector_dict
        self.client = MCPClient()

    def _sel(self, target: str) -> str:
        """Map a business element name to its selector.

        Raises:
            KeyError: if ``target`` has no (non-empty) entry in the dictionary.
        """
        sel = self.selector_dict.get(target)
        if not sel:
            raise KeyError(
                f"selector_dict 中找不到元素「{target}」,"
                "请先在 selector_dict.json 中添加映射。"
            )
        return sel

    @staticmethod
    def _expected(step) -> str:
        """Return ``step.value``, refusing ``None``.

        An assertion without an expected value cannot be evaluated, so it is
        reported as an execution error instead of crashing with TypeError.
        """
        if step.value is None:
            raise ValueError(f"action={step.action} 必须提供 value")
        return step.value

    def _evaluate(self, expression: str):
        """Evaluate a JavaScript expression in the page and return its result."""
        reply = self.client.call_tool("browser_evaluate", {"expression": expression})
        return reply.get("result")

    def _evaluate_text(self, expression: str) -> str:
        """Like ``_evaluate`` but always returns text ("" for a null result)."""
        outcome = self._evaluate(expression)
        if outcome is None:
            return ""
        return outcome if isinstance(outcome, str) else str(outcome)

    def _execute_step(self, step: Step):
        """Execute one step.

        Raises:
            AssertionError: when an ``assert_*`` step does not hold. Raised
                explicitly (not via the ``assert`` statement) so that the
                checks still run under ``python -O``.
            KeyError: when the step's target is not in the selector dictionary.
            ValueError: for an unsupported action or a missing expected value.
        """
        action = step.action

        if action == "navigate":
            print(f" → navigate: {step.url}")
            self.client.call_tool("browser_navigate", {"url": step.url})
            return

        if action in self._SELECTOR_TOOLS:
            tool, with_value = self._SELECTOR_TOOLS[action]
            sel = self._sel(step.target)
            arguments = {"selector": sel}
            if with_value:
                print(f" → {action} [{step.target}] = '{step.value}'")
                arguments["value"] = step.value
            else:
                print(f" → {action} [{step.target}]")
            self.client.call_tool(tool, arguments)
            return

        if action == "assert_url":
            expected = self._expected(step)
            print(f" → assert_url = '{step.value}'")
            actual = self._evaluate_text("window.location.pathname")
            if expected not in actual:
                raise AssertionError(
                    f"URL 断言失败:期望包含 '{step.value}',实际为 '{actual}'"
                )
            print(f" ✓ URL 匹配")
            return

        if action == "assert_text":
            sel = self._sel(step.target)
            expected = self._expected(step)
            print(f" → assert_text [{step.target}] contains '{step.value}'")
            # json.dumps yields a correctly escaped JS string literal.
            # Splicing the selector between single quotes breaks for
            # selectors such as [data-testid='username'].
            js_sel = json.dumps(sel)
            actual = self._evaluate_text(
                f"document.querySelector({js_sel})?.textContent"
            )
            if expected not in actual:
                raise AssertionError(
                    f"文本断言失败:「{step.target}」期望包含 '{step.value}',"
                    f"实际为 '{actual}'"
                )
            print(f" ✓ 文本匹配")
            return

        if action == "assert_visible":
            sel = self._sel(step.target)
            print(f" → assert_visible [{step.target}]")
            js_sel = json.dumps(sel)
            visible = self._evaluate(
                f"(() => {{"
                f" const el = document.querySelector({js_sel});"
                f" return el ? getComputedStyle(el).display !== 'none' : false;"
                f"}})()"
            )
            if visible is not True:
                raise AssertionError(
                    f"可见性断言失败:「{step.target}」不可见"
                )
            print(f" ✓ 元素可见")
            return

        # An unknown action must not be counted as a passed step.
        raise ValueError(f"unsupported action: {action}")

    def run(self, test_case: TestCase) -> dict:
        """Execute the whole test case and return a result report.

        Execution stops at the first failing or erroring step. The MCP
        client is always stopped, including when starting it fails.

        Returns:
            A dict with ``test_name``, ``status`` ("PASSED"/"FAILED"),
            ``passed``, ``failed`` and the per-step ``steps`` list.
        """
        print(f"\n▶ 开始执行:{test_case.test_name}")
        print(f" {test_case.description}\n")
        results = []
        passed = 0
        failed = 0
        try:
            # Started inside the try block so a failed handshake cannot
            # leave the server process behind.
            self.client.start()
            total = len(test_case.steps)
            for i, step in enumerate(test_case.steps, 1):
                print(f"[{i}/{total}]", end=" ")
                record = {"step": i, "action": step.action}
                try:
                    self._execute_step(step)
                except AssertionError as e:
                    print(f" ✗ 断言失败: {e}")
                    record.update(status="fail", error=str(e))
                except Exception as e:
                    print(f" ✗ 执行错误: {e}")
                    record.update(status="error", error=str(e))
                else:
                    record["status"] = "pass"
                results.append(record)
                if record["status"] != "pass":
                    failed += 1
                    break  # stop at the first failure
                passed += 1
        finally:
            self.client.stop()

        status = "PASSED" if failed == 0 else "FAILED"
        print(f"\n{'✅' if failed == 0 else '❌'} 用例 {status}"
              f"({passed} 通过 / {failed} 失败)")
        return {
            "test_name": test_case.test_name,
            "status": status,
            "passed": passed,
            "failed": failed,
            "steps": results,
        }


# runner.py — main entry point (wires the whole pipeline together)
import json
import os
import sys
from pathlib import Path
from datetime import datetime
from config import SELECTOR_DICT_PATH, CASES_DIR
from llm import generate_test_case
from executor import PlaywrightMCPExecutor
def load_selector_dict() -> dict:
    """Read the UTF-8 JSON file at SELECTOR_DICT_PATH and return its parsed content."""
    raw_text = Path(SELECTOR_DICT_PATH).read_text(encoding="utf-8")
    return json.loads(raw_text)
def save_case(test_case, output_dir: str = CASES_DIR):
    """Write ``test_case`` as pretty-printed JSON into ``output_dir``.

    The file name is the sanitised test name followed by a timestamp
    (``YYYYmmdd_HHMMSS``) and ``.json``.

    Args:
        test_case: The validated test case to store (``None`` fields are
            omitted from the output).
        output_dir: Target directory; created, including missing parents,
            if it does not exist.

    Returns:
        The path of the written file.
    """
    import re  # local import: only this function needs it

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    stem = test_case.test_name.replace(" ", "_").replace("/", "-")
    # test_name is produced by the LLM: also neutralise backslashes and the
    # other characters that are illegal in file names on common platforms,
    # so the name can neither escape output_dir nor fail to be created.
    stem = re.sub(r'[\\:*?"<>|\x00-\x1f]', "-", stem).strip(". ") or "case"

    filename = f"{stem}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    path = out_dir / filename
    path.write_text(
        test_case.model_dump_json(indent=2, exclude_none=True),
        encoding="utf-8",
    )
    print(f"[保存] 用例已写入 {path}")
    return path
def run(description: str, execute: bool = True):
    """Run the pipeline: load selectors, generate the DSL, save it, optionally execute it.

    Args:
        description: Natural-language description of the scenario.
        execute: When False, stop after the generated case has been saved.

    Returns:
        The executor's report when ``execute`` is true; otherwise a dict with
        the test name and the status ``"generated_only"``.
    """
    selectors = load_selector_dict()

    # Generation already includes validation and retries.
    case = generate_test_case(description, selectors)

    print("\n[DSL 预览]")
    print(case.model_dump_json(indent=2, exclude_none=True))

    save_case(case)

    if not execute:
        return {"test_name": case.test_name, "status": "generated_only"}
    return PlaywrightMCPExecutor(selectors).run(case)
# ── CLI 入口 ──────────────────────────────────────────────
if __name__ == "__main__":
    # Positional arguments are whatever remains once the flag is removed, so
    # `runner.py --no-exec '...'` no longer treats the flag as the description.
    positional = [arg for arg in sys.argv[1:] if arg != "--no-exec"]
    if not positional:
        print("用法: python runner.py '操作描述' [--no-exec]")
        sys.exit(1)
    description = positional[0]
    execute = "--no-exec" not in sys.argv
    result = run(description, execute=execute)
    print(f"\n最终结果: {result['status']}")
    # Report a failed test run through the exit code so that scripts and CI
    # jobs can detect it.
    if result["status"] == "FAILED":
        sys.exit(1)


# selector_dict.json — element dictionary (maintained per project)
{
"用户名输入框": "[data-testid='username']",
"密码输入框": "[data-testid='password']",
"登录按钮": "[data-testid='login-btn']",
"注册按钮": "[data-testid='register-btn']",
"搜索输入框": "[data-testid='search-input']",
"搜索按钮": "[data-testid='search-btn']",
"购物车按钮": "[data-testid='cart-btn']",
"商品数量输入": "[data-testid='qty-input']",
"提交按钮": "[data-testid='submit-btn']",
"错误提示": ".error-message",
"成功提示": ".toast-success",
"欢迎信息": ".welcome-banner"
}

运行方式
# 安装依赖
pip install openai pydantic
# 安装 Playwright MCP
npm install -g @playwright/mcp
# 只生成 DSL,不执行
python runner.py "打开登录页,输入邮箱 test@test.com 和密码 123456,点击登录,验证跳转到 /dashboard" --no-exec
# 生成 + 执行
python runner.py "打开登录页,输入邮箱 test@test.com 和密码 123456,点击登录,验证跳转到 /dashboard"

一次完整运行的控制台输出示例
[LLM] 第 1 次生成...
[LLM] 校验通过 ✓(5 个步骤)
[DSL 预览]
{
"test_name": "登录成功",
"description": "验证使用正确的用户名和密码能够成功登录并跳转到 Dashboard",
"steps": [
{"action": "navigate", "url": "/login"},
{"action": "input", "target": "用户名输入框", "value": "test@test.com"},
{"action": "input", "target": "密码输入框", "value": "123456"},
{"action": "click", "target": "登录按钮"},
{"action": "assert_url", "value": "/dashboard"}
]
}
[保存] 用例已写入 cases/登录成功_20250403_143021.json
▶ 开始执行:登录成功
验证使用正确的用户名和密码能够成功登录并跳转到 Dashboard
[1/5] → navigate: /login
[2/5] → input [用户名输入框] = 'test@test.com'
[3/5] → input [密码输入框] = '123456'
[4/5] → click [登录按钮]
[5/5] → assert_url = '/dashboard'
✓ URL 匹配
✅ 用例 PASSED(5 通过 / 0 失败)

三阶段对应关系
| 阶段 | 在代码中的对应位置 | 工作量 |
| --- | --- | --- |
| Phase 1 | llm.py + executor.py + selector_dict.json | 1~2天 |
| Phase 2 | schema.py 的 validator + llm.py 的重试逻辑 + cases/ 存储 | 追加1周 |
| Phase 3 | 在 executor.py 中加 DOM 采集 → 自动更新 selector_dict | 按需扩展 |
整个系统 不超过 300 行核心代码,DSL 层保证了可扩展性——后续无论是换模型、换执行框架、还是加 Agent,都只需改对应模块,不影响其他部分。