好的,下面给出完整的技术方案 + 可运行代码。技术栈选择:Python(OpenAI SDK + Pydantic)+ Playwright MCP。

架构确认

自然语言
  ↓
Python (OpenAI SDK) → DSL JSON(带 JSON Schema 校验 + 重试)
  ↓
Playwright MCP Server(接收 DSL 指令逐步执行)

项目结构

testgen/
├── config.py              # 配置(模型、MCP地址等)
├── prompt.py              # Prompt 模板
├── schema.py              # DSL JSON Schema 定义
├── llm.py                 # LLM 调用 + 校验 + 重试
├── executor.py            # Playwright MCP 执行器
├── runner.py              # 主入口
├── selector_dict.json     # 业务元素 → selector 映射
└── cases/                 # 生成的 DSL 用例存储
    └── login_success.json

完整代码

config.py

import os

# Chat model and sampling settings used when generating DSL JSON.
LLM_MODEL       = "gpt-4o"
LLM_TEMPERATURE = 0.2          # low temperature for more stable output
LLM_MAX_RETRIES = 3            # upper bound of the generation loop in llm.py

# Read from the environment; raises KeyError at import time when the variable is unset.
OPENAI_API_KEY  = os.environ["OPENAI_API_KEY"]

# Playwright MCP Server address (once it has been started locally).
# NOTE(review): none of the modules shown alongside this config import
# MCP_WS_URL (executor.py spawns the server over stdio) — confirm it is needed.
MCP_WS_URL      = "ws://localhost:8931"

# Relative paths: both resolve against the current working directory.
SELECTOR_DICT_PATH = "selector_dict.json"
CASES_DIR          = "cases"

schema.py — DSL 结构定义

from typing import Literal, Optional

from pydantic import BaseModel, field_validator, model_validator

# ── 单个步骤 ──────────────────────────────────────────────
# Closed set of action names a DSL step may carry, grouped as
# interactions / assertions / auxiliary actions.
ActionType = Literal[
    "navigate", "input", "click",
    "assert_url", "assert_text", "assert_visible",
    "wait_for", "select", "hover",
]

class Step(BaseModel):
    """One DSL step: an action plus the fields that action requires.

    Cross-field rules are enforced in a model-level "after" validator.
    Pydantic v2 skips field validators for fields that fall back to their
    default, so a per-field validator on ``target`` / ``url`` never fires
    when the key is simply omitted — which is exactly the case to reject.

    Raises:
        pydantic.ValidationError: if ``action`` is not an ``ActionType`` or a
            field required by the action is missing.
    """

    action: ActionType
    target: Optional[str] = None   # business-level element name, a key of selector_dict
    value:  Optional[str] = None   # text to input / expected value of an assertion
    url:    Optional[str] = None   # navigate only

    @model_validator(mode="after")
    def check_required_fields(self):
        """Reject steps that lack the fields their action needs."""
        action = self.action

        # Every action except navigate / assert_url operates on an element.
        if action not in ("navigate", "assert_url") and not self.target:
            raise ValueError(f"action={action} 必须提供 target")

        if action == "navigate" and not self.url:
            raise ValueError("navigate 必须提供 url")

        # These actions consume `value` at execution time; None would only
        # surface later as a runtime error inside the executor.
        if action in ("input", "select", "assert_url", "assert_text") and self.value is None:
            raise ValueError(f"action={action} 必须提供 value")

        return self

# ── 整个用例 ──────────────────────────────────────────────
class TestCase(BaseModel):
    """A complete test case: a name, a description and an ordered list of steps."""

    test_name:   str
    description: str
    steps:       list[Step]

    @field_validator("steps")
    @classmethod
    def must_have_assertion(cls, steps):
        """Require at least one step whose action starts with ``assert``."""
        for candidate in steps:
            if candidate.action.startswith("assert"):
                return steps
        raise ValueError("steps 中至少需要一个 assert_* 断言步骤")

prompt.py — Prompt 模板

import json

# System message sent on every request: states the generator's role, the
# allowed actions, the target/assertion rules and an example of the expected
# JSON structure. The text is sent to the model verbatim.
SYSTEM_PROMPT = """\
你是一个 Web 自动化测试用例生成器。
你的任务是将用户的自然语言操作描述,转换为结构化的测试 DSL(JSON 格式)。

规则(必须严格遵守):
1. 只输出合法 JSON,不输出任何解释、注释或 markdown 代码块
2. action 只能使用以下值之一:
   navigate / input / click / assert_url / assert_text / assert_visible / wait_for / select / hover
3. target 字段必须使用"页面元素字典"中已有的业务名称,不允许自造
4. navigate 和 assert_url 不需要 target,只需要 url 或 value
5. 必须包含至少一个 assert_* 断言步骤
6. 描述中提到的每一个操作,都需要对应一个 step

输出 JSON 结构:
{
  "test_name": "用例名称",
  "description": "一句话描述该用例的测试目标",
  "steps": [
    {"action": "navigate", "url": "/login"},
    {"action": "input",    "target": "用户名输入框", "value": "test@example.com"},
    {"action": "click",    "target": "登录按钮"},
    {"action": "assert_url", "value": "/dashboard"}
  ]
}
"""

# Follow-up message used on retries. It is rendered with str.format, so the
# only placeholders are {errors} and {previous_output}; any other literal
# brace added to this template would have to be doubled.
FIX_PROMPT_TEMPLATE = """\
你之前生成的 JSON 有以下错误,请修复并重新输出合法的 JSON:

错误信息:
{errors}

你之前输出的内容:
{previous_output}

规则同上,只输出 JSON,不输出解释。
"""

def build_user_prompt(description: str, selector_dict: dict) -> str:
    """Build the user message: the element dictionary followed by the description.

    Args:
        description: Natural-language description of the scenario.
        selector_dict: Mapping of business element names to selectors; it is
            embedded as pretty-printed JSON with non-ASCII characters kept as-is.

    Returns:
        The prompt text, terminated by a newline.
    """
    rendered_dict = json.dumps(selector_dict, ensure_ascii=False, indent=2)
    heading = "页面元素字典(target 只能使用这里的名称):\n"
    return f"{heading}{rendered_dict}\n\n用户描述:\n{description}\n"

def build_fix_prompt(errors: str, previous_output: str) -> str:
    """Render FIX_PROMPT_TEMPLATE with the validation errors and the rejected output.

    Args:
        errors: Text describing why the previous output was rejected.
        previous_output: The raw text the model produced last time.

    Returns:
        The filled-in retry prompt.
    """
    substitutions = {"errors": errors, "previous_output": previous_output}
    return FIX_PROMPT_TEMPLATE.format(**substitutions)

llm.py — LLM 调用 + 校验 + 自动重试

import json
import re
from openai import OpenAI
from pydantic import ValidationError

from config import LLM_MODEL, LLM_TEMPERATURE, LLM_MAX_RETRIES, OPENAI_API_KEY
from schema import TestCase
from prompt import SYSTEM_PROMPT, build_user_prompt, build_fix_prompt

# Module-level OpenAI client, constructed at import time with the API key from config.
client = OpenAI(api_key=OPENAI_API_KEY)


def _extract_json(raw: str) -> str:
    """兼容 LLM 偶尔输出的 markdown 代码块"""
    match = re.search(r"```(?:json)?\s*([\s\S]+?)```", raw)
    return match.group(1).strip() if match else raw.strip()


def _call_llm(messages: list[dict]) -> str:
    """Send *messages* to the chat-completions API in JSON mode and return the reply text.

    Args:
        messages: Chat messages in the OpenAI ``{"role", "content"}`` format.

    Returns:
        The content of the first choice, or ``""`` when the API returns no
        content.
    """
    resp = client.chat.completions.create(
        model=LLM_MODEL,
        temperature=LLM_TEMPERATURE,
        response_format={"type": "json_object"},   # force JSON mode
        messages=messages,
    )
    # `content` is nullable (e.g. on a refusal). Returning "" keeps the
    # declared return type and lets the caller's JSON parsing fail in a way
    # its retry loop already handles, instead of crashing on None.
    return resp.choices[0].message.content or ""


def generate_test_case(
    description: str,
    selector_dict: dict,
) -> TestCase:
    """
    Main entry point: natural language -> validated TestCase.

    The model output is accepted only if it parses as JSON, validates against
    the TestCase schema, and every step target is a key of *selector_dict*.
    On failure the error text is fed back to the model and generation is
    retried, up to LLM_MAX_RETRIES attempts in total.

    Args:
        description: Natural-language description of the scenario.
        selector_dict: Mapping of business element names to selectors.

    Returns:
        The first TestCase that passes all checks.

    Raises:
        RuntimeError: if no attempt produced a valid test case.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user",   "content": build_user_prompt(description, selector_dict)},
    ]

    last_raw = ""
    last_errors = ""

    for attempt in range(1, LLM_MAX_RETRIES + 1):
        print(f"[LLM] 第 {attempt} 次生成...")

        if attempt > 1:
            # Feed the previous output and its errors back so the model can repair it.
            fix_content = build_fix_prompt(last_errors, last_raw)
            messages.append({"role": "assistant", "content": last_raw})
            messages.append({"role": "user",      "content": fix_content})

        # Guard against a missing reply so the string handling below never sees None.
        last_raw = _call_llm(messages) or ""
        json_str = _extract_json(last_raw)

        try:
            # model_validate reports a non-object JSON root as a ValidationError,
            # whereas TestCase(**data) would raise an uncaught TypeError.
            test_case = TestCase.model_validate(json.loads(json_str))
        except (json.JSONDecodeError, ValidationError) as e:
            last_errors = str(e)
        else:
            # The prompt forbids invented targets; catch them here so the model
            # can fix them instead of the executor failing on a missing selector.
            unknown = sorted({
                s.target for s in test_case.steps
                if s.target and s.target not in selector_dict
            })
            if not unknown:
                print(f"[LLM] 校验通过 ✓({len(test_case.steps)} 个步骤)")
                return test_case
            last_errors = "以下 target 不在页面元素字典中: " + ", ".join(unknown)

        print(f"[LLM] 校验失败(attempt {attempt}): {last_errors}")

    raise RuntimeError(
        f"LLM 连续 {LLM_MAX_RETRIES} 次生成无效 DSL,最后一次输出:\n{last_raw}"
    )

executor.py — Playwright MCP 执行器

"""
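Execute a DSL TestCase through a Playwright MCP server over stdio (JSON-RPC 2.0).

MCPClient in this module spawns the server itself as a subprocess
(`npx @playwright/mcp@latest --headless`), so nothing has to be started
beforehand.

The server can also be started standalone on a port:
  npx @playwright/mcp@latest --port 8931
but the client in this module does not connect to a server started that way.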
"""

import asyncio
import json
import subprocess
import sys
from schema import TestCase, Step


# ── MCP 通信层(stdio JSON-RPC) ──────────────────────────

class MCPClient:
    """
    通过 stdio 和 Playwright MCP Server 通信(JSON-RPC 2.0)。
    """

    def __init__(self):
        self._proc: subprocess.Popen | None = None
        self._req_id = 0

    def start(self):
        self._proc = subprocess.Popen(
            ["npx", "@playwright/mcp@latest", "--headless"],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=sys.stderr,
            text=True,
            bufsize=1,
        )
        # 等待 server 就绪
        self._initialize()

    def stop(self):
        if self._proc:
            self._proc.stdin.close()
            self._proc.wait()

    def _next_id(self) -> int:
        self._req_id += 1
        return self._req_id

    def _send(self, method: str, params: dict) -> dict:
        req = {
            "jsonrpc": "2.0",
            "id": self._next_id(),
            "method": method,
            "params": params,
        }
        line = json.dumps(req) + "\n"
        self._proc.stdin.write(line)
        self._proc.stdin.flush()

        # 读取响应
        raw = self._proc.stdout.readline()
        resp = json.loads(raw)

        if "error" in resp:
            raise RuntimeError(f"MCP error: {resp['error']}")
        return resp.get("result", {})

    def _initialize(self):
        self._send("initialize", {
            "protocolVersion": "2024-11-05",
            "clientInfo": {"name": "testgen", "version": "1.0"},
            "capabilities": {},
        })
        self._send("notifications/initialized", {})

    def call_tool(self, tool: str, arguments: dict) -> dict:
        return self._send("tools/call", {
            "name": tool,
            "arguments": arguments,
        })


# ── DSL → MCP 工具映射 ────────────────────────────────────

class PlaywrightMCPExecutor:
    """
    Translate a TestCase DSL into a sequence of Playwright MCP tool calls.

    Each step's business-level ``target`` is resolved to a CSS selector via
    ``selector_dict`` and passed to the matching tool; assertions are evaluated
    in the page through ``browser_evaluate``.

    NOTE(review): the tool names, the ``selector`` / ``expression`` argument
    names and the ``result`` key read from tool responses are kept exactly as
    in the original code. Verify them against the tool list of the installed
    @playwright/mcp version.
    """

    def __init__(self, selector_dict: dict):
        self.selector_dict = selector_dict
        self.client = MCPClient()

    def _sel(self, target: str) -> str:
        """Resolve a business element name to its CSS selector.

        Raises:
            KeyError: if *target* has no (non-empty) entry in selector_dict.
        """
        sel = self.selector_dict.get(target)
        if not sel:
            raise KeyError(
                f"selector_dict 中找不到元素「{target}」,"
                "请先在 selector_dict.json 中添加映射。"
            )
        return sel

    @staticmethod
    def _js_query(sel: str) -> str:
        """Return a JS ``document.querySelector(...)`` call for *sel*.

        The selector is embedded with json.dumps so quotes and backslashes in
        it (e.g. ``[data-testid='username']``) cannot break the script.
        """
        return f"document.querySelector({json.dumps(sel)})"

    @staticmethod
    def _expected(step) -> str:
        """Return the step's expected value, rejecting a missing one."""
        if step.value is None:
            raise ValueError(f"action={step.action} 必须提供 value")
        return step.value

    def _execute_step(self, step: "Step"):
        """Execute a single step.

        Raises:
            AssertionError: if an ``assert_*`` step does not hold.
            KeyError: if the step's target is not in selector_dict.
            ValueError: if an assertion has no expected value or the action
                is not supported.
        """
        c = self.client
        action = step.action

        if action == "navigate":
            print(f"  → navigate: {step.url}")
            c.call_tool("browser_navigate", {"url": step.url})

        elif action == "input":
            sel = self._sel(step.target)
            print(f"  → input [{step.target}] = '{step.value}'")
            c.call_tool("browser_fill", {"selector": sel, "value": step.value})

        elif action == "click":
            sel = self._sel(step.target)
            print(f"  → click [{step.target}]")
            c.call_tool("browser_click", {"selector": sel})

        elif action == "hover":
            sel = self._sel(step.target)
            print(f"  → hover [{step.target}]")
            c.call_tool("browser_hover", {"selector": sel})

        elif action == "select":
            sel = self._sel(step.target)
            print(f"  → select [{step.target}] = '{step.value}'")
            c.call_tool("browser_select_option", {
                "selector": sel, "value": step.value
            })

        elif action == "wait_for":
            sel = self._sel(step.target)
            print(f"  → wait_for [{step.target}]")
            c.call_tool("browser_wait_for_selector", {"selector": sel})

        elif action == "assert_url":
            expected = self._expected(step)
            print(f"  → assert_url = '{step.value}'")
            result = c.call_tool("browser_evaluate", {
                "expression": "window.location.pathname"
            })
            actual = result.get("result", "")
            # Raise explicitly: `assert` statements are removed under `python -O`,
            # which would make every assertion step pass.
            if expected not in actual:
                raise AssertionError(
                    f"URL 断言失败:期望包含 '{step.value}',实际为 '{actual}'"
                )
            print(f"     ✓ URL 匹配")

        elif action == "assert_text":
            sel = self._sel(step.target)
            expected = self._expected(step)
            print(f"  → assert_text [{step.target}] contains '{step.value}'")
            result = c.call_tool("browser_evaluate", {
                "expression": f"{self._js_query(sel)}?.textContent"
            })
            actual = result.get("result", "") or ""
            if expected not in actual:
                raise AssertionError(
                    f"文本断言失败:「{step.target}」期望包含 '{step.value}',"
                    f"实际为 '{actual}'"
                )
            print(f"     ✓ 文本匹配")

        elif action == "assert_visible":
            sel = self._sel(step.target)
            print(f"  → assert_visible [{step.target}]")
            result = c.call_tool("browser_evaluate", {
                "expression": (
                    f"(() => {{"
                    f"  const el = {self._js_query(sel)};"
                    f"  return el ? getComputedStyle(el).display !== 'none' : false;"
                    f"}})()"
                )
            })
            if result.get("result") is not True:
                raise AssertionError(
                    f"可见性断言失败:「{step.target}」不可见"
                )
            print(f"     ✓ 元素可见")

        else:
            # An action nobody executes must not be counted as a passing step.
            raise ValueError(f"unsupported action: {action}")

    def run(self, test_case: "TestCase") -> dict:
        """Execute the whole test case and return a result report.

        Execution stops at the first failing or erroring step. The MCP client
        is always stopped, including when starting it fails.

        Returns:
            A dict with ``test_name``, ``status`` ("PASSED" / "FAILED"),
            ``passed``, ``failed`` and per-step ``steps`` entries.
        """
        print(f"\n▶ 开始执行:{test_case.test_name}")
        print(f"  {test_case.description}\n")

        results = []
        passed = 0
        failed = 0
        total = len(test_case.steps)

        try:
            # Started inside the try so a failed startup is still cleaned up.
            self.client.start()
            for i, step in enumerate(test_case.steps, 1):
                print(f"[{i}/{total}]", end=" ")
                try:
                    self._execute_step(step)
                    results.append({"step": i, "action": step.action,
                                    "status": "pass"})
                    passed += 1
                except AssertionError as e:
                    print(f"     ✗ 断言失败: {e}")
                    results.append({"step": i, "action": step.action,
                                    "status": "fail", "error": str(e)})
                    failed += 1
                    break  # stop at the first failed assertion
                except Exception as e:
                    print(f"     ✗ 执行错误: {e}")
                    results.append({"step": i, "action": step.action,
                                    "status": "error", "error": str(e)})
                    failed += 1
                    break
        finally:
            self.client.stop()

        status = "PASSED" if failed == 0 else "FAILED"
        print(f"\n{'✅' if failed == 0 else '❌'} 用例 {status}"
              f"({passed} 通过 / {failed} 失败)")

        return {
            "test_name": test_case.test_name,
            "status": status,
            "passed": passed,
            "failed": failed,
            "steps": results,
        }

runner.py — 主入口(串联全流程)

import json
import os
import sys
from pathlib import Path
from datetime import datetime

from config import SELECTOR_DICT_PATH, CASES_DIR
from llm import generate_test_case
from executor import PlaywrightMCPExecutor


def load_selector_dict() -> dict:
    """Read the UTF-8 JSON file at SELECTOR_DICT_PATH and return its parsed content."""
    raw_text = Path(SELECTOR_DICT_PATH).read_text(encoding="utf-8")
    return json.loads(raw_text)


def save_case(test_case, output_dir: str = CASES_DIR):
    """Write *test_case* as pretty-printed JSON into *output_dir*.

    The file name is the sanitised test name plus a local timestamp.

    Args:
        test_case: The TestCase to persist (fields that are None are omitted).
        output_dir: Target directory; created, including parents, if missing.

    Returns:
        The Path of the file that was written.
    """
    out_dir = Path(output_dir)
    # parents=True so a nested output_dir works on first use.
    out_dir.mkdir(parents=True, exist_ok=True)

    stem = test_case.test_name.replace(" ", "_").replace("/", "-")
    # test_name comes from the LLM: neutralise characters that are invalid in
    # file names on common platforms, or that act as path separators.
    stem = "".join(
        "_" if ch in '\\:*?"<>|' or ord(ch) < 32 else ch
        for ch in stem
    )
    # A name made only of dots ("..", ".") must not be used as a path component.
    stem = stem.strip(".") or "case"

    filename = f"{stem}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    path = out_dir / filename
    path.write_text(
        test_case.model_dump_json(indent=2, exclude_none=True),
        encoding="utf-8",
    )
    print(f"[保存] 用例已写入 {path}")
    return path


def run(description: str, execute: bool = True):
    """Run the full pipeline: generate a DSL case, print it, save it, optionally execute it.

    Args:
        description: Natural-language description of the scenario.
        execute: When False, stop after saving the generated case.

    Returns:
        The executor's report, or a ``generated_only`` summary when
        *execute* is False.
    """
    selectors = load_selector_dict()

    # Generation includes schema validation and retries.
    case = generate_test_case(description, selectors)

    print("\n[DSL 预览]")
    print(case.model_dump_json(indent=2, exclude_none=True))

    save_case(case)

    if not execute:
        return {"test_name": case.test_name, "status": "generated_only"}

    return PlaywrightMCPExecutor(selectors).run(case)


# ── CLI 入口 ──────────────────────────────────────────────
if __name__ == "__main__":
    # Usage: python runner.py '<description>' [--no-exec]
    cli_args = sys.argv[1:]
    # The description is the first non-flag argument, so `--no-exec` may come
    # before or after it without being mistaken for the description.
    positional = [arg for arg in cli_args if not arg.startswith("--")]
    if not positional:
        print("用法: python runner.py '操作描述' [--no-exec]")
        sys.exit(1)

    description = positional[0]
    execute = "--no-exec" not in cli_args

    result = run(description, execute=execute)
    print(f"\n最终结果: {result['status']}")

    # Report a failed case through the exit status so scripts and CI can react to it.
    if result["status"] == "FAILED":
        sys.exit(1)

selector_dict.json — 元素字典(按项目维护)

{
  "用户名输入框":  "[data-testid='username']",
  "密码输入框":   "[data-testid='password']",
  "登录按钮":    "[data-testid='login-btn']",
  "注册按钮":    "[data-testid='register-btn']",
  "搜索输入框":   "[data-testid='search-input']",
  "搜索按钮":    "[data-testid='search-btn']",
  "购物车按钮":   "[data-testid='cart-btn']",
  "商品数量输入": "[data-testid='qty-input']",
  "提交按钮":    "[data-testid='submit-btn']",
  "错误提示":    ".error-message",
  "成功提示":    ".toast-success",
  "欢迎信息":    ".welcome-banner"
}

运行方式

# 安装依赖
pip install openai pydantic

# 安装 Playwright MCP
npm install -g @playwright/mcp

# 只生成 DSL,不执行
python runner.py "打开登录页,输入邮箱 test@test.com 和密码 123456,点击登录,验证跳转到 /dashboard" --no-exec

# 生成 + 执行
python runner.py "打开登录页,输入邮箱 test@test.com 和密码 123456,点击登录,验证跳转到 /dashboard"

一次完整运行的控制台输出示例

[LLM] 第 1 次生成...
[LLM] 校验通过 ✓(5 个步骤)

[DSL 预览]
{
  "test_name": "登录成功",
  "description": "验证使用正确的用户名和密码能够成功登录并跳转到 Dashboard",
  "steps": [
    {"action": "navigate", "url": "/login"},
    {"action": "input", "target": "用户名输入框", "value": "test@test.com"},
    {"action": "input", "target": "密码输入框",   "value": "123456"},
    {"action": "click", "target": "登录按钮"},
    {"action": "assert_url", "value": "/dashboard"}
  ]
}

[保存] 用例已写入 cases/登录成功_20250403_143021.json

▶ 开始执行:登录成功
  验证使用正确的用户名和密码能够成功登录并跳转到 Dashboard

[1/5] → navigate: /login
[2/5] → input [用户名输入框] = 'test@test.com'
[3/5] → input [密码输入框] = '123456'
[4/5] → click [登录按钮]
[5/5] → assert_url = '/dashboard'
         ✓ URL 匹配

✅ 用例 PASSED(5 通过 / 0 失败)

三阶段对应关系

| 阶段 | 在代码中的对应位置 | 工作量 |
| --- | --- | --- |
| Phase 1 | llm.py + executor.py + selector_dict.json | 1~2天 |
| Phase 2 | schema.py 的 validator + llm.py 的重试逻辑 + cases/ 存储 | 追加1周 |
| Phase 3 | executor.py 中加 DOM 采集 → 自动更新 selector_dict | 按需扩展 |

整个系统 不超过 300 行核心代码,DSL 层保证了可扩展性——后续无论是换模型、换执行框架、还是加 Agent,都只需改对应模块,不影响其他部分。