Initial commit

commit d5ea866eb4
Author: Your Name
Date: 2026-02-05 16:25:52 +08:00

178 changed files with 32681 additions and 0 deletions

@@ -0,0 +1,116 @@
import os
from pathlib import Path
from typing import Dict, Any, Optional
from planner.mineru_client import MinerUClient
import dotenv
_ENV_PATH = Path(__file__).resolve().parents[2] / ".env"
dotenv.load_dotenv(dotenv_path=_ENV_PATH, override=False)
TEXT_EXTS = {".txt", ".md", ".markdown"}
DOC_EXTS = {".pdf", ".docx", ".doc", ".xlsx", ".xls", ".pptx"}
IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff", ".webp"}
def _get_mineru_token() -> str:
token = os.getenv("MinerU_API") or os.getenv("MinerU_API_KEY")
if token:
return token
    # A self-hosted MinerU endpoint (MinerU_URL) can be used without a token.
    if os.getenv("MinerU_URL"):
        return ""
    raise ValueError("Environment variable MinerU_API is not set")
def _read_text_file(file_path: Path) -> str:
with open(file_path, "r", encoding="utf-8") as f:
return f.read().strip()
def _extract_title_and_content(text: str, fallback_title: str) -> Dict[str, str]:
if not text:
return {"title": fallback_title, "content": ""}
lines = [line.strip() for line in text.splitlines() if line.strip()]
if not lines:
return {"title": fallback_title, "content": text}
first = lines[0]
if first.startswith("#"):
title = first.lstrip("#").strip() or fallback_title
content_start_index = text.find(lines[1]) if len(lines) > 1 else -1
content = text[content_start_index:].strip() if content_start_index != -1 else ""
return {"title": title, "content": content or text}
title = first
content = "\n".join(lines[1:]).strip()
return {"title": title, "content": content}
def _ensure_markdown(text: str, title: str) -> str:
if not text:
return f"# {title}\n"
stripped = text.lstrip()
if stripped.startswith("#"):
return text
return f"# {title}\n\n{text}"
def parse_intent_file(input_path: str, save_dir: str = "planner/mineru_result") -> Dict[str, Any]:
    """
    Parse an imported intent-authoring file:
    - txt/md: read directly
    - pdf/images: parse via MinerU; return both the raw JSON result and the extracted text
    """
    path = Path(input_path)
    if not path.exists() or not path.is_file():
        raise FileNotFoundError(f"Input file does not exist: {input_path}")
suffix = path.suffix.lower()
file_title = path.stem
if suffix in TEXT_EXTS:
content = _read_text_file(path)
result = _extract_title_and_content(content, file_title)
return {"source": str(path), "type": "text", **result, "raw_result": None}
if suffix in IMAGE_EXTS or suffix == ".pdf":
mineru_token = _get_mineru_token()
client = MinerUClient(token=mineru_token)
result_json = client.parse_file(str(path))
content: Optional[str] = None
if os.getenv("MinerU_URL"):
file_key = path.stem
result_obj = (result_json or {}).get("results", {}).get(file_key, {})
content = result_obj.get("md_content") or ""
else:
file_result_dir = os.path.join(save_dir, path.stem)
folder = client.download_result(save_dir=file_result_dir)
md_path = Path(folder) / "full.md"
txt_path = Path(folder) / "merged_text.txt"
if md_path.exists():
content = _read_text_file(md_path)
elif txt_path.exists():
content = _read_text_file(txt_path)
else:
for tf in Path(folder).rglob("*.*"):
if tf.suffix.lower() in [".md", ".txt"]:
content = _read_text_file(tf)
break
if suffix == ".pdf":
content = _ensure_markdown(content or "", file_title)
parsed = _extract_title_and_content(content or "", file_title)
return {
"source": str(path),
"type": "mineru",
**parsed,
"raw_result": result_json,
}
    raise ValueError(f"Unsupported file type: {path.name}")
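
A minimal usage sketch for the parser above. The module path "planner.intent_parser" and the input paths are assumptions for illustration; adjust them to the actual project layout.

# Usage sketch -- module path and file names are placeholders.
from planner.intent_parser import parse_intent_file

# Plain text is read directly; "raw_result" stays None.
doc = parse_intent_file("requirements/dut_spec.md")
print(doc["type"], doc["title"])          # "text", first heading or the file stem

# PDFs/images go through MinerU, so MinerU_API (or MinerU_URL for a
# self-hosted endpoint) must be set in the environment or the .env file.
doc = parse_intent_file("requirements/dut_spec.pdf", save_dir="planner/mineru_result")
print(doc["type"])                        # "mineru"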

@@ -0,0 +1,102 @@
# planning_agent/langchain_pipeline.py
import os
from langchain_openai import ChatOpenAI
from planner.planning_agent.rag_pipeline import LocalKnowledgeBase
def generate_natural_plan(requirement_text: str, top_k: int = 4) -> str:
    """
    Generate a standardized, executable test-task plan using an OpenAI-compatible
    LLM (e.g. Qwen3-32B) combined with local knowledge-base RAG.
    requirement_text: the requirement text to plan from (usually the MinerU merged text).
    """
    AI_BASE_URL = os.getenv("AI_BASE_URL")
    AI_MODEL = os.getenv("AI_MODEL")
    AI_API_KEY = os.getenv("AI_API_KEY")
    if not AI_API_KEY:
        raise ValueError("Please set AI_API_KEY (and AI_BASE_URL / AI_MODEL) in the environment")
    kb = LocalKnowledgeBase()
    try:
        context = kb.retrieve_context(requirement_text, top_k=top_k)
    except FileNotFoundError:
        # No knowledge files available; fall back to an empty context.
        context = ""
    # Instantiate the LLM through the OpenAI-compatible wrapper
    # (streaming disabled so the full response is returned in one call).
    llm = ChatOpenAI(
        model=AI_MODEL,
        openai_api_base=AI_BASE_URL,
        openai_api_key=AI_API_KEY.strip(),
        streaming=False
    )
    # System prompt: inject the retrieved context and a strict output template.
    system_prompt = (
        "Role\n"
        "You are a test-task planning expert with deep experience in laboratory test "
        "automation and electronic measurement instruments (spectrum analyzers, signal "
        "generators, network analyzers, etc.). Your core skill is turning a user's "
        "incomplete, colloquial test requirement into a logically rigorous, fully "
        "parameterized, step-by-step plan that an automation system can parse directly.\n"
        "Task Description\n"
        "The user will provide text containing the device under test (DUT), an instrument "
        "list, and rough test items. You must:\n"
        "1. Decompose the requirement: identify the core test items, test frequencies, "
        "instrument parameters, and pass/fail criteria.\n"
        "2. Complete the content: standardize vague descriptions using domain knowledge and "
        "the attached historical planning knowledge base (e.g. fill in the OCP/OVP "
        "protection logic at power-up, spell out switch-matrix switching actions).\n"
        "3. Plan step by step: break every test item into step-level actions. Each step "
        "must include parameter setup, an executed action, and a data read.\n"
        "Constraints\n"
        "- No irrelevant content: stick to the test steps; never invent parameters or "
        "operations, and never emit preambles or filler explanations.\n"
        "- Step independence: each step must be a closed-loop action with all of its "
        "parameters and actions spelled out.\n"
        "- Hardware logic: account for the switch-matrix switching logic so the signal "
        "path is correct in every frequency band.\n"
        "Output Format\n"
        "Always follow this structure:\n"
        "[Task Overview]\n"
        "- DUT: {DUT description}\n"
        "- Instrument environment: {instrument list and connection overview}\n"
        "[Detailed Test Procedure]\n"
        "Step X: {step name}\n"
        "Purpose: briefly state the goal of this step.\n"
        "Instrument configuration:\n"
        "- {Instrument A}: {param 1}={value}, {param 2}={value}...\n"
        "- {Switch matrix}: switch to {path X}\n"
        "Actions:\n"
        "1. {action 1}\n"
        "2. {action 2}\n"
        "Data acquisition / processing:\n"
        "- Readout: {e.g. max-peak power}\n"
        "- Calculation: {if any, give the formula, not the computed result}\n"
        "- Pass/fail criterion: {if any, give the threshold and how results are handled}\n"
        "Execution Logic (knowledge-base completion guide)\n"
        "1. Power-up items: must include supply initialization, OCP (over-current) / OVP "
        "(over-voltage) protection settings, stepped voltage ramp-up, and quiescent-current "
        "monitoring.\n"
        "2. RF test items: first perform path calibration/switching, then set the signal "
        "source frequency/power, and finally configure the spectrum analyzer (Span, RBW, "
        "VBW, Detector).\n"
        "[Reference knowledge-base snippets] If a matching test method exists, it may serve "
        "as a template; when the requirement's parameters are incomplete, fill the gaps from "
        "the knowledge base's corresponding procedure; when the requirement states a "
        "parameter explicitly, use the requirement's value.\n"
        f"{context}\n\n"
    )
    # ---------------------------
    # Generate the full response in one shot (non-streaming)
    # ---------------------------
    messages = [
        ("system", system_prompt),
        ("user", requirement_text)
    ]
    try:
        # invoke() returns the complete response at once.
        response = llm.invoke(messages)
        return response.content.strip() if response.content else ""
    except Exception as e:
        print(f"[ERROR] Generation failed: {e}", flush=True)
        return ""

@@ -0,0 +1,72 @@
# planning_agent/main.py
import argparse
import sys
from pathlib import Path
# Note: this module uses relative imports and must be run as a module:
#   python -m planner.planning_agent.main
from .planner import build_plan_from_text
from planner.mineru_client import extract_texts_with_mineru
def main():
    # Force stdout to UTF-8 where supported, to avoid printing errors in some environments.
    if sys.stdout and hasattr(sys.stdout, 'reconfigure'):
        try:
            sys.stdout.reconfigure(encoding='utf-8')
        except Exception:
            pass
    parser = argparse.ArgumentParser(description="Testing Task Planning Agent (Natural Language Mode)")
    parser.add_argument("--input", required=True, help="Path to the input file or folder")
    parser.add_argument("--output", required=True, help="Path to the output directory")
    # Manually supplied parameters, passed through as a JSON string.
    parser.add_argument("--manual_args", required=False, default="{}", help="Manually supplied parameters (JSON string)")
    args = parser.parse_args()
    input_path = Path(args.input)
    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)
    manual_args_str = args.manual_args
    # Locate the planner directory (two levels above this script):
    # planning_agent/main.py -> planning_agent -> planner
    current_file = Path(__file__).resolve()
    planner_dir = current_file.parent.parent
    mineru_save_dir = planner_dir / "mineru_result"
    # Document types that require MinerU parsing.
    doc_extensions = ['.pdf', '.docx', '.doc', '.xlsx', '.xls', '.pptx']
    # Use MinerU when the input is a directory or its suffix is a supported document type.
    if input_path.is_dir() or (input_path.is_file() and input_path.suffix.lower() in doc_extensions):
        print("[INFO] Document/folder input detected; parsing with the MinerU API...")
        try:
            # Save MinerU output under planner/mineru_result.
            merged_text_path = extract_texts_with_mineru(str(input_path), save_dir=str(mineru_save_dir))
            with open(merged_text_path, "r", encoding="utf-8") as f:
                requirement_text = f.read()
            source_doc = merged_text_path
        except Exception as e:
            print(f"[ERROR] Document parsing failed: {e}")
            return
    else:
        # Otherwise treat the input as a plain UTF-8 text file (e.g. .txt, .md).
        print("[INFO] Text-file input detected; reading directly...")
        try:
            with open(input_path, "r", encoding="utf-8") as f:
                requirement_text = f.read()
            source_doc = str(input_path)
        except UnicodeDecodeError:
            print(f"[ERROR] Encoding error: {input_path} is not valid UTF-8 text. For documents such as .docx, make sure the MinerU path is triggered.")
            return
        except Exception as e:
            print(f"[ERROR] Failed to read file: {e}")
            return
    # Forward manual_args_str to the planner.
    output_path = build_plan_from_text(requirement_text, source_doc, str(output_dir), manual_args_str)
    print(f"[SUCCESS] Test-task plan generated: {output_path}")
if __name__ == "__main__":
main()
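
Because of the relative import, the entry point is launched as a module; a typical invocation (input/output paths are placeholders) looks like:

    python -m planner.planning_agent.main --input docs/requirement.pdf --output out/plans --manual_args "{}"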

@@ -0,0 +1,43 @@
# planning_agent/planner.py
from pathlib import Path
import datetime
import textwrap
from .langchain_pipeline import generate_natural_plan
def build_plan_from_text(requirement_text: str, source_doc: str = "user input",
                         output_dir: str = "", manual_args: str = "{}") -> str:
    """
    Generate a natural-language test plan from the input text.
    When output_dir is given, the wrapped Markdown is written there and the file
    path is returned; otherwise the Markdown content itself is returned.
    manual_args: JSON string of manually supplied parameters (accepted for the
    CLI pass-through; reserved for prompt augmentation).
    """
    # Call the LLM to generate the plan.
    plan_text = generate_natural_plan(requirement_text)
    if not plan_text:
        return ""
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Markdown wrapper template.
    md_template = f"""
# 📘 Automated Test Task Plan
**Source document:** `{source_doc}`
**Generated at:** {timestamp}
---
## 📝 Test Plan
{plan_text}
---
## 📌 Notes
This plan was generated automatically by the agent. It follows the natural-language task-planning format and can feed directly into downstream test-code generation.
"""
    md_content = textwrap.dedent(md_template).strip()
    if not output_dir:
        return md_content
    # Persist the plan and return its path (a simple timestamped default file name).
    out_path = Path(output_dir) / f"plan_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
    out_path.write_text(md_content, encoding="utf-8")
    return str(out_path)
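
Called directly with output_dir left empty, the function returns the wrapped Markdown instead of a file path. A small sketch (the requirement text is illustrative, and the AI_* environment variables from langchain_pipeline must be set):

# Usage sketch: no output_dir, so the Markdown string itself comes back.
from planner.planning_agent.planner import build_plan_from_text

md = build_plan_from_text(
    "Power-on test for the 28 V rail; OCP at 1.2 A, monitor quiescent current.",
    source_doc="inline request",
)
print(md.splitlines()[0])   # "# 📘 Automated Test Task Plan"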

@@ -0,0 +1,130 @@
import hashlib
import time
import json
from pathlib import Path
from typing import Optional
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from planner.mineru_client import extract_texts_with_mineru
class LocalKnowledgeBase:
    def __init__(
        self,
        knowledge_dir: str = "planner/knowledge",
        db_path: str = "planner/vector_db",
        mineru_save_dir: str = "planner/mineru_result",
    ):
        """
        Initialize the local knowledge base.
        :param knowledge_dir: directory of knowledge files
        :param db_path: where the vector database is stored
        :param mineru_save_dir: output directory for MinerU parsing
        """
        self.knowledge_dir = Path(knowledge_dir)
        self.db_path = Path(db_path)
        self.mineru_save_dir = Path(mineru_save_dir)
        self.db_path.mkdir(parents=True, exist_ok=True)
        self.mineru_save_dir.mkdir(parents=True, exist_ok=True)
        # Embedding model (lightweight; note all-MiniLM-L6-v2 is English-centric,
        # so a multilingual model may retrieve Chinese text better).
        self.embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        self.vectorstore: Optional[FAISS] = None
    def _fingerprint_knowledge_dir(self) -> str:
        """
        Fingerprint the knowledge directory (file name + size + mtime),
        used to decide whether MinerU re-parsing / index rebuilding is needed.
        """
        files = sorted(self.knowledge_dir.rglob("*"))
        fingerprint_parts = []
        for f in files:
            if f.is_file():
                stat = f.stat()
                fingerprint_parts.append(f"{f.name}:{stat.st_size}:{stat.st_mtime_ns}")
        # Use a stable digest: Python's built-in hash() is randomized per process
        # for strings, so its value cannot be compared across runs.
        return hashlib.md5("|".join(fingerprint_parts).encode("utf-8")).hexdigest()
    def _meta_path(self):
        return self.db_path / "meta.json"
    def build_or_load_db(self, force_rebuild: bool = False) -> FAISS:
        """
        Parse the knowledge base and build the vector database.
        If the database already exists and the knowledge files are unchanged, load it directly.
        """
        index_file = self.db_path / "index.faiss"
        meta_path = self._meta_path()
        # Decide whether a rebuild is needed.
        fingerprint = self._fingerprint_knowledge_dir()
        if (
            not force_rebuild
            and index_file.exists()
            and meta_path.exists()
        ):
            try:
                meta = json.loads(meta_path.read_text(encoding="utf-8"))
                if meta.get("fingerprint") == fingerprint:
                    print("[INFO] Knowledge base unchanged; loading the existing vector database...")
                    self.vectorstore = FAISS.load_local(
                        str(self.db_path),
                        self.embedding_model,
                        allow_dangerous_deserialization=True,
                    )
                    print("[INFO] Vector database loaded.")
                    return self.vectorstore
            except Exception as e:
                print(f"[INFO] Failed to read meta.json; rebuilding: {e}")
        print("[INFO] Parsing knowledge files with the MinerU API...")
        parsed_text_path = extract_texts_with_mineru(str(self.knowledge_dir), save_dir=str(self.mineru_save_dir))
        if not parsed_text_path or not Path(parsed_text_path).exists():
            raise FileNotFoundError("[ERROR] MinerU parsing failed; no knowledge text was produced.")
        print(f"[INFO] MinerU output: {parsed_text_path}")
        all_text = Path(parsed_text_path).read_text(encoding="utf-8", errors="ignore")
        print("[INFO] Splitting knowledge text into chunks...")
        splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
        chunks = splitter.split_text(all_text)
        print(f"[INFO] Split into {len(chunks)} chunks.")
        print("[INFO] Embedding chunks and building the database...")
        docs = [Document(
            page_content=c,
            metadata={"source": "mineru_all_knowledge"}
        ) for c in chunks]
        self.vectorstore = FAISS.from_documents(docs, self.embedding_model)
        self.vectorstore.save_local(str(self.db_path))
        # Persist meta information.
        meta = {
            "fingerprint": fingerprint,
            "built_at": time.strftime("%Y-%m-%d %H:%M:%S"),
            "num_chunks": len(chunks),
            "mineru_text": str(parsed_text_path),
        }
        meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
        print(f"[INFO] Knowledge-base vector database built: {len(chunks)} chunks.")
        return self.vectorstore
    def retrieve_context(self, query: str, top_k: int = 4) -> str:
        """
        Retrieve the context most relevant to the query from the knowledge base.
        """
        if self.vectorstore is None:
            self.build_or_load_db()
        docs = self.vectorstore.similarity_search(query, k=top_k)
        context = "\n\n".join([d.page_content for d in docs])
        print(f"[INFO] Recalled {len(docs)} relevant knowledge chunks.")
        return context
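
A usage sketch for the knowledge base (the query string is illustrative; the import path follows the planner.planning_agent package):

# Usage sketch: build (or load) the FAISS index once, then query it.
from planner.planning_agent.rag_pipeline import LocalKnowledgeBase

kb = LocalKnowledgeBase(knowledge_dir="planner/knowledge")
kb.build_or_load_db()   # re-parses with MinerU only when the files changed
ctx = kb.retrieve_context("spectrum analyzer RBW/VBW settings for spur search", top_k=4)
print(ctx[:300])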

@@ -0,0 +1,14 @@
from pathlib import Path
import json
def read_text(path: str) -> str:
p = Path(path)
if not p.exists():
raise FileNotFoundError(path)
return p.read_text(encoding="utf-8")
def write_json(path: str, data: dict):
p = Path(path)
p.parent.mkdir(parents=True, exist_ok=True)
with p.open("w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=2)
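
A quick round-trip with the helpers above (the path is a placeholder):

# Usage sketch: write_json creates parent directories before dumping.
write_json("out/meta.json", {"status": "ok", "chunks": 42})
print(read_text("out/meta.json"))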