123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181 |
- # Author: XGJ2024
- # Date: 2025/8/14 10:32
#!/usr/bin/env python3
- # batch_evaluator.py
import argparse
import logging
import os
import subprocess
import sys
import time
from pathlib import Path
# Logging setup: every message is mirrored to a log file and to the console.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

logging.basicConfig(
    level=logging.INFO,
    format=_LOG_FORMAT,
    handlers=[
        logging.FileHandler("batch_evaluator.log"),
        logging.StreamHandler(),
    ],
)

# Module-level logger used by every function in this script.
logger = logging.getLogger("batch_evaluator")
# Fixed parameter set forwarded verbatim to the evaluator subprocess:
# metric-config YAML files plus output directories for logs/reports/plots.
# NOTE(review): these are hard-coded absolute Windows paths for a single
# developer machine — consider moving them to a config file or CLI flags.
DEFAULT_CONFIG = {
    "allConfigPath": r"D:\Kevin\zhaoyuan\code\zhaoyuan-master_v2.0\zhaoyuan_new\zhaoyuan\config\LST_1_all_metrics_config.yaml",
    "baseConfigPath": r"D:\Kevin\zhaoyuan\code\zhaoyuan-master_v2.0\zhaoyuan_new\zhaoyuan\config\LST_1_builtin_metrics_config.yaml",
    "customConfigPath": r"D:\Kevin\zhaoyuan\code\zhaoyuan-master_v2.0\zhaoyuan_new\zhaoyuan\config\LST_1_custom_metrics_config.yaml",
    "logDir": r"D:\Kevin\zhaoyuan\code\zhaoyuan-master_v2.0\zhaoyuan_new\zhaoyuan\logs",
    "reportPath": r"D:\Kevin\zhaoyuan\code\zhaoyuan-master_v2.0\zhaoyuan_new\zhaoyuan\reports",
    "plotPath": r"D:\Kevin\zhaoyuan\code\zhaoyuan-master_v2.0\zhaoyuan_new\zhaoyuan\plots",
    "customMetricsPath": r"D:\Kevin\zhaoyuan\code\zhaoyuan-master_v2.0\zhaoyuan_new\zhaoyuan\custom_metrics"
}
def run_evaluation(data_path: Path) -> bool:
    """Run the external evaluator script on one test-case directory.

    Args:
        data_path: Directory holding the data for a single test case.

    Returns:
        True when the evaluator subprocess exits with status 0,
        False on a non-zero exit or any other failure (never raises).
    """
    try:
        # Build the evaluator command line from the fixed configuration.
        # sys.executable launches the same interpreter running this script,
        # so the batch works even when "python" is not on PATH.
        cmd = [
            sys.executable,
            r"D:\Kevin\zhaoyuan\code\zhaoyuan-master_v2.0\zhaoyuan_new\zhaoyuan\scripts\evaluator_enhanced.py",
            "--dataPath", str(data_path),
            "--allConfigPath", DEFAULT_CONFIG["allConfigPath"],
            "--baseConfigPath", DEFAULT_CONFIG["baseConfigPath"],
            "--customConfigPath", DEFAULT_CONFIG["customConfigPath"],
            "--logDir", DEFAULT_CONFIG["logDir"],
            "--reportPath", DEFAULT_CONFIG["reportPath"],
            "--plotPath", DEFAULT_CONFIG["plotPath"],
            "--customMetricsPath", DEFAULT_CONFIG["customMetricsPath"],
        ]
        logger.info(f"开始评估用例: {data_path.name}")
        logger.debug(f"执行命令: {' '.join(cmd)}")

        start_time = time.time()
        # Decode the child's output with an explicit codec and replace
        # undecodable bytes: with bare text=True the locale codec is used and
        # a GBK/UTF-8 mismatch raises UnicodeDecodeError, aborting the whole
        # batch (the original TODO comment asked for exactly this fix).
        result = subprocess.run(
            cmd,
            check=True,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
        )
        elapsed_time = time.time() - start_time

        logger.info(f"用例评估完成: {data_path.name} (耗时: {elapsed_time:.1f}秒)")
        logger.debug(f"评估输出:\n{result.stdout}")
        return True
    except subprocess.CalledProcessError as e:
        # check=True raised: the evaluator ran but exited non-zero.
        logger.error(f"评估失败: {data_path.name}")
        logger.error(f"错误代码: {e.returncode}")
        logger.error(f"错误输出:\n{e.stderr}")
        return False
    except Exception as e:
        # Catch-all boundary: a single broken case must not stop the batch.
        logger.error(f"执行异常: {str(e)}")
        return False
def find_test_cases(parent_dir: Path):
    """Collect the non-empty immediate subdirectories of *parent_dir*.

    Each qualifying subdirectory is treated as one test case; empty
    subdirectories are skipped with a warning.

    Args:
        parent_dir: Directory expected to contain one subdirectory per case.

    Returns:
        List of case directories sorted by name; empty when *parent_dir*
        does not exist or holds no usable subdirectories.
    """
    # Guard clause: a missing parent yields no cases at all.
    if not parent_dir.exists():
        logger.error(f"父目录不存在: {parent_dir}")
        return []

    cases = []
    for child in parent_dir.iterdir():
        if not child.is_dir():
            continue
        # Minimal sanity check: the case directory must contain something.
        if any(child.glob("*")):
            cases.append(child)
        else:
            logger.warning(f"跳过空目录: {child.name}")

    # Deterministic processing order regardless of filesystem enumeration.
    return sorted(cases, key=lambda p: p.name)
def main():
    """Entry point: discover test-case directories and evaluate each in turn.

    Parses the command line, prepares the output directories, then runs the
    evaluator once per case and logs a success/failure summary.
    """
    parser = argparse.ArgumentParser(description="批量评估系统 - 处理父目录下的所有测试用例")
    parser.add_argument(
        "--parentDir",
        default=r"D:\Kevin\zhaoyuan\data_new\LST1\二轮评测数据\单车",
        type=str,
        help="包含多个测试用例目录的父目录路径"
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="显示详细输出"
    )
    args = parser.parse_args()

    # Verbose mode lowers the threshold so the command lines and subprocess
    # output (logged at DEBUG) become visible.
    if args.verbose:
        logger.setLevel(logging.DEBUG)

    # Create every output directory up front so the evaluator can write freely.
    for key in ("logDir", "reportPath", "plotPath"):
        Path(DEFAULT_CONFIG[key]).mkdir(parents=True, exist_ok=True)

    parent_dir = Path(args.parentDir).resolve()

    logger.info("=" * 80)
    logger.info("开始批量评估")
    logger.info(f"父目录: {parent_dir}")
    logger.info(f"日志目录: {DEFAULT_CONFIG['logDir']}")
    logger.info(f"报告目录: {DEFAULT_CONFIG['reportPath']}")
    logger.info(f"图表目录: {DEFAULT_CONFIG['plotPath']}")
    logger.info("=" * 80)

    test_cases = find_test_cases(parent_dir)
    if not test_cases:
        logger.error(f"在 {parent_dir} 中未找到有效的测试用例目录")
        return

    logger.info(f"找到 {len(test_cases)} 个测试用例:")
    for i, case in enumerate(test_cases, 1):
        logger.info(f"{i}. {case.name}")

    # Run every case, tallying outcomes; run_evaluation never raises.
    total_count = len(test_cases)
    success_count = 0
    failed_count = 0
    start_time = time.time()

    for i, test_case in enumerate(test_cases, 1):
        logger.info(f"\n[处理 {i}/{total_count}] {test_case.name}")
        if run_evaluation(test_case):
            success_count += 1
        else:
            failed_count += 1

    elapsed_time = time.time() - start_time
    logger.info("\n" + "=" * 80)
    logger.info("批量评估完成")
    logger.info(f"总用例数: {total_count}")
    logger.info(f"成功: {success_count}")
    logger.info(f"失败: {failed_count}")
    logger.info(f"总耗时: {elapsed_time:.1f} 秒")
    # total_count > 0 is guaranteed here (empty case returned early above),
    # but the guard is kept as harmless defence.
    logger.info(f"平均每用例: {elapsed_time / total_count if total_count > 0 else 0:.1f} 秒")
    logger.info("=" * 80)


if __name__ == "__main__":
    main()
|