#!/usr/bin/env python3
# batch_evaluator.py
# Author: XGJ2024
# Date: 2025/8/14 10:32
"""Batch evaluation driver.

Treats every immediate non-empty subdirectory of ``--parentDir`` as one test
case and runs ``evaluator_enhanced.py`` on it as a subprocess, then logs a
success/failure summary for the whole batch.
"""

import argparse
import logging
import subprocess
import sys
import time
from pathlib import Path

# Log simultaneously to a file and to the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("batch_evaluator.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger("batch_evaluator")

# Fixed, machine-specific absolute paths handed through to the child evaluator.
# NOTE(review): these are environment-bound Windows paths — adjust per machine.
DEFAULT_CONFIG = {
    "allConfigPath": r"D:\Kevin\zhaoyuan\code\zhaoyuan-master_v2.0\zhaoyuan_new\zhaoyuan\config\LST_1_all_metrics_config.yaml",
    "baseConfigPath": r"D:\Kevin\zhaoyuan\code\zhaoyuan-master_v2.0\zhaoyuan_new\zhaoyuan\config\LST_1_builtin_metrics_config.yaml",
    "customConfigPath": r"D:\Kevin\zhaoyuan\code\zhaoyuan-master_v2.0\zhaoyuan_new\zhaoyuan\config\LST_1_custom_metrics_config.yaml",
    "logDir": r"D:\Kevin\zhaoyuan\code\zhaoyuan-master_v2.0\zhaoyuan_new\zhaoyuan\logs",
    "reportPath": r"D:\Kevin\zhaoyuan\code\zhaoyuan-master_v2.0\zhaoyuan_new\zhaoyuan\reports",
    "plotPath": r"D:\Kevin\zhaoyuan\code\zhaoyuan-master_v2.0\zhaoyuan_new\zhaoyuan\plots",
    "customMetricsPath": r"D:\Kevin\zhaoyuan\code\zhaoyuan-master_v2.0\zhaoyuan_new\zhaoyuan\custom_metrics"
}

# The single-case evaluator invoked once per test-case directory.
EVALUATOR_SCRIPT = r"D:\Kevin\zhaoyuan\code\zhaoyuan-master_v2.0\zhaoyuan_new\zhaoyuan\scripts\evaluator_enhanced.py"


def run_evaluation(data_path: Path) -> bool:
    """Run the evaluator on a single test-case directory.

    Args:
        data_path: Directory holding one test case's data.

    Returns:
        ``True`` if the child process exited with status 0, ``False`` on any
        failure (non-zero exit, spawn error, etc.). Never raises: one bad
        case must not abort the whole batch.
    """
    try:
        # sys.executable guarantees the child runs under the same interpreter
        # and environment as this script, instead of whatever "python" happens
        # to resolve to on PATH.
        cmd = [
            sys.executable,
            EVALUATOR_SCRIPT,
            "--dataPath", str(data_path),
            "--allConfigPath", DEFAULT_CONFIG["allConfigPath"],
            "--baseConfigPath", DEFAULT_CONFIG["baseConfigPath"],
            "--customConfigPath", DEFAULT_CONFIG["customConfigPath"],
            "--logDir", DEFAULT_CONFIG["logDir"],
            "--reportPath", DEFAULT_CONFIG["reportPath"],
            "--plotPath", DEFAULT_CONFIG["plotPath"],
            "--customMetricsPath", DEFAULT_CONFIG["customMetricsPath"]
        ]

        logger.info(f"开始评估用例: {data_path.name}")
        logger.debug(f"执行命令: {' '.join(cmd)}")

        start_time = time.time()
        # BUG FIX: the original comment called for an explicit encoding /
        # errors='replace' but never passed them. Without them, Windows decodes
        # the child's output with the ANSI code page (e.g. GBK), and a single
        # undecodable byte raises UnicodeDecodeError — turning a successful run
        # into a spurious failure. errors="replace" makes capture robust.
        result = subprocess.run(cmd, check=True, capture_output=True,
                                text=True, encoding="utf-8", errors="replace")
        elapsed_time = time.time() - start_time

        logger.info(f"用例评估完成: {data_path.name} (耗时: {elapsed_time:.1f}秒)")
        logger.debug(f"评估输出:\n{result.stdout}")
        return True

    except subprocess.CalledProcessError as e:
        logger.error(f"评估失败: {data_path.name}")
        logger.error(f"错误代码: {e.returncode}")
        logger.error(f"错误输出:\n{e.stderr}")
        return False
    except Exception as e:
        # Boundary catch-all: log and report failure rather than crash the batch.
        logger.error(f"执行异常: {str(e)}")
        return False


def find_test_cases(parent_dir: Path) -> list:
    """Find all test-case directories directly under ``parent_dir``.

    A test case is any immediate subdirectory that is non-empty; empty
    directories are skipped with a warning. Results are sorted by name so the
    batch order is deterministic.

    Args:
        parent_dir: Directory expected to contain one subdirectory per case.

    Returns:
        Sorted list of ``Path`` objects; empty if ``parent_dir`` is missing.
    """
    test_cases = []

    if not parent_dir.exists():
        logger.error(f"父目录不存在: {parent_dir}")
        return test_cases

    for item in parent_dir.iterdir():
        if item.is_dir():
            # Minimal sanity check: a usable case directory must contain
            # *something*. Deeper structural validation could go here.
            if any(item.glob("*")):
                test_cases.append(item)
            else:
                logger.warning(f"跳过空目录: {item.name}")

    test_cases.sort(key=lambda x: x.name)
    return test_cases


def main() -> None:
    """Parse arguments, discover test cases, and evaluate them sequentially."""
    parser = argparse.ArgumentParser(description="批量评估系统 - 处理父目录下的所有测试用例")
    parser.add_argument(
        "--parentDir",
        default=r"D:\Kevin\zhaoyuan\data_new\LST1\二轮评测数据\单车",
        type=str,
        help="包含多个测试用例目录的父目录路径"
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="显示详细输出"
    )
    args = parser.parse_args()

    if args.verbose:
        logger.setLevel(logging.DEBUG)

    # Make sure the shared output directories exist before any child runs.
    Path(DEFAULT_CONFIG["logDir"]).mkdir(parents=True, exist_ok=True)
    Path(DEFAULT_CONFIG["reportPath"]).mkdir(parents=True, exist_ok=True)
    Path(DEFAULT_CONFIG["plotPath"]).mkdir(parents=True, exist_ok=True)

    parent_dir = Path(args.parentDir).resolve()

    logger.info("=" * 80)
    logger.info("开始批量评估")
    logger.info(f"父目录: {parent_dir}")
    logger.info(f"日志目录: {DEFAULT_CONFIG['logDir']}")
    logger.info(f"报告目录: {DEFAULT_CONFIG['reportPath']}")
    logger.info(f"图表目录: {DEFAULT_CONFIG['plotPath']}")
    logger.info("=" * 80)

    test_cases = find_test_cases(parent_dir)
    if not test_cases:
        logger.error(f"在 {parent_dir} 中未找到有效的测试用例目录")
        return

    logger.info(f"找到 {len(test_cases)} 个测试用例:")
    for i, case in enumerate(test_cases, 1):
        logger.info(f"{i}. {case.name}")

    total_count = len(test_cases)
    success_count = 0
    failed_count = 0
    start_time = time.time()

    for i, test_case in enumerate(test_cases, 1):
        logger.info(f"\n[处理 {i}/{total_count}] {test_case.name}")
        if run_evaluation(test_case):
            success_count += 1
        else:
            failed_count += 1

    elapsed_time = time.time() - start_time
    logger.info("\n" + "=" * 80)
    logger.info(f"批量评估完成")
    logger.info(f"总用例数: {total_count}")
    logger.info(f"成功: {success_count}")
    logger.info(f"失败: {failed_count}")
    logger.info(f"总耗时: {elapsed_time:.1f} 秒")
    logger.info(f"平均每用例: {elapsed_time / total_count if total_count > 0 else 0:.1f} 秒")
    logger.info("=" * 80)


if __name__ == "__main__":
    main()