# evaluator_enhanced.py
  1. #!/usr/bin/env python3
  2. # evaluator_enhanced.py
  3. import sys
  4. import warnings
  5. import time
  6. import importlib
  7. import importlib.util
  8. import yaml
  9. from pathlib import Path
  10. import argparse
  11. from concurrent.futures import ThreadPoolExecutor
  12. from functools import lru_cache
  13. from typing import Dict, Any, List, Optional, Type, Tuple, Callable, Union
  14. from datetime import datetime
  15. import logging
  16. import traceback
  17. import json
  18. import inspect
  19. # 常量定义
  20. DEFAULT_WORKERS = 4
  21. CUSTOM_METRIC_PREFIX = "metric_"
  22. CUSTOM_METRIC_FILE_PATTERN = "*.py"
  23. # 安全设置根目录路径
  24. if hasattr(sys, "_MEIPASS"):
  25. _ROOT_PATH = Path(sys._MEIPASS)
  26. else:
  27. _ROOT_PATH = Path(__file__).resolve().parent.parent
  28. sys.path.insert(0, str(_ROOT_PATH))
  29. class ConfigManager:
  30. """配置管理组件"""
  31. def __init__(self, logger: logging.Logger):
  32. self.logger = logger
  33. self.base_config: Dict[str, Any] = {}
  34. self.custom_config: Dict[str, Any] = {}
  35. self.merged_config: Dict[str, Any] = {}
  36. def split_configs(self, all_metrics_path: Path, builtin_metrics_path: Path, custom_metrics_path: Path) -> None:
  37. """从all_metrics_config.yaml拆分成内置和自定义配置"""
  38. try:
  39. with open(all_metrics_path, 'r', encoding='utf-8') as f:
  40. all_metrics_dict = yaml.safe_load(f) or {}
  41. with open(builtin_metrics_path, 'r', encoding='utf-8') as f:
  42. builtin_metrics_dict = yaml.safe_load(f) or {}
  43. custom_metrics_dict = self._find_custom_metrics(all_metrics_dict, builtin_metrics_dict)
  44. if custom_metrics_dict:
  45. with open(custom_metrics_path, 'w', encoding='utf-8') as f:
  46. yaml.dump(custom_metrics_dict, f, allow_unicode=True, sort_keys=False, indent=2)
  47. self.logger.info(f"Split configs: custom metrics saved to {custom_metrics_path}")
  48. except Exception as err:
  49. self.logger.error(f"Failed to split configs: {str(err)}")
  50. raise
  51. def _find_custom_metrics(self, all_metrics, builtin_metrics, current_path=""):
  52. """递归比较找出自定义指标"""
  53. custom_metrics = {}
  54. if isinstance(all_metrics, dict) and isinstance(builtin_metrics, dict):
  55. for key in all_metrics:
  56. if key not in builtin_metrics:
  57. custom_metrics[key] = all_metrics[key]
  58. else:
  59. child_custom = self._find_custom_metrics(
  60. all_metrics[key],
  61. builtin_metrics[key],
  62. f"{current_path}.{key}" if current_path else key
  63. )
  64. if child_custom:
  65. custom_metrics[key] = child_custom
  66. elif all_metrics != builtin_metrics:
  67. return all_metrics
  68. if custom_metrics:
  69. return self._ensure_structure(custom_metrics, all_metrics, current_path)
  70. return None
  71. def _ensure_structure(self, metrics_dict, full_dict, path):
  72. """确保每级包含name和priority"""
  73. if not isinstance(metrics_dict, dict):
  74. return metrics_dict
  75. current = full_dict
  76. for key in path.split('.'):
  77. if key in current:
  78. current = current[key]
  79. else:
  80. break
  81. result = {}
  82. if isinstance(current, dict):
  83. if 'name' in current:
  84. result['name'] = current['name']
  85. if 'priority' in current:
  86. result['priority'] = current['priority']
  87. for key, value in metrics_dict.items():
  88. if key not in ['name', 'priority']:
  89. result[key] = self._ensure_structure(value, full_dict, f"{path}.{key}" if path else key)
  90. return result
  91. def load_configs(self, all_config_path: Optional[Path], builtin_metrics_path: Optional[Path], custom_metrics_path: Optional[Path]) -> Dict[str, Any]:
  92. """加载并合并配置"""
  93. # 自动拆分配置
  94. if all_config_path.exists():
  95. self.split_configs(all_config_path, builtin_metrics_path, custom_metrics_path)
  96. self.base_config = self._safe_load_config(builtin_metrics_path) if builtin_metrics_path else {}
  97. self.custom_config = self._safe_load_config(custom_metrics_path) if custom_metrics_path else {}
  98. self.merged_config = self._merge_configs(self.base_config, self.custom_config)
  99. return self.merged_config
  100. def _safe_load_config(self, config_path: Path) -> Dict[str, Any]:
  101. """安全加载YAML配置"""
  102. try:
  103. if not config_path.exists():
  104. self.logger.warning(f"Config file not found: {config_path}")
  105. return {}
  106. with config_path.open('r', encoding='utf-8') as f:
  107. config_dict = yaml.safe_load(f) or {}
  108. self.logger.info(f"Loaded config: {config_path}")
  109. return config_dict
  110. except Exception as err:
  111. self.logger.error(f"Failed to load config {config_path}: {str(err)}")
  112. return {}
  113. def _merge_configs(self, builtin_config: Dict, custom_config: Dict) -> Dict:
  114. """智能合并配置"""
  115. merged_config = builtin_config.copy()
  116. for level1_key, level1_value in custom_config.items():
  117. if not isinstance(level1_value, dict) or 'name' not in level1_value:
  118. if level1_key not in merged_config:
  119. merged_config[level1_key] = level1_value
  120. continue
  121. if level1_key not in merged_config:
  122. merged_config[level1_key] = level1_value
  123. else:
  124. for level2_key, level2_value in level1_value.items():
  125. if level2_key in ['name', 'priority']:
  126. continue
  127. if isinstance(level2_value, dict):
  128. if level2_key not in merged_config[level1_key]:
  129. merged_config[level1_key][level2_key] = level2_value
  130. else:
  131. for level3_key, level3_value in level2_value.items():
  132. if level3_key in ['name', 'priority']:
  133. continue
  134. if isinstance(level3_value, dict):
  135. if level3_key not in merged_config[level1_key][level2_key]:
  136. merged_config[level1_key][level2_key][level3_key] = level3_value
  137. return merged_config
  138. def get_config(self) -> Dict[str, Any]:
  139. return self.merged_config
  140. def get_base_config(self) -> Dict[str, Any]:
  141. return self.base_config
  142. def get_custom_config(self) -> Dict[str, Any]:
  143. return self.custom_config
  144. class MetricLoader:
  145. """指标加载器组件"""
  146. def __init__(self, logger: logging.Logger, config_manager: ConfigManager):
  147. self.logger = logger
  148. self.config_manager = config_manager
  149. self.metric_modules: Dict[str, Type] = {}
  150. self.custom_metric_modules: Dict[str, Any] = {}
  151. def load_builtin_metrics(self) -> Dict[str, Type]:
  152. """加载内置指标模块"""
  153. module_mapping = {
  154. "safety": ("modules.metric.safety", "SafeManager"),
  155. "comfort": ("modules.metric.comfort", "ComfortManager"),
  156. "traffic": ("modules.metric.traffic", "TrafficManager"),
  157. "efficient": ("modules.metric.efficient", "EfficientManager"),
  158. "function": ("modules.metric.function", "FunctionManager"),
  159. }
  160. self.metric_modules = {
  161. name: self._load_module(*info)
  162. for name, info in module_mapping.items()
  163. }
  164. self.logger.info(f"Loaded builtin metrics: {', '.join(self.metric_modules.keys())}")
  165. return self.metric_modules
  166. @lru_cache(maxsize=32)
  167. def _load_module(self, module_path: str, class_name: str) -> Type:
  168. """动态加载Python模块"""
  169. try:
  170. module = __import__(module_path, fromlist=[class_name])
  171. return getattr(module, class_name)
  172. except (ImportError, AttributeError) as e:
  173. self.logger.error(f"Failed to load module: {module_path}.{class_name} - {str(e)}")
  174. raise
  175. def load_custom_metrics(self, custom_metrics_path: Optional[Path]) -> Dict[str, Any]:
  176. """加载自定义指标模块"""
  177. if not custom_metrics_path or not custom_metrics_path.is_dir():
  178. self.logger.info("No custom metrics path or path not exists")
  179. return {}
  180. loaded_count = 0
  181. for py_file in custom_metrics_path.glob(CUSTOM_METRIC_FILE_PATTERN):
  182. if py_file.name.startswith(CUSTOM_METRIC_PREFIX):
  183. if self._process_custom_metric_file(py_file):
  184. loaded_count += 1
  185. self.logger.info(f"Loaded {loaded_count} custom metric modules")
  186. return self.custom_metric_modules
  187. def _process_custom_metric_file(self, file_path: Path) -> bool:
  188. """处理单个自定义指标文件"""
  189. try:
  190. metric_key = self._validate_metric_file(file_path)
  191. module_name = f"custom_metric_{file_path.stem}"
  192. spec = importlib.util.spec_from_file_location(module_name, file_path)
  193. module = importlib.util.module_from_spec(spec)
  194. spec.loader.exec_module(module)
  195. from modules.lib.metric_registry import BaseMetric
  196. metric_class = None
  197. for name, obj in inspect.getmembers(module):
  198. if (inspect.isclass(obj) and
  199. issubclass(obj, BaseMetric) and
  200. obj != BaseMetric):
  201. metric_class = obj
  202. break
  203. if metric_class:
  204. self.custom_metric_modules[metric_key] = {
  205. 'type': 'class',
  206. 'module': module,
  207. 'class': metric_class
  208. }
  209. self.logger.info(f"Loaded class-based custom metric: {metric_key}")
  210. elif hasattr(module, 'evaluate'):
  211. self.custom_metric_modules[metric_key] = {
  212. 'type': 'function',
  213. 'module': module
  214. }
  215. self.logger.info(f"Loaded function-based custom metric: {metric_key}")
  216. else:
  217. raise AttributeError(f"Missing evaluate() function or BaseMetric subclass: {file_path.name}")
  218. return True
  219. except ValueError as e:
  220. self.logger.warning(str(e))
  221. return False
  222. except Exception as e:
  223. self.logger.error(f"Failed to load custom metric {file_path}: {str(e)}")
  224. return False
  225. def _validate_metric_file(self, file_path: Path) -> str:
  226. """验证自定义指标文件命名规范"""
  227. stem = file_path.stem[len(CUSTOM_METRIC_PREFIX):]
  228. parts = stem.split('_')
  229. if len(parts) < 3:
  230. raise ValueError(f"Invalid custom metric filename: {file_path.name}, should be metric_<level1>_<level2>_<level3>.py")
  231. level1, level2, level3 = parts[:3]
  232. if not self._is_metric_configured(level1, level2, level3):
  233. raise ValueError(f"Unconfigured metric: {level1}.{level2}.{level3}")
  234. return f"{level1}.{level2}.{level3}"
  235. def _is_metric_configured(self, level1: str, level2: str, level3: str) -> bool:
  236. """检查指标是否在配置中注册"""
  237. custom_config = self.config_manager.get_custom_config()
  238. try:
  239. return (level1 in custom_config and
  240. isinstance(custom_config[level1], dict) and
  241. level2 in custom_config[level1] and
  242. isinstance(custom_config[level1][level2], dict) and
  243. level3 in custom_config[level1][level2] and
  244. isinstance(custom_config[level1][level2][level3], dict))
  245. except Exception:
  246. return False
  247. def get_builtin_metrics(self) -> Dict[str, Type]:
  248. return self.metric_modules
  249. def get_custom_metrics(self) -> Dict[str, Any]:
  250. return self.custom_metric_modules
  251. class EvaluationEngine:
  252. """评估引擎组件"""
  253. def __init__(self, logger: logging.Logger, config_manager: ConfigManager, metric_loader: MetricLoader):
  254. self.logger = logger
  255. self.config_manager = config_manager
  256. self.metric_loader = metric_loader
  257. def evaluate(self, data: Any) -> Dict[str, Any]:
  258. """执行评估流程"""
  259. raw_results = self._collect_builtin_metrics(data)
  260. custom_results = self._collect_custom_metrics(data)
  261. return self._process_merged_results(raw_results, custom_results)
  262. def _collect_builtin_metrics(self, data: Any) -> Dict[str, Any]:
  263. """收集内置指标结果"""
  264. metric_modules = self.metric_loader.get_builtin_metrics()
  265. raw_results: Dict[str, Any] = {}
  266. with ThreadPoolExecutor(max_workers=len(metric_modules)) as executor:
  267. futures = {
  268. executor.submit(self._run_module, module, data, module_name): module_name
  269. for module_name, module in metric_modules.items()
  270. }
  271. for future in futures:
  272. module_name = futures[future]
  273. try:
  274. result = future.result()
  275. raw_results[module_name] = result[module_name]
  276. except Exception as e:
  277. self.logger.error(
  278. f"{module_name} evaluation failed: {str(e)}",
  279. exc_info=True,
  280. )
  281. raw_results[module_name] = {
  282. "status": "error",
  283. "message": str(e),
  284. "timestamp": datetime.now().isoformat(),
  285. }
  286. return raw_results
  287. def _collect_custom_metrics(self, data: Any) -> Dict[str, Dict]:
  288. """收集自定义指标结果"""
  289. custom_metrics = self.metric_loader.get_custom_metrics()
  290. if not custom_metrics:
  291. return {}
  292. custom_results = {}
  293. for metric_key, metric_info in custom_metrics.items():
  294. try:
  295. level1, level2, level3 = metric_key.split('.')
  296. if metric_info['type'] == 'class':
  297. metric_class = metric_info['class']
  298. metric_instance = metric_class(data)
  299. metric_result = metric_instance.calculate()
  300. else:
  301. module = metric_info['module']
  302. metric_result = module.evaluate(data)
  303. if level1 not in custom_results:
  304. custom_results[level1] = {}
  305. custom_results[level1] = metric_result
  306. self.logger.info(f"Calculated custom metric: {level1}.{level2}.{level3}")
  307. except Exception as e:
  308. self.logger.error(f"Custom metric {metric_key} failed: {str(e)}")
  309. try:
  310. level1, level2, level3 = metric_key.split('.')
  311. if level1 not in custom_results:
  312. custom_results[level1] = {}
  313. custom_results[level1] = {
  314. "status": "error",
  315. "message": str(e),
  316. "timestamp": datetime.now().isoformat(),
  317. }
  318. except Exception:
  319. pass
  320. return custom_results
  321. def _process_merged_results(self, raw_results: Dict, custom_results: Dict) -> Dict:
  322. """处理合并后的评估结果"""
  323. from modules.lib.score import Score
  324. final_results = {}
  325. merged_config = self.config_manager.get_config()
  326. for level1, level1_data in raw_results.items():
  327. if level1 in custom_results:
  328. level1_data.update(custom_results[level1])
  329. try:
  330. evaluator = Score(merged_config, level1)
  331. final_results.update(evaluator.evaluate(level1_data))
  332. except Exception as e:
  333. final_results[level1] = self._format_error(e)
  334. for level1, level1_data in custom_results.items():
  335. if level1 not in raw_results:
  336. try:
  337. evaluator = Score(merged_config, level1)
  338. final_results.update(evaluator.evaluate(level1_data))
  339. except Exception as e:
  340. final_results[level1] = self._format_error(e)
  341. return final_results
  342. def _format_error(self, e: Exception) -> Dict:
  343. return {
  344. "status": "error",
  345. "message": str(e),
  346. "timestamp": datetime.now().isoformat()
  347. }
  348. def _run_module(self, module_class: Any, data: Any, module_name: str) -> Dict[str, Any]:
  349. """执行单个评估模块"""
  350. try:
  351. instance = module_class(data)
  352. return {module_name: instance.report_statistic()}
  353. except Exception as e:
  354. self.logger.error(f"{module_name} execution error: {str(e)}", exc_info=True)
  355. return {module_name: {"error": str(e)}}
  356. class LoggingManager:
  357. """日志管理组件"""
  358. def __init__(self, log_path: Path):
  359. self.log_path = log_path
  360. self.logger = self._init_logger()
  361. def _init_logger(self) -> logging.Logger:
  362. """初始化日志系统"""
  363. try:
  364. from modules.lib.log_manager import LogManager
  365. log_manager = LogManager(self.log_path)
  366. return log_manager.get_logger()
  367. except (ImportError, PermissionError, IOError) as e:
  368. logger = logging.getLogger("evaluator")
  369. logger.setLevel(logging.INFO)
  370. console_handler = logging.StreamHandler()
  371. console_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
  372. logger.addHandler(console_handler)
  373. logger.warning(f"Failed to init standard logger: {str(e)}, using fallback logger")
  374. return logger
  375. def get_logger(self) -> logging.Logger:
  376. return self.logger
  377. class DataProcessor:
  378. """数据处理组件"""
  379. def __init__(self, logger: logging.Logger, data_path: Path, config_path: Optional[Path] = None):
  380. self.logger = logger
  381. self.data_path = data_path
  382. self.config_path = config_path
  383. self.processor = self._load_processor()
  384. self.case_name = self.data_path.name
  385. def _load_processor(self) -> Any:
  386. """加载数据处理器"""
  387. try:
  388. from modules.lib import data_process
  389. return data_process.DataPreprocessing(self.data_path, self.config_path)
  390. except ImportError as e:
  391. self.logger.error(f"Failed to load data processor: {str(e)}")
  392. raise RuntimeError(f"Failed to load data processor: {str(e)}") from e
  393. def validate(self) -> None:
  394. """验证数据路径"""
  395. if not self.data_path.exists():
  396. raise FileNotFoundError(f"Data path not exists: {self.data_path}")
  397. if not self.data_path.is_dir():
  398. raise NotADirectoryError(f"Invalid data directory: {self.data_path}")
class EvaluationPipeline:
    """Evaluation pipeline controller.

    Wires together the logging, configuration, metric-loading and
    data-processing components, then runs the evaluation and writes a
    JSON report into `report_path`.
    """

    def __init__(self, all_config_path: str, base_config_path: str, log_path: str, data_path: str, report_path: str,
                 custom_metrics_path: Optional[str] = None, custom_config_path: Optional[str] = None):
        # Path initialisation: optional/falsy string paths become None.
        self.all_config_path = Path(all_config_path) if all_config_path else None
        self.base_config_path = Path(base_config_path) if base_config_path else None
        self.custom_config_path = Path(custom_config_path) if custom_config_path else None
        self.data_path = Path(data_path)
        self.report_path = Path(report_path)
        self.custom_metrics_path = Path(custom_metrics_path) if custom_metrics_path else None
        # Logging
        self.logging_manager = LoggingManager(Path(log_path))
        self.logger = self.logging_manager.get_logger()
        # Configuration (split combined file, load builtin + custom, merge)
        self.config_manager = ConfigManager(self.logger)
        self.config = self.config_manager.load_configs(
            self.all_config_path, self.base_config_path, self.custom_config_path
        )
        # Metric loading (builtin managers + custom metric scripts)
        self.metric_loader = MetricLoader(self.logger, self.config_manager)
        self.metric_loader.load_builtin_metrics()
        self.metric_loader.load_custom_metrics(self.custom_metrics_path)
        # Data processing
        self.data_processor = DataProcessor(self.logger, self.data_path, self.all_config_path)
        self.evaluation_engine = EvaluationEngine(self.logger, self.config_manager, self.metric_loader)

    def execute(self) -> Dict[str, Any]:
        """Run the evaluation pipeline.

        Returns:
            The generated report dict, or a dict with 'error' and
            'traceback' keys if any stage fails.
        """
        try:
            self.data_processor.validate()
            self.logger.info(f"Start evaluation: {self.data_path.name}")
            start_time = time.perf_counter()
            results = self.evaluation_engine.evaluate(self.data_processor.processor)
            elapsed_time = time.perf_counter() - start_time
            self.logger.info(f"Evaluation completed, time: {elapsed_time:.2f}s")
            report = self._generate_report(self.data_processor.case_name, results)
            return report
        except Exception as e:
            self.logger.critical(f"Evaluation failed: {str(e)}", exc_info=True)
            return {"error": str(e), "traceback": traceback.format_exc()}

    def _add_overall_result(self, report: Dict[str, Any]) -> Dict[str, Any]:
        """Post-process the report and add an overall pass/fail field.

        Counts failed categories by priority (0/1/2); overall passes only
        when no priority count exceeds its configured threshold.
        """
        # Load threshold parameters.
        # NOTE(review): assumes the merged config contains
        # T_threshold.T{0,1,2}_threshold; a missing key raises KeyError,
        # which execute() turns into an error report — confirm intended.
        thresholds = {
            "T0": self.config['T_threshold']['T0_threshold'],
            "T1": self.config['T_threshold']['T1_threshold'],
            "T2": self.config['T_threshold']['T2_threshold']
        }
        # Failure counters per priority level.
        counters = {'p0': 0, 'p1': 0, 'p2': 0}
        # Walk every key in the report, builtin and custom level-1 metrics alike.
        for category, category_data in report.items():
            # Skip non-metric keys (e.g. metadata).
            if not isinstance(category_data, dict) or category == "metadata":
                continue
            # A failed category contributes to its priority's counter.
            if not category_data.get('result', True):
                priority = category_data.get('priority')
                if priority == 0:
                    counters['p0'] += 1
                elif priority == 1:
                    counters['p1'] += 1
                elif priority == 2:
                    counters['p2'] += 1
        # Threshold check: any exceeded counter fails the overall result.
        thresholds_exceeded = (
            counters['p0'] > thresholds['T0'],
            counters['p1'] > thresholds['T1'],
            counters['p2'] > thresholds['T2']
        )
        # Build the processed report (shallow copy of the input).
        processed_report = report.copy()
        processed_report['overall_result'] = not any(thresholds_exceeded)
        # Attach the statistics used for the decision.
        processed_report['threshold_checks'] = {
            'T0_threshold': thresholds['T0'],
            'T1_threshold': thresholds['T1'],
            'T2_threshold': thresholds['T2'],
            'actual_counts': counters
        }
        self.logger.info(f"Added overall result: {processed_report['overall_result']}")
        return processed_report

    def _generate_report(self, case_name: str, results: Dict[str, Any]) -> Dict[str, Any]:
        """Attach metadata, add the overall result, and write the JSON report."""
        from modules.lib.common import dict2json
        self.report_path.mkdir(parents=True, exist_ok=True)
        results["metadata"] = {
            "case_name": case_name,
            "timestamp": datetime.now().isoformat(),
            "version": "3.1.0",
        }
        # Add the overall pass/fail evaluation.
        results = self._add_overall_result(results)
        report_file = self.report_path / f"{case_name}_report.json"
        dict2json(results, report_file)
        self.logger.info(f"Report generated: {report_file}")
        return results
  496. def main():
  497. """命令行入口"""
  498. parser = argparse.ArgumentParser(
  499. description="Autonomous Driving Evaluation System V3.1",
  500. formatter_class=argparse.ArgumentDefaultsHelpFormatter,
  501. )
  502. parser.add_argument(
  503. "--logPath",
  504. type=str,
  505. default="logs/test.log",
  506. help="Log file path",
  507. )
  508. parser.add_argument(
  509. "--dataPath",
  510. type=str,
  511. default=r"D:\Kevin\zhaoyuan\data\V2V_CSAE53-2020_ForwardCollision_LST_01-02",
  512. help="Input data directory",
  513. )
  514. parser.add_argument(
  515. "--allConfigPath",
  516. type=str,
  517. default="config/all_metrics_config.yaml",
  518. help="Full metrics config file path (built-in + custom)",
  519. )
  520. parser.add_argument(
  521. "--baseConfigPath",
  522. type=str,
  523. default="config/builtin_metrics_config.yaml",
  524. help="Built-in metrics config file path",
  525. )
  526. parser.add_argument(
  527. "--reportPath",
  528. type=str,
  529. default="reports",
  530. help="Output report directory",
  531. )
  532. parser.add_argument(
  533. "--customMetricsPath",
  534. type=str,
  535. default="custom_metrics",
  536. help="Custom metrics scripts directory (optional)",
  537. )
  538. parser.add_argument(
  539. "--customConfigPath",
  540. type=str,
  541. default="config/custom_metrics_config.yaml",
  542. help="Custom metrics config path (optional)",
  543. )
  544. args = parser.parse_args()
  545. try:
  546. pipeline = EvaluationPipeline(
  547. all_config_path=args.allConfigPath,
  548. base_config_path=args.baseConfigPath,
  549. log_path=args.logPath,
  550. data_path=args.dataPath,
  551. report_path=args.reportPath,
  552. custom_metrics_path=args.customMetricsPath,
  553. custom_config_path=args.customConfigPath
  554. )
  555. start_time = time.perf_counter()
  556. result = pipeline.execute()
  557. elapsed_time = time.perf_counter() - start_time
  558. if "error" in result:
  559. print(f"Evaluation failed: {result['error']}")
  560. sys.exit(1)
  561. print(f"Evaluation completed, total time: {elapsed_time:.2f}s")
  562. print(f"Report path: {pipeline.report_path}")
  563. except KeyboardInterrupt:
  564. print("\nUser interrupted")
  565. sys.exit(130)
  566. except Exception as e:
  567. print(f"Execution error: {str(e)}")
  568. traceback.print_exc()
  569. sys.exit(1)
  570. if __name__ == "__main__":
  571. warnings.filterwarnings("ignore")
  572. main()