run.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  """
  Entry point of the data-processing system (LST and PGVIL inputs).

  Supports multiple data formats and plugin-based extensions.
  """
  7. import argparse
  8. import sys
  9. import traceback
  10. from pathlib import Path
  11. from core.config_manager import load_config, update_config
  12. from core.optimized_processor import process_lst_data, process_pgvil_data # 新增导入
  13. from core.plugin_manager import PluginManager
  14. from core.resource_manager import ResourceManager
  def parse_arguments():
      """Build the command-line interface and parse the process arguments.

      Returns:
          argparse.Namespace: the parsed options (data type, input/output
          paths, coordinate settings, parallelism switches, batch size and
          logging settings).
      """
      # One (flag, add_argument keyword options) pair per option. The order of
      # this table is the registration order, and therefore the --help order.
      # Alternative sample archive used during development:
      #   V2V_CSAE53-2020_ForwardCollision_LST_02-01.zip
      option_table = (
          ('--data-type', {
              'type': str,
              'choices': ['lst', 'pgvil'],
              'default': 'lst',
              'help': '要处理的数据类型 (lst 或 pgvil)',
          }),
          ('--zip-path', {
              'type': Path,
              'default': Path('/home/server/桌面/XGJ/dataprocess/V2I_CSAE53-2020_HazardousLocationW_LST_02-01.zip'),
              'help': '输入的ZIP数据文件路径',
          }),
          ('--trafficlight-json', {
              'type': Path,
              'default': None,
              'help': '交通信号灯JSON配置文件路径',
          }),
          ('--output-dir', {
              'type': Path,
              'default': Path('./data_zhaoyuan3/'),
              'help': '输出目录的基础路径',
          }),
          ('--utm-zone', {
              'type': int,
              'default': 51,
              'help': 'UTM坐标系区域 (默认: 51)',
          }),
          ('--x-offset', {
              'type': float,
              'default': 0.0,
              'help': 'X坐标偏移量',
          }),
          ('--y-offset', {
              'type': float,
              'default': 0.0,
              'help': 'Y坐标偏移量',
          }),
          ('--config', {
              'type': Path,
              'default': Path('config/config.json'),
              'help': '配置文件路径',
          }),
          ('--plugins-dir', {
              'type': Path,
              'default': Path('plugins'),
              'help': '插件目录路径',
          }),
          ('--resources-dir', {
              'type': Path,
              'default': Path('resources'),
              'help': '资源文件目录路径',
          }),
          ('--use-parallel', {
              'action': 'store_true',
              'help': '启用并行处理',
          }),
          ('--no-parallel', {
              'action': 'store_true',
              'help': '禁用并行处理',
          }),
          ('--max-workers', {
              'type': int,
              'default': None,
              'help': '并行处理的最大工作线程数',
          }),
          ('--batch-size', {
              'type': int,
              'default': 10000,
              'help': '处理大数据集时的批处理大小',
          }),
          ('--log-level', {
              'type': str,
              'choices': ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
              'default': 'INFO',
              'help': '日志级别',
          }),
          ('--log-dir', {
              'type': Path,
              'default': Path('logs'),
              'help': '日志文件目录',
          }),
          ('--no-log-file', {
              'action': 'store_true',
              'help': '禁用文件日志',
          }),
      )

      cli = argparse.ArgumentParser(
          description='数据处理系统,支持多种数据格式和插件扩展'
      )
      for flag, options in option_table:
          cli.add_argument(flag, **options)
      return cli.parse_args()
  def setup_config(args):
      """Create the run's output directory and apply CLI overrides to the config.

      The output directory is ``args.output_dir / <stem of args.zip_path>`` and
      is created if it does not exist. The configuration loaded from
      ``args.config`` is updated in place with path, parallelism, batching,
      logging and coordinate settings taken from ``args`` and then passed to
      ``update_config``.

      Args:
          args: parsed command-line namespace (see ``parse_arguments``).

      Returns:
          Path: the per-archive output directory.
      """
      # Derive the output directory from the archive name and make sure it exists.
      output_dir = args.output_dir / args.zip_path.stem
      output_dir.mkdir(parents=True, exist_ok=True)
      print(f"输出目录: {output_dir}")

      config = load_config(args.config)

      # Results, data and temporary files all go to the same per-archive directory.
      output_dir_text = str(output_dir)
      for path_key in ('output_dir', 'data_dir', 'temp_dir'):
          config['paths'][path_key] = output_dir_text

      # Parallelism is only overridden when a switch was given on the command
      # line; --use-parallel wins when both switches are present.
      if args.use_parallel:
          if args.no_parallel:
              print("警告: 同时指定了 --use-parallel 和 --no-parallel,将使用 --use-parallel")
          config['processing']['use_parallel'] = True
      elif args.no_parallel:
          config['processing']['use_parallel'] = False

      if args.max_workers is not None:
          config['processing']['max_workers'] = args.max_workers
      # 10000 is the CLI default; any other value overrides the config file.
      if args.batch_size != 10000:
          config['processing']['batch_size'] = args.batch_size

      # Logging and coordinate settings always follow the command line.
      logging_overrides = {
          'level': args.log_level,
          'log_dir': str(args.log_dir),
          'log_to_file': not args.no_log_file,
      }
      for log_key, log_value in logging_overrides.items():
          config['logging'][log_key] = log_value

      for coord_key in ('utm_zone', 'x_offset', 'y_offset'):
          config['coordinates'][coord_key] = getattr(args, coord_key)

      # Plugin and resource locations.
      config['paths']['plugins_dir'] = str(args.plugins_dir)
      config['paths']['resources_dir'] = str(args.resources_dir)

      # Publish the updated configuration.
      update_config(config)
      return output_dir
  def process_plugins(args, output_dir, final_csv_path):
      """Run the matching plugin for each folder in the archive and merge results.

      For every folder reported by ``ResourceManager.list_zip_folders`` a plugin
      is looked up; its output is written to ``<folder>_processed.csv`` inside
      ``output_dir``, validated, and merged into ``final_csv_path`` in place.
      Folders without a plugin, with empty output, or with invalid output are
      reported and skipped.

      Args:
          args: parsed command-line namespace; ``plugins_dir``,
              ``resources_dir`` and ``zip_path`` are read.
          output_dir: directory that receives the per-folder CSV files.
          final_csv_path: main CSV file that plugin data is merged into.
      """
      plugins = PluginManager(args.plugins_dir)
      resources = ResourceManager(args.resources_dir)

      print("处理并合并自定义数据...")
      for folder in resources.list_zip_folders(args.zip_path):
          plugin = plugins.get_plugin_for_data(args.zip_path, folder)
          if not plugin:
              print(f"未找到文件夹的插件: {folder}")
              continue

          print(f"使用插件 '{plugin.__name__}' 处理文件夹 '{folder}'")
          # presumably a pandas DataFrame (it exposes .empty and .to_csv) -- confirm
          plugin_output = plugin().process_data(args.zip_path, folder, output_dir)
          if plugin_output is None or plugin_output.empty:
              print(f"警告: 插件处理失败: {folder}")
              continue

          output_file = output_dir / f"{folder}_processed.csv"
          print(f'插件输出文件: {output_file}')
          plugin_output.to_csv(output_file, index=False)

          if not resources.validate_plugin_output(output_file):
              print(f"警告: 插件输出验证失败: {folder}")
              continue

          # Merge the plugin CSV into the main CSV; the main file is both the
          # merge input and the merge destination.
          print(f"合并 {folder} 数据...")
          merged = resources.merge_plugin_data(
              final_csv_path, output_file, final_csv_path
          )
          if merged:
              print(f"成功合并 {folder} 数据")
          else:
              print(f"警告: 合并 {folder} 数据失败")
  def main():
      """Run the whole pipeline: parse options, configure, process, run plugins.

      The process always terminates through ``sys.exit``: status 0 on success,
      status 1 when the data type is unsupported, when the built-in processing
      step returns a falsy result path, or when any exception is raised (its
      traceback is printed first).
      """
      args = parse_arguments()
      try:
          output_dir = setup_config(args)
          print("开始数据处理流程")
          print(f"从以下位置加载配置: {args.config}")

          # Dispatch to the built-in processor for the requested data type.
          if args.data_type == 'lst':
              final_csv_path = process_lst_data(
                  zip_data_path=args.zip_path,
                  output_base_dir=output_dir,
                  trafficlight_json_path=args.trafficlight_json,
                  utm_zone=args.utm_zone,
                  x_offset=args.x_offset,
                  y_offset=args.y_offset
              )
          elif args.data_type == 'pgvil':
              final_csv_path = process_pgvil_data(
                  zip_data_path=args.zip_path,
                  output_base_dir=output_dir,
                  utm_zone=args.utm_zone,
                  x_offset=args.x_offset,
                  y_offset=args.y_offset
              )
          else:
              # Defensive: argparse `choices` already restricts the value.
              print(f"不支持的数据类型: {args.data_type}")
              sys.exit(1)

          if not final_csv_path:
              print(f"{args.data_type}内置数据处理失败")
              sys.exit(1)
          print(f"\n{args.data_type}内置处理流程成功完成!")

          # Let plugins process their folders and merge into the main CSV.
          process_plugins(args, output_dir, final_csv_path)

          # Report the data type that was actually processed; the message used
          # to say "LST" unconditionally, which was wrong for pgvil runs.
          print(f"{args.data_type.upper()}数据处理成功完成")
          print(f"所有处理结果已保存到: {output_dir}")
          sys.exit(0)
      except Exception as e:
          # SystemExit is not an Exception subclass, so the sys.exit calls
          # above pass through this handler untouched.
          print(f"\n处理过程中出现错误: {e}")
          traceback.print_exc()
          sys.exit(1)
  # Script entry point: run the pipeline only when this file is executed
  # directly, not when it is imported as a module.
  if __name__ == "__main__":
      main()