
version -v2.0

XGJ_zhaoyuan · 1 week ago · parent commit be35c0d8f7
5 changed files with 641 additions and 307 deletions
  1. core/config_manager.py (+45 -40)
  2. core/optimized_processor.py (+51 -19)
  3. core/processors/built_in/lst.py (+238 -98)
  4. core/processors/built_in/pgvil.py (+149 -144)
  5. run.py (+158 -6)

+ 45 - 40
core/config_manager.py

@@ -3,10 +3,12 @@ import os
 from pathlib import Path
 from typing import Dict, Any, Optional, Union, List
 import yaml
+from core.optimized_processor import get_base_path
 
 # Global configuration dictionary
 _CONFIG: Dict[str, Any] = {}
-
+base_path = get_base_path()
+print("base_path is", base_path)
 # Default configuration
 DEFAULT_CONFIG = {
     # Data processing configuration
@@ -44,27 +46,30 @@ DEFAULT_CONFIG = {
         "resources_dir": "resources",
         "plugins_dir": "plugins",
         "output_dir": "output",
-        "engine_path": "_internal/engine",
-        "map_path": "_internal/data_map",
-        "dbc_path": "_internal/VBox.dbc"
+        "engine_path": f"{os.path.join(base_path, '_internal/engine')}",
+        "map_path": f"{os.path.join(base_path, '_internal/data_map')}",
+        "dbc_path": f"{os.path.join(base_path, '_internal/VBox.dbc')}"
+        # "engine_path": '_internal/engine',
+        # "map_path": '_internal/data_map',
+        # "dbc_path": '_internal/VBox.dbc'
     }
 }
 
 
 def load_config(config_path: Optional[Union[str, Path]] = None) -> Dict[str, Any]:
     """Load a configuration file.
-    
+
     Args:
         config_path: path to the config file; JSON and YAML formats are supported
-        
+
     Returns:
         The configuration dictionary.
     """
     global _CONFIG
-    
+
     # Start from the default configuration
     _CONFIG = DEFAULT_CONFIG.copy()
-    
+
     if config_path is None:
         # Look for a default configuration file
         default_paths = [
@@ -75,28 +80,28 @@ def load_config(config_path: Optional[Union[str, Path]] = None) -> Dict[str, Any
             Path("config/config.yaml"),
             Path("config/config.yml")
         ]
-        
+
         for path in default_paths:
             if path.exists():
                 config_path = path
                 break
-    
+
     if config_path is None:
         print("Config file not found; using the default configuration")
         return _CONFIG
-    
+
     # Ensure config_path is a Path object
     if isinstance(config_path, str):
         config_path = Path(config_path)
-    
+
     if not config_path.exists():
         print(f"Config file does not exist: {config_path}; using the default configuration")
         return _CONFIG
-    
+
     try:
         # Choose a parser based on the file extension
         suffix = config_path.suffix.lower()
-    
+
         if suffix in [".yaml", ".yml"]:
             with open(config_path, "r", encoding="utf-8") as f:
                 user_config = yaml.safe_load(f)
@@ -106,29 +111,29 @@ def load_config(config_path: Optional[Union[str, Path]] = None) -> Dict[str, Any
         else:
             print(f"Unsupported config file format: {suffix}; using the default configuration")
             return _CONFIG
-    
+
         # Recursively merge the user config into the defaults
         _CONFIG = _merge_configs(_CONFIG, user_config)
         print(f"Successfully loaded config file: {config_path}")
-    
+
     except Exception as e:
         print(f"Failed to load config file: {e}")
-    
+
     return _CONFIG
 
 
 def _merge_configs(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
     """Recursively merge two configuration dictionaries.
-    
+
     Args:
         base: the base configuration
         override: the overriding configuration
-        
+
     Returns:
         The merged configuration.
     """
     result = base.copy()
-    
+
     for key, value in override.items():
         # If both values are dicts, merge them recursively
         if key in result and isinstance(result[key], dict) and isinstance(value, dict):
@@ -136,28 +141,28 @@ def _merge_configs(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str,
         else:
             # Otherwise override the value directly
             result[key] = value
-    
+
     return result
 
 
 def get_config(section: Optional[str] = None) -> Any:
     """Get a configuration value.
-    
+
     Args:
         section: config section name; if None, the whole config dict is returned
-        
+
     Returns:
         The config value or the config dictionary.
     """
     global _CONFIG
-    
+
     # If the config is empty, load the defaults
     if not _CONFIG:
         _CONFIG = DEFAULT_CONFIG.copy()
-    
+
     if section is None:
         return _CONFIG
-    
+
     # Support dot-separated nested access, e.g. "database.batch_query_size"
     if "." in section:
         parts = section.split(".")
@@ -169,36 +174,36 @@
                 print(f"Config section not found: {section}")
                 return None
         return current
-    
+
     # Single-level access
     return _CONFIG.get(section)
 
 
 def set_config(section: str, value: Any) -> None:
     """Set a configuration value.
-    
+
     Args:
         section: config section name; dot-separated nested sections are supported
         value: the value to set
     """
     global _CONFIG
-    
+
     # If the config is empty, load the defaults
     if not _CONFIG:
         _CONFIG = DEFAULT_CONFIG.copy()
-    
+
     # Support dot-separated nested assignment
     if "." in section:
         parts = section.split(".")
         current = _CONFIG
-        
+
         # Walk down to the second-to-last level
         for part in parts[:-1]:
             # Create an intermediate dict if the node is missing or not a dict
             if part not in current or not isinstance(current[part], dict):
                 current[part] = {}
             current = current[part]
-        
+
         # Set the value at the final level
         current[parts[-1]] = value
     else:
@@ -208,23 +213,23 @@ def set_config(section: str, value: Any) -> None:
 
 def save_config(config_path: Union[str, Path], format: str = "json") -> bool:
     """Save the configuration to a file.
-    
+
     Args:
         config_path: path of the config file
         format: file format, "json" and "yaml" are supported
-        
+
     Returns:
         Whether the save succeeded.
     """
     global _CONFIG
-    
+
     # Ensure config_path is a Path object
     if isinstance(config_path, str):
         config_path = Path(config_path)
-    
+
     # Ensure the target directory exists
     config_path.parent.mkdir(parents=True, exist_ok=True)
-    
+
     try:
         # Choose a writer based on the requested format
         if format.lower() == "yaml":
@@ -233,10 +238,10 @@ def save_config(config_path: Union[str, Path], format: str = "json") -> bool:
         else:  # default to JSON
             with open(config_path, "w", encoding="utf-8") as f:
                 json.dump(_CONFIG, f, indent=2, ensure_ascii=False)
-        
+
         print(f"Configuration saved to: {config_path}")
         return True
-        
+
     except Exception as e:
         print(f"Failed to save configuration: {e}")
         return False
@@ -244,7 +249,7 @@ def save_config(config_path: Union[str, Path], format: str = "json") -> bool:
 
 def update_config(new_config: Dict[str, Any]) -> None:
     """Update the global configuration.
-    
+
     Args:
         new_config: a new config dict that will be merged into the current configuration
     """

+ 51 - 19
core/optimized_processor.py

@@ -1,6 +1,8 @@
 from pathlib import Path
 from typing import Optional
 import traceback
+import sys
+import os
 
 from .processors.built_in.lst import ZipCSVProcessor, RosbagProcessor, Config
 
@@ -14,14 +16,37 @@ from core.processors.built_in.pgvil import run_pgvil_engine, PGVILProcessor
 from core.processors.built_in.pgvil import Config as PGVILConfig
 
 
+def get_base_path():
+    """Return the directory that contains the executable."""
+    # PyInstaller would set sys.frozen; Nuitka instead defines __compiled__
+    if "__compiled__" in globals():  # running as a Nuitka-compiled build
+        base_path = os.path.dirname(sys.executable)  # directory of the executable
+        print("base_path", Path(base_path))
+    else:  # development environment
+        # os.path.dirname('.') returns '' and breaks os.path.join; use the CWD instead
+        base_path = os.path.abspath('.')
+        print("file path is", base_path)
+    return base_path
+
+
+def resource_path(relative_path):
+    """ 获取资源绝对路径,兼容开发环境和单文件模式 """
+    if hasattr(sys, '_MEIPASS'):
+        base_path = sys._MEIPASS
+    else:
+        base_path = os.path.abspath(".")
+    return os.path.join(base_path, relative_path)
+
+
 def process_lst_data(
-    zip_data_path: Path,
-    output_base_dir: Path,
-    trafficlight_json_path: Optional[Path] = None,
-    utm_zone: int = 51,
-    x_offset: float = 0.0,
-    y_offset: float = 0.0,
-    continue_to_iterate: bool = False,
+        zip_data_path: Path,
+        output_base_dir: Path,
+        trafficlight_json_path: Optional[Path] = None,
+        utm_zone: int = 51,
+        x_offset: float = 0.0,
+        y_offset: float = 0.0,
+        continue_to_iterate: bool = False,
 ) -> Optional[Path]:
     """
     Processes LST data using an optimized pipeline.
@@ -50,6 +75,7 @@ def process_lst_data(
         trafficlight_json_path = None
 
     try:
         # Initialize configuration
         config = Config(
             zip_path=zip_data_path.resolve(),
@@ -57,14 +83,18 @@ def process_lst_data(
             json_path=(
                 trafficlight_json_path.resolve() if trafficlight_json_path else None
             ),
-            dbc_path=Path("_internal/VBox.dbc").resolve(),
-            engine_path=Path("_internal/engine").resolve(),
-            map_path=Path("_internal/data_map").resolve(),
+            dbc_path=resource_path('VBox.dbc'),
+            engine_path=resource_path('engine'),
+            map_path=resource_path('data_map'),
             utm_zone=utm_zone,
             x_offset=x_offset,
             y_offset=y_offset,
         )
-
+        print("engine path is", config.engine_path)
         # Process built-in data types
         print("Processing built-in data types...")
         zip_processor = ZipCSVProcessor(config)
@@ -99,11 +129,11 @@ def process_lst_data(
 
 
 def process_pgvil_data(
-    zip_data_path: Path,
-    output_base_dir: Path,
-    utm_zone: int = 51,
-    x_offset: float = 0.0,
-    y_offset: float = 0.0,
+        zip_data_path: Path,
+        output_base_dir: Path,
+        utm_zone: int = 51,
+        x_offset: float = 0.0,
+        y_offset: float = 0.0,
 ) -> Optional[Path]:
     """处理PGVIL数据
 
@@ -117,14 +147,17 @@ def process_pgvil_data(
     Returns:
         Optional[Path]: 处理后的CSV文件路径,处理失败则返回None
     """
     pgvil_config = PGVILConfig(
         zip_path=zip_data_path,
         output_path=output_base_dir,
         utm_zone=utm_zone,
         x_offset=x_offset,
         y_offset=y_offset,
-        engine_path=Path("_internal/engine").resolve(),
-        map_path=Path("_internal/data_map").resolve(),
+        engine_path=resource_path('engine'),
+        map_path=resource_path('data_map')
     )
 
     if not zip_data_path.exists():
@@ -138,7 +171,6 @@ def process_pgvil_data(
         pgvil_root = processor.process_zip()
         if not pgvil_root.exists():
             raise RuntimeError("Failed to extract ZIP file")
-        
 
         # Run C++ engine for additional processing
         if not run_pgvil_engine(pgvil_config):
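
Note: a minimal sketch of how the two new path helpers behave (the Nuitka __compiled__ and PyInstaller sys.frozen/_MEIPASS conventions are as documented for those tools; the layout shown is an assumption, not part of this commit):

    import os
    import sys

    def get_base_path():
        # Nuitka defines __compiled__ in compiled modules; PyInstaller sets sys.frozen
        if "__compiled__" in globals():
            return os.path.dirname(sys.executable)  # next to the packaged executable
        return os.path.abspath('.')                 # development: current working directory

    def resource_path(relative_path):
        # PyInstaller one-file mode unpacks bundled data to sys._MEIPASS
        base_path = getattr(sys, '_MEIPASS', os.path.abspath('.'))
        return os.path.join(base_path, relative_path)

    # e.g. resource_path('engine') resolves inside the unpacked bundle when frozen,
    # and to <cwd>/engine during development

Note that config_manager.py builds its default paths from get_base_path() plus '_internal/...', while process_lst_data/process_pgvil_data resolve the same resources through resource_path() without the '_internal' prefix; the two only agree when the engine, data_map and VBox.dbc files exist in both locations.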

+ 238 - 98
core/processors/built_in/lst.py

@@ -10,11 +10,12 @@ import subprocess
 import numpy as np
 import pandas as pd
 from collections import Counter
-from datetime import datetime
 import argparse
 import sys
 from pyproj import Proj
-from bagpy.bagreader import bagreader
+from scipy.spatial import cKDTree
+
 import shutil
 import json
 from dataclasses import dataclass, field
@@ -22,7 +23,9 @@ from dataclasses import dataclass, field
 # --- Constants ---
 PLAYER_ID_EGO = int(1)
 PLAYER_ID_OBJ = int(2)
+PLAYER_ID_PEDESTRIAN = int(5)
 DEFAULT_TYPE = int(1)
+PEDESTRIAN_TYPE = int(5)
 OUTPUT_CSV_OBJSTATE = "ObjState.csv"
 OUTPUT_CSV_TEMP_OBJSTATE = "ObjState_temp_intermediate.csv"  # Should be eliminated
 OUTPUT_CSV_EGOSTATE = "EgoState.csv"  # Not used in final merge? Check logic if needed.
@@ -30,6 +33,7 @@ OUTPUT_CSV_MERGED = "merged_ObjState.csv"
 OUTPUT_CSV_OBU = "OBUdata.csv"
 OUTPUT_CSV_LANEMAP = "LaneMap.csv"
 OUTPUT_CSV_EGOMAP = "EgoMap.csv"
+MERGED_CSV_EGOMAP = "merged_egomap.csv"
 OUTPUT_CSV_FUNCTION = "Function.csv"
 ROADMARK_CSV = "RoadMark.csv"
 HD_LANE_CSV = "hd_lane.csv"
@@ -70,7 +74,8 @@ class ZipCSVProcessor:
         "travelDist", "composite_v", "relative_dist", "x_relative_dist", "y_relative_dist", "type"  # Added type
     ]
     OBJ_COLS_OLD_SUFFIXED = [
-        "v_obj", "speedX_obj", "speedY_obj", "speedZ_obj", "posH_obj", "pitch_obj", "roll_obj", "roll_rate_obj", "pitch_rate_obj", "speedH_obj",
+        "v_obj", "speedX_obj", "speedY_obj", "speedZ_obj", "posH_obj", "pitch_obj", "roll_obj", "roll_rate_obj",
+        "pitch_rate_obj", "speedH_obj",
         "posX_obj", "posY_obj", "accelX_obj", "accelY_obj", "accelZ_obj", "travelDist_obj"
     ]
     OBJ_COLS_MAPPING = {old: new for old, new in
@@ -84,6 +89,7 @@ class ZipCSVProcessor:
         self._init_keyword_mapping()
 
     def _load_dbc(self, dbc_path: Optional[Path]) -> Optional[cantools.db.Database]:
+        dbc_path = Path(dbc_path) if dbc_path else None  # guard: Path(None) would raise TypeError
         if not dbc_path or not dbc_path.exists():
             print("DBC path not provided or file not found.")
             return None
@@ -114,8 +120,6 @@ class ZipCSVProcessor:
                 "posH": "yaw",
                 "pitch": "tilt",
                 "roll": "roll",
-                "roll_rate": "roll_rate",
-                "pitch_rate": "tilt_rate",
                 "speedH": "yaw_rate",
                 "posX": "latitude_dd",  # Source before projection
                 "posY": "longitude_dd",  # Source before projection
@@ -130,9 +134,10 @@ class ZipCSVProcessor:
                 "y_relative_dist": "y_distance",
                 "type": None  # Will be set later
             },
-            "db_columns": ["ID", "second", "usecond", "speed", "y_speed", "x_speed", "z_speed", "tilt_rate", "z_acceleration",
-                           "yaw", "tilt", "roll", "yaw_rate", "latitude_dd", "longitude_dd", "roll_rate", "total_distance",
-                           "x_acceleration", "y_acceleration", "total_distance", "distance", "x_distance", "y_distance"]  # Actual cols to SELECT
+            "db_columns": ["ID", "second", "usecond", "speed", "y_speed", "x_speed", "z_speed", "z_acceleration",
+                           "yaw", "tilt", "roll", "yaw_rate", "latitude_dd", "longitude_dd", "total_distance",
+                           "x_acceleration", "y_acceleration", "total_distance", "distance", "x_distance", "y_distance"]
+            # Actual cols to SELECT
         }
 
     def _get_can_config(self):
@@ -141,8 +146,8 @@ class ZipCSVProcessor:
             "mapping": {  # Map unified output columns to CAN signals or direct fields
                 # EGO mappings (VUT = Vehicle Under Test)
                 "v": "VUT_Speed_mps",
-                "speedX": "VUT_Speed_x_mps",
-                "speedY": "VUT_Speed_y_mps",
+                "speedX": "VUT_Speed_long_mps",
+                "speedY": "VUT_Speed_lat_mps",
                 "speedZ": "VUT_Speed_z_mps",
                 "speedH": "VUT_Yaw_Rate",
                 "posX": "VUT_GPS_Latitude",  # Source before projection
@@ -153,12 +158,12 @@ class ZipCSVProcessor:
                 "pitch_rate": None,
                 "roll_rate": None,
                 "accelX": "VUT_Acc_X",
-                "accelY": "VUT_Acc_Y",
+                "accelY": "VUT_Acc_Y2",
                 "accelZ": "VUT_Acc_Z",
                 # OBJ mappings (UFO = Unidentified Flying Object / Other Vehicle)
                 "v_obj": "Speed_mps",
-                "speedX_obj": "UFO_Speed_x_mps",
-                "speedY_obj": "UFO_Speed_y_mps",
+                "speedX_obj": "UFO_Speed_long_mps",
+                "speedY_obj": "UFO_Speed_lat_mps",
                 "speedZ_obj": "UFO_Speed_z_mps",
                 "speedH_obj": "Yaw_Rate",
                 "posX_obj": "GPS_Latitude",  # Source before projection
@@ -174,8 +179,8 @@ class ZipCSVProcessor:
                 # Relative Mappings
                 "composite_v": "VUT_Rel_speed_long_mps",
                 "relative_dist": "VUT_Dist_MRP_Abs",
-                "x_relative_dist": "VUT_Dist_MRP_X",
-                "y_relative_dist": "VUT_Dist_MRP_Y",
+                "x_relative_dist": "VUT_Dist_MRP_long",
+                "y_relative_dist": "VUT_Dist_MRP_lat",
                 # travelDist often calculated, not direct CAN signal
                 "travelDist": None,  # Placeholder
                 "travelDist_obj": None  # Placeholder
@@ -188,7 +193,7 @@ class ZipCSVProcessor:
         self.keyword_mapping = {
             "gnss": ("gnss_table", OUTPUT_CSV_OBJSTATE),
             # GNSS likely represents ego, writing to ObjState first? Revisit logic if needed.
-            "can2": ("can_table", OUTPUT_CSV_OBJSTATE),  # Process CAN data into the combined ObjState file
+            "can4": ("can_table", OUTPUT_CSV_OBJSTATE),  # Process CAN data into the combined ObjState file
         }
 
     def process_zip(self) -> None:
@@ -224,7 +229,7 @@ class ZipCSVProcessor:
                                 shutil.copyfileobj(source, target)
 
                             # Process the extracted DB file
-                            self._process_db_file(extracted_path, output_dir, table_type, csv_name)
+                            self._process_db_file(file_info.filename, extracted_path, output_dir, table_type, csv_name)
 
                         except (sqlite3.Error, pd.errors.EmptyDataError, FileNotFoundError, KeyError) as e:
                             print(f"Error processing DB file {file_info.filename}: {e}")
@@ -249,7 +254,7 @@ class ZipCSVProcessor:
         return None
 
     def _process_db_file(
-            self, db_path: Path, output_dir: Path, table_type: str, csv_name: str
+            self, filename: str, db_path: Path, output_dir: Path, table_type: str, csv_name: str
     ) -> None:
         """Connects to SQLite DB and processes the specified table type."""
         output_csv_path = output_dir / csv_name
@@ -267,7 +272,7 @@ class ZipCSVProcessor:
 
                 print(f"Exporting data from table '{table_type}' to {output_csv_path}")
                 if table_type == "can_table":
-                    self._process_can_table_optimized(cursor, output_csv_path)
+                    self._process_can_table_optimized(filename, cursor, output_csv_path)
                 elif table_type == "gnss_table":
                     # Pass output_path directly, avoid intermediate steps
                     self._process_gnss_table(cursor, output_csv_path)
@@ -376,9 +381,29 @@ class ZipCSVProcessor:
                 processed_data.append(record)
 
             if processed_data:
-                df_final = pd.DataFrame(processed_data)[output_columns].iloc[::4].reset_index(drop=True)  # Ensure column order
+                df_final = pd.DataFrame(processed_data)[output_columns].iloc[::4].reset_index(
+                    drop=True)  # Ensure column order
                 df_final['simFrame'] = np.arange(1, len(df_final) + 1)
-                df_final.to_csv(output_path, index=False, encoding="utf-8")
+                df_final["pitch_rate"] = df_final["pitch"].diff() / df_final["simTime"].diff()
+                df_final["roll_rate"] = df_final["roll"].diff() / df_final["simTime"].diff()
+                # print("df_final[\"posH\"] is", df_final["posH"])
+                df_final["posH"] = (90 - df_final["posH"])
+                stopcar_flag = self.is_valid_interval(df_final)
+                # print("stopcar_flag is", stopcar_flag)
+                if stopcar_flag:
+                    first_gt_1 = df_final['v'].gt(1).idxmax()
+                    last_gt_1 = df_final['v'].gt(0.15)[::-1].idxmax()
+                    result_df = df_final.loc[first_gt_1:last_gt_1].copy()
+                    result_df.to_csv(output_path, index=False, encoding="utf-8")
+                else:
+                    df_final.to_csv(output_path, index=False, encoding="utf-8")
                 print(f"Successfully wrote GNSS data to {output_path}")
             else:
                 print("No processable records found in gnss_table.")
@@ -388,7 +413,7 @@ class ZipCSVProcessor:
         except Exception as e:
             print(f"Unexpected error during GNSS processing: {e}")
 
-    def _process_can_table_optimized(self, cursor, output_path: Path) -> None:
+    def _process_can_table_optimized(self, filename, cursor, output_path: Path) -> None:
         """Processes CAN data directly into the final merged DataFrame format."""
         config = self.table_config["can_table"]
         db_columns = config["db_columns"]
@@ -420,15 +445,24 @@ class ZipCSVProcessor:
 
             # Separate EGO and OBJ data based on available columns
             df_ego = self._extract_vehicle_data(df_raw, PLAYER_ID_EGO)
             df_obj = self._extract_vehicle_data(df_raw, PLAYER_ID_OBJ)
 
             # Project coordinates
             df_ego = self._project_coordinates(df_ego, 'posX', 'posY')
             df_obj = self._project_coordinates(df_obj, 'posX', 'posY')  # Use same column names after extraction
 
             # Add calculated/default columns
             df_ego['type'] = DEFAULT_TYPE
-            df_obj['type'] = DEFAULT_TYPE
+            if 'pedestrian' in filename.lower():
+                df_obj['type'] = PEDESTRIAN_TYPE
+            else:
+                df_obj['type'] = DEFAULT_TYPE
             # Note: travelDist is often calculated later or not available directly
 
             # Ensure both have the same columns before merging
@@ -437,23 +471,31 @@ class ZipCSVProcessor:
             df_obj = df_obj.reindex(columns=final_columns).iloc[::4]
 
             # Reindex simFrame of ego and obj
-            df_ego['simFrame'] = np.arange(1, len(df_ego)+1)
-            df_obj['simFrame'] = np.arange(1, len(df_obj)+1)
+            df_ego['simFrame'] = np.arange(1, len(df_ego) + 1)
+            df_obj['simFrame'] = np.arange(1, len(df_obj) + 1)
 
             # Merge EGO and OBJ dataframes
             df_merged = pd.concat([df_ego, df_obj], ignore_index=True)
 
             # Sort and clean up
             df_merged.sort_values(by=["simTime", "simFrame", "playerId"], inplace=True)
-            df_merged.fillna(0, inplace = True)
+            df_merged.fillna(0, inplace=True)
             df_merged.reset_index(drop=True, inplace=True)
 
             # Fill potential NaNs introduced by reindexing or missing data
             # Choose appropriate fill strategy (e.g., 0, forward fill, or leave as NaN)
             # df_merged.fillna(0.0, inplace=True)  # Example: fill with 0.0
-
-            # Save the final merged DataFrame
-            df_merged.to_csv(output_path, index=False, encoding="utf-8")
+            stopcar_flag = self.is_valid_interval(df_merged)
+            print("stopcar_flag is", stopcar_flag)
+            if stopcar_flag:
+                print("Filtering out the stationary vehicle data!")
+                # Keep ego rows from the first v > 1 m/s to the last v > 0.15 m/s
+                first_gt_01 = df_merged[df_merged['playerId'] == 1]['v'].gt(1).idxmax()
+                last_gt_01 = df_merged[df_merged['playerId'] == 1]['v'].gt(0.15)[::-1].idxmax()
+                result_df = df_merged.loc[first_gt_01:last_gt_01 - 1].copy()
+                # Save the final merged DataFrame
+                result_df.to_csv(output_path, index=False, encoding="utf-8")
+            else:
+                df_merged.to_csv(output_path, index=False, encoding="utf-8")
             print(f"Successfully processed CAN data and wrote merged output to {output_path}")
 
         except sqlite3.Error as e:
@@ -540,6 +582,7 @@ class ZipCSVProcessor:
             # Select EGO columns (not ending in _obj) + relative columns
             ego_cols = {target: source for target, source in self.table_config['can_table']['mapping'].items()
                         if source and not isinstance(source, tuple) and not target.endswith('_obj')}
+            print("ego_cols is", ego_cols)
             rename_map = {}
             select_cols_raw = []
             for target_col, source_info in ego_cols.items():
@@ -559,8 +602,7 @@ class ZipCSVProcessor:
                 df_vehicle_temps_ego[col] = df_vehicle_temp[col].dropna().reset_index(drop=True)
             df_vehicle = pd.concat([df_vehicle, df_vehicle_temps_ego], axis=1)
 
-
-        elif player_id == PLAYER_ID_OBJ:
+        elif player_id in (PLAYER_ID_OBJ, PLAYER_ID_PEDESTRIAN):
             # Select OBJ columns (ending in _obj)
             obj_cols = {target: source for target, source in self.table_config['can_table']['mapping'].items()
                         if source and not isinstance(source, tuple) and target.endswith('_obj')}
@@ -592,12 +634,15 @@ class ZipCSVProcessor:
         # df_vehicle.dropna(subset=[col for col in required_pos if col in df_vehicle.columns], inplace=True)
 
         try:
-            df_vehicle["simTime"] = np.round(np.linspace(df_raw["simTime"].tolist()[0]+28800, df_raw["simTime"].tolist()[0]+28800 + 0.01*(len(df_vehicle)), len(df_vehicle)), 2)
+            df_vehicle["simTime"] = np.round(
+                np.linspace(df_raw["simTime"].tolist()[0], df_raw["simTime"].tolist()[0] + 0.01 * (len(df_vehicle)),
+                            len(df_vehicle)), 2)
             df_vehicle["simFrame"] = np.arange(1, len(df_vehicle) + 1)
             df_vehicle["playerId"] = int(player_id)
             df_vehicle['playerId'] = pd.to_numeric(df_vehicle['playerId']).astype(int)
             df_vehicle["pitch_rate"] = df_vehicle["pitch"].diff() / df_vehicle["simTime"].diff()
             df_vehicle["roll_rate"] = df_vehicle["roll"].diff() / df_vehicle["simTime"].diff()
+            # print("df_vehicle is", df_vehicle)
         except ValueError as ve:
             print(f"{ve}")
         except TypeError as te:
@@ -613,6 +658,15 @@ class ZipCSVProcessor:
             # Ensure data is numeric and handle potential errors/missing values
             lat = pd.to_numeric(df[lat_col], errors='coerce')
             lon = pd.to_numeric(df[lon_col], errors='coerce')
             valid_coords = lat.notna() & lon.notna()
 
             if valid_coords.any():
@@ -628,6 +682,7 @@ class ZipCSVProcessor:
 
             # Rename columns AFTER projection for clarity
             df.rename(columns={lat_col: 'posX', lon_col: 'posY'}, inplace=True)
+
         else:
             # Ensure columns exist even if projection didn't happen
             if 'posX' not in df.columns: df['posX'] = np.nan
@@ -636,13 +691,34 @@ class ZipCSVProcessor:
 
         return df
 
+    def is_valid_interval(self, df, threshold=1, window_sec=0.1):
+        '''
+        Check whether the vehicle is stationary at both ends of the recording,
+        i.e. all speeds within the first and last window_sec seconds are < threshold.
+        '''
+        # Time range of the recording
+        start_time = df['simTime'].tolist()[0]
+        end_time = df['simTime'].tolist()[-1]
+
+        # Rows within the first window_sec seconds
+        mask_before = (df['simTime'] >= start_time) & \
+                      (df['simTime'] < start_time + window_sec)
+        # Rows within the last window_sec seconds
+        mask_after = (df['simTime'] < end_time) & \
+                     (df['simTime'] >= end_time - window_sec)
+
+        # Both ends must stay below the threshold
+        return (df.loc[mask_before, 'v'].max() < threshold) and \
+            (df.loc[mask_after, 'v'].max() < threshold)
+
 
 # --- Polynomial Fitting (Largely unchanged, minor cleanup) ---
 class PolynomialCurvatureFitting:
     """Calculates curvature and its derivative using polynomial fitting."""
 
     def __init__(self, lane_map_path: Path, degree: int = 3):
-        self.lane_map_path = lane_map_path
+        self.lane_map_path = Path(lane_map_path)
         self.degree = degree
         self.data = self._load_data()
         if self.data is not None:
@@ -654,6 +730,7 @@ class PolynomialCurvatureFitting:
 
     def _load_data(self) -> Optional[pd.DataFrame]:
         """Loads lane map data safely."""
+
         if not self.lane_map_path.exists() or self.lane_map_path.stat().st_size == 0:
             print(f"Warning: LaneMap file not found or empty: {self.lane_map_path}")
             return None
@@ -1049,6 +1126,38 @@ class FinalDataProcessor:
         self.config = config
         self.output_dir = config.output_dir
 
+    def _axis_to_ENU(self, speedX, speedY, speedZ, accelX, accelY, accelZ, posH, pitch, roll):
+        """Rotate body-frame velocity/acceleration into the ENU frame via the yaw/pitch/roll DCM."""
+        posh_ENU = posH % 360
+        posh_ENU = posh_ENU * np.pi / 180
+        pitch = pitch * np.pi / 180
+        roll = roll * np.pi / 180
+        east_speedX, north_speedY, north_speedZ = [], [], []
+        east_accelX, north_accelY, north_accelZ = [], [], []
+        for i in range(len(posH)):
+            sy = np.sin(posh_ENU[i])
+            cy = np.cos(posh_ENU[i])
+            cp = np.cos(pitch[i])
+            sp = np.sin(pitch[i])
+            cr = np.cos(roll[i])
+            sr = np.sin(roll[i])
+            trametrix = np.array([[sy * cp, sy * sp * sr - cy * cr, sy * sp * cr + cy * sr],
+                                  [cy * cp, cy * sp * sr + sy * cr, cy * sp * cr - sy * sr], [-sp, cp * sr, cp * cr]])
+            east_speedX_i, north_speedY_i, north_speedZ_i = np.linalg.pinv(trametrix) @ np.array(
+                [speedX[i], speedY[i], speedZ[i]])
+            east_accelX_i, north_accelY_i, north_accelZ_i = np.linalg.pinv(trametrix) @ np.array(
+                [accelX[i], accelY[i], accelZ[i]])
+            east_speedX.append(east_speedX_i)
+            north_speedY.append(north_speedY_i)
+            north_speedZ.append(north_speedZ_i)
+            east_accelX.append(east_accelX_i)
+            north_accelY.append(north_accelY_i)
+            north_accelZ.append(north_accelZ_i)
+        # Note: the original Z components are returned unchanged; the rotated Z values
+        # (north_speedZ / north_accelZ) are computed but deliberately not used.
+        return east_speedX, north_speedY, speedZ, east_accelX, north_accelY, accelZ
+
     def process(self) -> bool:
         """执行最终数据合并和处理步骤。"""
         print("--- Starting Final Data Processing ---")
@@ -1069,6 +1178,33 @@ class FinalDataProcessor:
                 print(f"Successfully created traffic light data file: {traffic_csv_path}")
             # Load and process data
             df_object = pd.read_csv(obj_state_path, dtype={"simTime": float}, low_memory=False)
+            # Coordinate transformation: body frame -> ENU
+            speedX = df_object['speedX']
+            speedY = df_object['speedY']
+            speedZ = df_object['speedZ']
+            accelX = df_object['accelX']
+            accelY = df_object['accelY']
+            accelZ = df_object['accelZ']
+            posH = df_object['posH']
+            pitch = df_object['pitch']
+            roll = df_object['roll']
+            east_speedX, north_speedY, north_speedZ, east_accelX, north_accelY, north_accelZ = self._axis_to_ENU(
+                speedX, speedY, speedZ, accelX, accelY, accelZ, posH, pitch, roll)
+            df_object['speedX'] = east_speedX
+            df_object['speedY'] = north_speedY
+            df_object['speedZ'] = north_speedZ
+            df_object['accelX'] = east_accelX
+            df_object['accelY'] = north_accelY
+            df_object['accelZ'] = north_accelZ
+
             df_ego = df_object[df_object["playerId"] == 1]
             points = df_ego[["posX", "posY"]].values
             window_size = 4
@@ -1089,7 +1225,7 @@ class FinalDataProcessor:
                 print("计算值的长度与 playerId == 1 的行数不匹配!")
             # Process and merge data
             df_merged = self._merge_optional_data(df_object)
-            df_merged[['speedH', 'accelX', 'accelY']] = -df_merged[['speedH', 'accelX', 'accelY']]
 
             # Save final merged file directly to output directory
             merged_csv_path = self.output_dir / OUTPUT_CSV_MERGED
@@ -1180,13 +1316,14 @@ class FinalDataProcessor:
 
         # --- Merge EgoMap ---
         egomap_path = self.output_dir / OUTPUT_CSV_EGOMAP
+        merged_egomap_path = self.output_dir / MERGED_CSV_EGOMAP
         if egomap_path.exists() and egomap_path.stat().st_size > 0:
             try:
                 df_ego = pd.read_csv(egomap_path, dtype={"simTime": float})
                 ego_column = ['posX', 'posY', 'posH']
                 ego_new_column = ['posX_map', 'posY_map', 'posH_map']
-                df_ego = df_ego.rename(columns = dict(zip(ego_column, ego_new_column)))
-            # 删除 simFrame 列,因为使用主数据的 simFrame
+                df_ego = df_ego.rename(columns=dict(zip(ego_column, ego_new_column)))
+                # Drop the simFrame column; the main data's simFrame is used instead
                 if 'simFrame' in df_ego.columns:
                     df_ego = df_ego.drop(columns=['simFrame'])
 
@@ -1199,13 +1336,19 @@ class FinalDataProcessor:
                 df_merged.sort_values(['simTime', 'playerId'], inplace=True)
 
                 # Plain left merge on simTime (replaces the previous merge_asof
                 # nearest-match with a 10 ms tolerance, which excluded simFrame)
-                df_merged = pd.merge_asof(
+                df_merged = pd.merge(
                     df_merged,
                     df_ego,
-                    on='simTime',
-                    by='playerId',
-                    direction='nearest',
-                    tolerance=0.01  # 10ms tolerance
+                    how='left',
+                    on='simTime'
                 )
 
                 # Print the merged column names
@@ -1219,7 +1362,8 @@ class FinalDataProcessor:
                     else:
                         print("警告: 合并 EgoMap 后找不到 simTime 列!")
 
-                df_merged = df_merged.drop(columns = ['posX_map', 'posY_map', 'posH_map'])
+                df_merged = df_merged.drop(columns=['posX_map', 'posY_map', 'posH_map', 'stateMask'])
+                df_merged.to_csv(merged_egomap_path, index=False, float_format='%.6f')
 
                 print("EgoMap data merged.")
             except Exception as e:
@@ -1232,34 +1376,6 @@ class FinalDataProcessor:
 
         # --- Merge hd_lane.csv, hd_road.csv (merging currently disabled) ---
         current_file_path = os.path.abspath(__file__)
-        # root_lane_csv_path1 = os.path.dirname(current_file_path)
-        # root_lane_csv_path2 = os.path.dirname(root_lane_csv_path1)
-        # root_lane_csv_path3 = os.path.dirname(root_lane_csv_path2)
-        # root_lane_csv_path4 = os.path.dirname(root_lane_csv_path3)
-        # lane_path = os.path.join(root_lane_csv_path4, "_internal")
-        # data_path = os.path.join(lane_path, "data_map")
-        # lane_csv_path = os.path.join(data_path, "hd_lane.csv")
-        # road_csv_path = os.path.join(data_path, "hd_link.csv")
-        # df_lane = pd.read_csv(lane_csv_path)
-        # column_to_read = ['link_type', 'link_coords']
-
-        # df_road = pd.read_csv(road_csv_path, usecols = column_to_read)
-        # df_road["simFrame"] = np.arange(1, len(df_road) + 1, 1)
-        # df_road = df_road.rename(columns={'link_type': 'road_type'})
-
-
-        # df_merged = pd.merge(
-        #     df_merged,
-        #     df_lane,
-        #     on='lane_id',
-        #     how = 'left'
-        # )
-        # df_merged = pd.merge(
-        #     df_merged,
-        #     df_road,
-        #     on='simFrame',
-        #     how = 'left'
-        # )
 
         # --- 合并 Traffic ---
         traffic_path = self.output_dir / "Traffic.csv"
@@ -1311,32 +1427,43 @@ class FinalDataProcessor:
                     7: 'S',  # south left-turn
                     8: 'W',  # west left-turn
                     9: 'N',  # north left-turn
-                    10: 'E', # 东左转
-                    11: 'N', # 北行人
-                    12: 'E', # 东行人
-                    13: 'S', # 南右转
-                    14: 'W', # 西右转
-                    15: 'N', # 北右转
+                    10: 'E',  # east left-turn
+                    11: 'N',  # north pedestrian
+                    12: 'E',  # east pedestrian
+                    13: 'S',  # south right-turn
+                    14: 'W',  # west right-turn
+                    15: 'N',  # north right-turn
                     16: 'E'  # east right-turn
                 }
 
                 # Build the trafficlight_id -> direction mapping
                 trafficlight_to_direction = {
                     # Traffic lights for the south-to-north direction
-                    48100017: 'S',
-                    48100038: 'S',
-                    48100043: 'S',
-                    48100030: 'S',
+                    48100017: 'N',
+                    48100038: 'N',
+                    48100043: 'N',
+                    48100030: 'N',
                     # Traffic lights for the west-to-east direction
-                    48100021: 'W',
-                    48100039: 'W',
+                    48100021: 'E',
+                    48100039: 'E',
                     # Traffic lights for the east-to-west direction
-                    48100041: 'E',
-                    48100019: 'E',
+                    48100041: 'W',
+                    48100019: 'W',
                     # Traffic lights for the north-to-south direction
-                    48100033: 'N',
-                    48100018: 'N',
-                    48100022: 'N'
+                    48100033: 'S',
+                    48100018: 'S',
+                    48100022: 'S'
                 }
 
                 # Add a time column for merging
@@ -1354,12 +1481,23 @@ class FinalDataProcessor:
                         return df_merged
 
                 df_merged['time'] = df_merged['simTime'].round(2).astype(float)
+                tree = cKDTree(df_traffic[['simTime']])
 
+                # Query the nearest df_traffic timestamp for each row in df_merged
+                distances, indices = tree.query(df_merged[['simTime']], k=1)
                 # Merge the Traffic data
-                df_merged = pd.merge(df_merged, df_traffic, on=["time"], how="left")
+                df_merged['matched_time'] = df_traffic.iloc[indices.flatten()]['simTime'].values
 
+                # Merge the DataFrames on the matched timestamps
+                df_merged = pd.merge(df_merged, df_traffic, left_on='matched_time', right_on='simTime', how='left')
                 # Handle possibly duplicated column names again
                 df_merged = clean_duplicate_columns(df_merged)
+                df_merged = df_merged.drop(columns=['time_x', 'time_y', 'matched_time'])
 
                 # Check whether the trafficlight_id column exists
                 trafficlight_col = 'trafficlight_id'
@@ -1386,11 +1524,14 @@ class FinalDataProcessor:
                     if phase_direction == row['vehicle_direction']:
                         # Find all traffic light IDs for this direction
                         relevant_ids = [tid for tid, direction in trafficlight_to_direction.items()
-                                       if direction == phase_direction]
+                                        if direction == phase_direction]
 
                         # If trafficlight_id exists in the EgoMap and the direction matches
-                        if trafficlight_col in row and not pd.isna(row[trafficlight_col]) and row[trafficlight_col] in relevant_ids:
-                            return row[trafficlight_col]
+                        if trafficlight_col in row:
+                            if not pd.isna(row[trafficlight_col]):
+                                if row[trafficlight_col] in relevant_ids:
+                                    return row[trafficlight_col]
 
                     return np.nan
 
@@ -1398,9 +1539,9 @@ class FinalDataProcessor:
                 df_merged['filtered_trafficlight_id'] = df_merged.apply(filter_relevant_traffic_light, axis=1)
 
                 # Clean up temporary columns: 'time' is now dropped together with
                 # time_x/time_y/matched_time right after the Traffic merge above
-                print(f"删除 time 列前 df_merged 的列: {df_merged.columns.tolist()}")
-                df_merged.drop(columns=['time'], inplace=True)
-                print(f"删除 time 列后 df_merged 的列: {df_merged.columns.tolist()}")
 
                 # Ensure the simTime column exists
                 if 'simTime' not in df_merged.columns:
@@ -1545,7 +1686,6 @@ class FinalDataProcessor:
 
         return df_merged
 
-
     def _process_trafficlight_data(self) -> pd.DataFrame:
         """Processes traffic light JSON data if available."""
         # Check if json_path is provided and exists
@@ -1640,6 +1780,7 @@ class FinalDataProcessor:
             print(f"Unexpected error processing traffic light data: {e}")
             return pd.DataFrame()
 
+
 # --- Rosbag Processing ---
 class RosbagProcessor:
     """Extracts data from HMIdata files within a ZIP archive."""
@@ -1652,7 +1793,6 @@ class RosbagProcessor:
         """Finds, extracts, and processes rosbags from the ZIP file."""
         print(f"--- Processing HMIdata in {self.config.zip_path} ---")
 
-
         with tempfile.TemporaryDirectory() as tmp_dir_str:
             try:
                 with zipfile.ZipFile(self.config.zip_path, 'r') as zip_ref:
@@ -1712,7 +1852,7 @@ def run_cpp_engine(config: Config):
             check=True,  # Raise exception on non-zero exit code
             capture_output=True,  # Capture stdout/stderr
             text=True,  # Decode output as text
-            cwd=config.engine_path.parent  # Run from the engine's directory? Or script's? Adjust if needed.
+            # cwd=config.engine_path.parent  # Run from the engine's directory? Or script's? Adjust if needed.
         )
         print("C++ Engine Output:")
         print(result.stdout)
@@ -1737,4 +1877,4 @@ def run_cpp_engine(config: Config):
 
 
 if __name__ == "__main__":
-    pass
+    pass
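
Note: the Traffic merge above replaces an exact join on rounded times with nearest-neighbor timestamp matching. A self-contained sketch of the cKDTree pattern on toy data (not the project's data):

    import pandas as pd
    from scipy.spatial import cKDTree

    df_merged = pd.DataFrame({'simTime': [0.00, 0.01, 0.02, 0.03]})
    df_traffic = pd.DataFrame({'simTime': [0.000, 0.021], 'phase': ['red', 'green']})

    # Build a 1-D KD-tree over the traffic timestamps; query each row's nearest one
    tree = cKDTree(df_traffic[['simTime']])
    distances, indices = tree.query(df_merged[['simTime']], k=1)
    df_merged['matched_time'] = df_traffic.iloc[indices.flatten()]['simTime'].values

    # Join on the matched timestamp instead of requiring exact equality
    out = pd.merge(df_merged, df_traffic, left_on='matched_time',
                   right_on='simTime', how='left', suffixes=('', '_traffic'))
    print(out[['simTime', 'matched_time', 'phase']])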

+ 149 - 144
core/processors/built_in/pgvil.py

@@ -7,8 +7,8 @@ from dataclasses import dataclass, field
 from typing import Dict, Optional
 from pathlib import Path
 import pandas as pd
-from core.error_handler import ErrorHandler
-from core.config_manager import get_config
+# from core.error_handler import ErrorHandler
+# from core.config_manager import get_config
 import sys
 import csv
 import os
@@ -65,7 +65,7 @@ def run_pgvil_engine(config: Config):
             check=True,  # Raise exception on non-zero exit code
             capture_output=True,  # Capture stdout/stderr
             text=True,  # Decode output as text
-            cwd=config.engine_path.parent  # Run from the engine's directory? Or script's? Adjust if needed.
+            # cwd=config.engine_path.parent  # Run from the engine's directory? Or script's? Adjust if needed.
         )
         print("C++ Engine Output:")
         print(result.stdout)
@@ -91,21 +91,40 @@ def run_pgvil_engine(config: Config):
 
 def remove_conflicting_columns(df_object, df_csv_info):
     """
-    delete the columns that are in both dataframes and are not simTime, simFrame, or playerId
+    Find the columns (other than simFrame and playerId) present in both tables
+    and drop the duplicates from df_csv_info; simTime is now deduplicated too.
     """
     conflicting_columns = set(df_object.columns) & set(df_csv_info.columns)
     for col in conflicting_columns:
-        if col not in ["simTime", "simFrame", "playerId"]:
+        if col not in ["simFrame", "playerId"]:
             del df_csv_info[col]
+
     return df_csv_info
 
 
 def align_simtime_by_simframe(df):
     # Map each simFrame to its representative simTime value
     sim_frame_to_time_map = df.groupby('simFrame')['simTime'].first().to_dict()
+    frames_sorted = sorted(sim_frame_to_time_map.keys())
+    times_sorted = [sim_frame_to_time_map[f] for f in frames_sorted]
+    times_head = times_sorted[:100]
+    if len(times_head) > 2:
+        # Estimate the frame period as the mode of the first (up to 100) rounded
+        # time deltas, then rebuild an evenly spaced timeline from the first timestamp
+        diffs = np.diff(times_head)
+        diffs_rounded = np.round(diffs, 3)
+        values, counts = np.unique(diffs_rounded, return_counts=True)
+        mode_dt = values[np.argmax(counts)]
+        new_frame_to_time_map = {
+            frame: round(times_sorted[0] + mode_dt * i, 3)
+            for i, frame in enumerate(frames_sorted)
+        }
+    else:
+        new_frame_to_time_map = sim_frame_to_time_map
 
     # Use the mapping to update the simTime values in the DataFrame
-    df['simTime'] = df['simFrame'].map(sim_frame_to_time_map)
+    df['simTime'] = df['simFrame'].map(new_frame_to_time_map)
     # Bail out if simFrame is empty or contains non-integer values
     if df['simFrame'].empty or not df['simFrame'].apply(lambda x: isinstance(x, (int, np.integer))).all():
         return df
@@ -131,7 +150,7 @@
                     new_row[col] = (prev_row[col] + next_row[col]) / 2
 
             # Update the simTime value
-            new_row['simTime'] = sim_frame_to_time_map.get(missing_frame, np.nan)
+            new_row['simTime'] = new_frame_to_time_map.get(missing_frame, np.nan)
 
             # Append the new row to the DataFrame
             new_rows.append(new_row)
@@ -180,6 +199,48 @@ def convert_heading(posH_rad):
     return round(heading_deg, 3)
 
 
+def find_closest_time(sim_time, sim_time_to_index, tolerance=0.04):
+    # Compute the absolute difference to every known simTime
+    diffs = {k: abs(k - sim_time) for k in sim_time_to_index.keys()}
+
+    # Step 1: prefer a match within the tolerance
+    within_tolerance = {k: v for k, v in diffs.items() if v <= tolerance}
+    if within_tolerance:
+        return min(within_tolerance, key=within_tolerance.get)
+
+    # Step 2: no match within tolerance, fall back to the closest value
+    return min(diffs, key=diffs.get)
+
+
+def convert_numeric_columns(df):
+    numeric_cols = df.select_dtypes(include=['number']).columns
+    # Columns forced to int; all other numeric columns become float
+    int_columns = ["simFrame", "playerId", "type", "stateMask", "ctrlId", "ifwarning"]
+    for col in numeric_cols:
+        if col in int_columns and col in df.columns:
+            df[col] = df[col].astype(int)
+        else:
+            df[col] = df[col].astype(float)
+    return df
+
+
+def safe_convert_numeric(df, name):
+    if df is None or df.empty:
+        return df
+    return convert_numeric_columns(df)
+
+
+def safe_align_simtime(df, name):
+    if df is None or df.empty:
+        return df
+    return align_simtime_by_simframe(df)
+
+
 class PGVILProcessor:
     """PGVIL数据处理器,实现PGVIL特有的处理逻辑"""
 
@@ -218,8 +279,6 @@ class PGVILProcessor:
         x_offset = self.config.x_offset
         y_offset = self.config.y_offset
         data_path = self.config.output_dir
-        # X_OFFSET = 258109.4239876
-        # Y_OFFSET = 4149969.964821
 
         # Define the CSV file paths
         try:
@@ -233,7 +292,7 @@ class PGVILProcessor:
             function_path = os.path.join(data_path, "Function.csv")
         except FileNotFoundError:
             raise Exception("File not found")
         df_object = read_csv_with_filled_columns(obj_state_path)
         df_map_info = read_csv_with_filled_columns(ego_map_path)
         df_lane_map = read_csv_with_filled_columns(lane_map_path)
@@ -245,66 +304,83 @@ class PGVILProcessor:
         if os.path.exists(function_path):
             df_function = read_csv_with_filled_columns(function_path)
 
-        # 检查并转换数值型列
-        def convert_numeric_columns(df):
-            numeric_cols = df.select_dtypes(include=['number']).columns
-            df[numeric_cols] = df[numeric_cols].astype(float)
-            return df
-
-        df_object = convert_numeric_columns(df_object)
-        df_map_info = convert_numeric_columns(df_map_info)
-        df_lane_map = convert_numeric_columns(df_lane_map)
-        df_laneINfo = convert_numeric_columns(df_laneINfo)
-        df_roadPos = convert_numeric_columns(df_roadPos)
-        df_vehicleystems = convert_numeric_columns(df_vehicleystems)
-        df_trafficlight = convert_numeric_columns(df_trafficlight)
-        if df_function is not None:
-            df_function = convert_numeric_columns(df_function)
-
         # Apply the x/y offsets to posX and posY in df_object
-        df_object['posX'] += x_offset
-        df_object['posY'] += y_offset
+        if df_object is not None and not df_object.empty:
+            df_object['posX'] += x_offset
+            df_object['posY'] += y_offset
 
         # Align simTime with simFrame
-        df_object = align_simtime_by_simframe(df_object)
-        df_map_info = align_simtime_by_simframe(df_map_info)
-        df_lane_map = align_simtime_by_simframe(df_lane_map)
-        df_laneINfo = align_simtime_by_simframe(df_laneINfo)
-        df_roadPos = align_simtime_by_simframe(df_roadPos)
-        df_vehicleystems = align_simtime_by_simframe(df_vehicleystems)
-        df_trafficlight = align_simtime_by_simframe(df_trafficlight)
-
-        del_ego_map = remove_conflicting_columns(df_object, df_map_info)  # 去掉重复的列
-        # 合并数据
-        merged_df = pd.merge(df_object, del_ego_map, on=["simTime", "simFrame", "playerId"], how="left")
-
-        # 使用simTime, simFrame, playerId合并ObjState和LaneMap\trafficlight\trafficlight
-        del_lane_map = remove_conflicting_columns(merged_df, df_lane_map)
-        merged_df = pd.merge(merged_df, del_lane_map, on=["simTime", "simFrame", "playerId"],
-                             how="left").drop_duplicates()
-        del_laneINfo = remove_conflicting_columns(merged_df, df_laneINfo)
-        merged_df = pd.merge(merged_df, del_laneINfo, on=["simTime", "simFrame", "playerId"],
-                             how="left").drop_duplicates()
-        del_roadPos = remove_conflicting_columns(merged_df, df_roadPos)
-        merged_df = pd.merge(merged_df, del_roadPos, on=["simTime", "simFrame", "playerId"],
-                             how="left").drop_duplicates()
-        del_trafficlight = remove_conflicting_columns(merged_df, df_trafficlight)
-        merged_df = pd.merge(merged_df, del_trafficlight, on=["simTime", "simFrame"], how="left").drop_duplicates()
-        del_vehicleystems = remove_conflicting_columns(merged_df, df_vehicleystems)
-        merged_df = pd.merge(merged_df, del_vehicleystems, on=["simTime", "simFrame", "playerId"],
-                             how="left").drop_duplicates()
-
-        tolerance = 0.01
-
-        def find_closest_time(sim_time, sim_time_to_index, tolerance=0.01):
-            # 找到最接近的时间点,并且该时间点的差异小于 tolerance
-            closest_time = min(sim_time_to_index.keys(),
-                               key=lambda y: abs(y - sim_time) if abs(y - sim_time) < tolerance else float('inf'))
-            return closest_time
+        df_object = safe_align_simtime(df_object, "df_object")
+        df_map_info = safe_align_simtime(df_map_info, "df_map_info")
+        df_lane_map = safe_align_simtime(df_lane_map, "df_lane_map")
+        df_laneINfo = safe_align_simtime(df_laneINfo, "df_laneINfo")
+        df_roadPos = safe_align_simtime(df_roadPos, "df_roadPos")
+        df_vehicleystems = safe_align_simtime(df_vehicleystems, "df_vehicleystems")
+        df_trafficlight = safe_align_simtime(df_trafficlight, "df_trafficlight")
+        print("0000000<<<<<<<<<<<<<<<<<<<<<")
+
+        df_object = safe_convert_numeric(df_object, "df_object")
+        df_map_info = safe_convert_numeric(df_map_info, "df_map_info")
+        df_lane_map = safe_convert_numeric(df_lane_map, "df_lane_map")
+        df_laneINfo = safe_convert_numeric(df_laneINfo, "df_laneINfo")
+        df_roadPos = safe_convert_numeric(df_roadPos, "df_roadPos")
+        df_vehicleystems = safe_convert_numeric(df_vehicleystems, "df_vehicleystems")
+        df_trafficlight = safe_convert_numeric(df_trafficlight, "df_trafficlight")
+        print("1111111<<<<<<<<<<<<<<<<<<<<<")
+        if df_function is not None:
+            df_function = safe_convert_numeric(df_function, "df_function")
+
+        # Merge ObjState with df_roadPos on simFrame and playerId
+        if df_object is not None and not df_object.empty and df_roadPos is not None and not df_roadPos.empty:
+            del_roadPos = remove_conflicting_columns(df_object, df_roadPos)
+            merged_df = df_object.merge(del_roadPos, on=["simFrame", "playerId"], how="inner")
+        else:
+            # merged_df would otherwise be unbound below
+            raise RuntimeError("ObjState or RoadPos data is empty; cannot build merged_df")
 
         # Map each simTime value in merged_df to its row index
         sim_time_to_index = {row['simTime']: idx for idx, row in merged_df.iterrows()}
-        if df_function is not None:
+        ego_df = merged_df[merged_df["playerId"] == 1].copy()  # split into ego and other players
+        other_df = merged_df[merged_df["playerId"] != 1].copy()
+        print("444444<<<<<<<<<<<<<<<<<<<<<")
+        # ego merge del_trafficlight
+        if df_trafficlight is not None and not df_trafficlight.empty:
+            df_trafficlight = df_trafficlight[df_trafficlight["ctrlId"] == 3][
+                ["simTime", "simFrame", "stateMask", "ctrlId"]].copy()
+            df_trafficlight = df_trafficlight.drop_duplicates(subset=["simTime", "simFrame", "ctrlId"]).reset_index(
+                drop=True)
+            if df_trafficlight.empty:
+                ego_df["stateMask"] = np.nan
+                ego_df["ctrlId"] = np.nan
+            else:
+                ego_df = pd.merge(ego_df, df_trafficlight, on=["simTime", "simFrame"], how="left")
+        else:
+            ego_df["stateMask"] = np.nan
+            ego_df["ctrlId"] = np.nan
+        merged_df = pd.concat([ego_df, other_df], ignore_index=True)
+
+        print("33333333<<<<<<<<<<<<<<<<<<<<<")
+        if df_laneINfo is not None and not df_laneINfo.empty:
+            del_laneINfo = remove_conflicting_columns(merged_df, df_laneINfo)
+            merged_df = pd.merge(merged_df, del_laneINfo, on=["simFrame", "playerId"], how="left").drop_duplicates()
+
+        if df_map_info is not None and not df_map_info.empty:
+            del_ego_map = remove_conflicting_columns(merged_df, df_map_info)
+            merged_df = pd.merge(merged_df, del_ego_map, on=["simFrame", "playerId"], how="left")
+
+        if df_lane_map is not None and not df_lane_map.empty:
+            del_lane_map = remove_conflicting_columns(merged_df, df_lane_map)
+            merged_df = pd.merge(merged_df, del_lane_map, on=["simFrame", "playerId"], how="left").drop_duplicates()
+
+        if df_vehicleystems is not None and not df_vehicleystems.empty:
+            del_vehicleystems = remove_conflicting_columns(merged_df, df_vehicleystems)
+            merged_df = pd.merge(merged_df, del_vehicleystems, on=["simFrame", "playerId"],
+                                 how="left").drop_duplicates()
+
+        if df_function is not None and not df_function.empty:
+            tolerance = 0.01
             df_function = df_function.sort_values(by='simTime').reset_index(drop=True)  # sort by simTime
             # find, for each simTime in function.csv, the nearest time point in merged_df
             df_function['nearest_simTime'] = df_function['simTime'].apply(
@@ -349,12 +425,18 @@ class PGVILProcessor:
         last_row_nearest_simtime = df_function.iloc[-1]['nearest_simTime']
         print(f"最后一行的 nearest_simTime: {last_row_nearest_simtime}")
         """
-        # Convert radians to degrees
-        columns_to_convert = ['posH', 'speedH', 'accelH']
+
+        # Convert heading from radians to degrees
+        columns_to_convert = ['posH']
         for col in columns_to_convert:
             if col in merged_df.columns:
                 merged_df[col] = merged_df[col].apply(convert_heading)
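+        # convert_heading (defined elsewhere in this file) is assumed to map a
+        # heading angle in radians to degrees in [0, 360); a minimal sketch:
+        #   def convert_heading(rad):
+        #       return float(np.degrees(rad)) % 360.0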
 
+        # Convert angular rates from rad/s to deg/s
+        rad_to_deg = 180 / np.pi
+        for col in ['speedH', 'accelH']:
+            if col in merged_df.columns:
+                merged_df[col] = merged_df[col] * rad_to_deg
+
         if 'posP' in merged_df.columns:
             merged_df.rename(columns={'posP': 'pitch_rate'}, inplace=True)
             merged_df['pitch_rate'] = merged_df['pitch_rate'].apply(convert_heading)
@@ -365,6 +447,7 @@ class PGVILProcessor:
         # Use infer_objects first so column dtypes are consistent
         merged_df = merged_df.infer_objects()
         merged_df.fillna(np.nan, inplace=True)  # normalize remaining missing values to NaN
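+        # Deterministic output: order rows by time, frame, then player id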
+        merged_df = merged_df.sort_values(by=["simTime", "simFrame", "playerId"]).reset_index(drop=True)
         merged_csv_path = Path(data_path) / "merged_ObjState.csv"
 
@@ -372,81 +455,3 @@ class PGVILProcessor:
 
         return merged_csv_path
 
-    # @ErrorHandler.measure_performance
-    # def process_built_in_data(self) -> Dict[str, Path]:
-    #     """PGVIL-specific built-in data processing logic
-
-    #     Processing order:
-    #     1. Process CAN data
-    #     2. Process sensor data
-    #     3. Process other PGVIL-specific data
-    #     4. Merge built-in data
-
-    #     Returns:
-    #         Dict of result file paths
-    #     """
-    #     result_files = {}
-
-    #     # 1. Process CAN data
-    #     print("1. Processing CAN data...")
-    #     can_results = self._process_can_data()
-    #     if can_results:
-    #         result_files.update(can_results)
-    #     else:
-    #         print("Warning: CAN data processing failed or returned no data")
-
-    #     # 2. Process sensor data
-    #     print("\n2. Processing sensor data...")
-    #     sensor_results = self._process_sensor_data()
-    #     if sensor_results:
-    #         result_files.update(sensor_results)
-    #     else:
-    #         print("Warning: sensor data processing failed or returned no data")
-
-    #     # 3. Process other PGVIL-specific data
-    #     print("\n3. Processing other PGVIL data...")
-    #     other_results = self._process_other_data()
-    #     if other_results:
-    #         result_files.update(other_results)
-
-    #     # 4. Merge built-in data
-    #     print("\n4. Merging built-in data...")
-    #     if not self._merge_built_in_data(result_files):
-    #         print("Warning: built-in data merge failed")
-
-    #     return result_files
-
-    # def _process_can_data(self) -> Dict[str, Path]:
-    #     """Process CAN data"""
-    #     # TODO: implement CAN data processing
-    #     return {}
-
-    # def _process_sensor_data(self) -> Dict[str, Path]:
-    #     """Process sensor data"""
-    #     # TODO: implement sensor data processing
-    #     return {}
-
-    # def _process_other_data(self) -> Dict[str, Path]:
-    #     """Process other PGVIL-specific data"""
-    #     # TODO: implement other data processing
-    #     return {}
-
-    # def _merge_built_in_data(self, result_files: Dict[str, Path]) -> bool:
-    #     """Merge PGVIL built-in data
-
-    #     Args:
-    #         result_files: dict of result file paths
-
-    #     Returns:
-    #         whether the merge succeeded
-    #     """
-    #     try:
-    #         # implement PGVIL-specific data merge logic
-    #         return True
-    #     except Exception as e:
-    #         print(f"Built-in data merge failed: {e}")
-    #         return False
-
-
-
-

+ 158 - 6
run.py

@@ -27,7 +27,7 @@ def parse_arguments():
         '--data-type',
         type=str,
         choices=['lst', 'pgvil'],
-        default='lst',
+        default='pgvil',
         help='Data type to process (lst or pgvil)'
     )
 
@@ -35,21 +35,170 @@ def parse_arguments():
     parser.add_argument(
         '--zip-path',
         type=Path,
-        default=Path('/home/kevin/kevin/zhaoyuan/sqlite3_demo/docker_build/preprocess_run/data/V2I_CSAE53-2020_RedLightViolationW_LST_01-01.zip'),
+        # nargs = '+',
+        # default=Path('/home/AD_CSAE285-2022_PedestrianAndTwo-wheelerRecognitionAndResponse_LST_010.zip'),
+        # default=Path('/home/AD_CSAE285-2022_PedestrianAndTwo-wheelerRecognitionAndResponse_LST_01.zip'),
+        # default=Path('/home/AD_CSAE285-2022_PedestrianAndTwo-wheelerRecognitionAndResponse_LST_02.zip'),
+        # default=Path('/home/TSRAR-09_2025-07-14_11-12-48.zip'),
+        # default=Path('/home/BEI_2025-07-15_17-52-09.zip'),
+        # default=Path('/home/DONG_2025-07-15_17-48-21.zip'),
+        # default=Path('/home/DONGCAN_2025-07-17_11-37-45.zip'),
+        # default=Path('/home/BEICAN_2025-07-17_11-33-48.zip'),
+        # default=Path('/home/AD_GBT41798-2022_VehiclesIdentifR_PGVL_03.zip'),
+        # default=Path('/home/YAWCAN_2025-07-17_11-40-03.zip'),
+        # default=Path('/home/YAWCANUFO_2025-07-17_16-39-37.zip'),
+        # default=Path('/home/DONGCANUFO_2025-07-17_16-35-49.zip'),
+        # default=Path('/home/BEICANUFO_2025-07-17_16-30-57.zip'),
+        # default=Path('/home/all_data_TrafficLightRecognitionAndResponse/AD_CSAE285-2022_TrafficLightRecognitionAndResponse_LST_01.zip'),
+        # default=Path('/home/AD_GBT41798-2022_TrafficSignalRecognitionAndResponse_LST_03.zip'),
+        # default=Path('/home/V2I_CSAE53-2020_LeftTurnAssist_PGVIL_ini.zip'),
+        # default = Path('/home/三轮预处理数据/AD_GBT41798-2022_RoadTraInfraObstRR_LST_14.zip'),
+        # default = Path('/home/三轮预处理数据/AD_GBT41798-2022_RoadTraInfraObstRR_LST_15.zip'),
+        # default = Path('/home/三轮预处理数据/AD_GBT41798-2022_TrafficSignalRR_LST_04.zip'),
+        # default = Path('/home/三轮预处理数据/AD_GBT41798-2022_TrafficSignalRR_LST_05.zip'),
+        # default = Path('/home/三轮预处理数据/AD_GBT41798-2022_TrafficSignalRR_LST_06.zip'),
+        # default = Path('/home/三轮预处理数据/AD_GBT41798-2022_TrafficSignalRR_LST_07.zip'),
+        # default = Path('/home/三轮预处理数据/AD_GBT41798-2022_TrafficSignalRR_LST_08.zip'),
+        # default = Path('/home/三轮预处理数据/AD_GBT41798-2022_TrafficSignalRR_LST_09.zip'),
+        # default = Path('/home/三轮预处理数据/V2C_ReferenceArchitecture1.0_LitterW_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/V2C_ReferenceArchitecture1.0_LitterW_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/V2I_CSAE53-2020_HazardousLocationW_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/V2I_CSAE53-2020_HazardousLocationW_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/V2I_CSAE53-2020_HazardousLocationW_LST_03.zip'),
+        # 25-07-26
+        # default = Path('/home/三轮预处理数据/AD_GBT41798-2022_LeastRiskStrateg_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/AD_GBT41798-2022_LeastRiskStrateg_LST_03.zip'),
+        # default = Path('/home/三轮预处理数据/AD_GBT41798-2022_RoadTraInfraObstRR_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/AD_GBT41798-2022_RoadTraInfraObstRR_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/V2I_CSAE53-2020_LeftTurnAssist_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/V2I_CSAE53-2020_LeftTurnAssist_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/V2V_CSAE53-2020_LeftTurnAssist_LST_03.zip'),
+        # default = Path('/home/三轮预处理数据/V2V_CSAE53-2020_LeftTurnAssist_LST_04.zip'),
+        # 25-07-27
+        # default = Path('/home/三轮预处理数据/双车/AD_GBT41798-2022_RoadTraInfraObstRR_LST_08.zip'),
+        # default = Path('/home/三轮预处理数据/双车/AD_GBT41798-2022_RoadTraInfraObstRR_LST_09.zip'),
+        # default = Path('/home/三轮预处理数据/双车/AD_GBT41798-2022_RoadTraInfraObstRR_LST_010.zip'),
+        # default = Path('/home/三轮预处理数据/双车/AD_GBT41798-2022_RoadTraInfraObstRR_LST_011.zip'),
+        # default = Path('/home/三轮预处理数据/双车/AD_GBT41798-2022_RoadTraInfraObstRR_LST_012.zip'),
+        # default = Path('/home/三轮预处理数据/双车/AD_GBT41798-2022_RoadTraInfraObstRR_LST_013.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2C_ReferenceArchitecture1.0_OtherVehicleRedLightViolationW_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2C_ReferenceArchitecture1.0_OtherVehicleRedLightViolationW_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2I_CSAE157-2020_CooperativeIntersectionPassing_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_EmergencyBrakeW_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_EmergencyBrakeW_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_03.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_04.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_05.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_06.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_07.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_08.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_09.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_016.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_017.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_018.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_019.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_020.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_021.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_022.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_023.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_024.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_025.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_026.zip'),
+        # default = Path('/home/三轮预处理数据/双车/V2V_CSAE53-2020_ForwardCollision_LST_027.zip'),
+        # 25-07-29
+        # default = Path('/home/三轮预处理数据/25-07-29/AD_GBT41798-2022_AutoEmergencyEvacuation_LST_06.zip'),
+        # default = Path('/home/三轮预处理数据/25-07-29/AD_GBT41798-2022_RoadTraInfraObstRR_LST_16.zip'),
+        # default = Path('/home/三轮预处理数据/25-07-29/AD_GBT41798-2022_RoadTraInfraObstRR_LST_17.zip'),
+        # default = Path('/home/三轮预处理数据/25-07-29/AD_GBT41798-2022_VehiclesIdentifyRR_LST_03.zip'),
+        # default = Path('/home/三轮预处理数据/25-07-29/AD_GBT41798-2022_VehiclesIdentifyRR_LST_04.zip'),
+        # default = Path('/home/三轮预处理数据/25-07-29/AD_GBT41798-2022_VehiclesIdentifyRR_LST_05.zip'),
+        # default = Path('/home/三轮预处理数据/25-07-29/AD_GBT41798-2022_VehiclesIdentifyRR_LST_06.zip'),
+        # default = Path('/home/三轮预处理数据/25-07-29/AD_GBT41798-2022_VehiclesIdentifyRR_LST_07.zip'),
+        # default = Path('/home/三轮预处理数据/25-07-29/AD_GBT41798-2022_VehiclesIdentifyRR_LST_08.zip'),
+        # default = Path('/home/三轮预处理数据/25-07-29/AD_GBT41798-2022_VehiclesIdentifyRR_LST_09.zip'),
+        # default = Path('/home/三轮预处理数据/25-07-29/AD_GBT41798-2022_VehiclesIdentifyRR_LST_010.zip'),
+        # default = Path('/home/三轮预处理数据/25-07-29/V2C_ReferenceArchitecture1.0_AbnormalVehicleW_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-07-29/V2C_ReferenceArchitecture1.0_AbnormalVehicleW_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/25-07-29/V2C_ReferenceArchitecture1.0_AbnormalVehicleW_LST_03.zip'),
+        # default = Path('/home/三轮预处理数据/25-07-29/V2C_ReferenceArchitecture1.0_RampMerge_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-07-29/V2C_ReferenceArchitecture1.0_RampMerge_LST_02.zip'),
+        # 25-07-31
+        # default = Path('/home/三轮预处理数据/25-7-31/AD_GBT41798-2022_ParkingT_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/AD_GBT41798-2022_ParkingT_02.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/AD_GBT41798-2022_RoadTraInfraObstRR_LST_05.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/AD_GBT41798-2022_RoadTraInfraObstRR_LST_06.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/V2C_ReferenceArchitecture1.0_DrivingLaneRecommendation_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/V2C_ReferenceArchitecture1.0_DrivingLaneRecommendation_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/V2C_ReferenceArchitecture1.0_DrivingLaneRecommendation_LST_03.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/V2C_ReferenceArchitecture1.0_DynamicSpeedLimitingInformation_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/V2C_ReferenceArchitecture1.0_DynamicSpeedLimitingInformation_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/V2C_ReferenceArchitecture1.0_EmergencyVehiclesPriority_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/V2C_ReferenceArchitecture1.0_SignalLightReminder_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/V2C_ReferenceArchitecture1.0_SignalLightReminder_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/V2C_ReferenceArchitecture1.0_VisibilityW_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/V2C_ReferenceArchitecture1.0_VisibilityW_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/V2I_CSAE53-2020_GreenLightOptimalSpeedAdvisory_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/V2I_CSAE53-2020_GreenLightOptimalSpeedAdvisory_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/V2I_CSAE53-2020_RedLightViolationW_LST_01.zip'),
+        default = Path('/home/V2X_CSAE53-2020_RedLightViolationW_PGVIL_02_VR.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/V2I_CSAE53-2020_RedLightViolationW_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31/V2I_CSAE53-2020_RedLightViolationW_LST_03.zip'),
+        # 25-7-31-2
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_AutoEmergencyEvacuation_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_AutoEmergencyEvacuation_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_AutoEmergencyEvacuation_LST_03.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_AutoEmergencyEvacuation_LST_04.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_DynaDrivTaskInterventionTakeover_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_PedNonMotoVehIdentifyRR_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_PedNonMotoVehIdentifyRR_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_PedNonMotoVehIdentifyRR_LST_03.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_PedNonMotoVehIdentifyRR_LST_04.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_PedNonMotoVehIdentifyRR_LST_05.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_PedNonMotoVehIdentifyRR_LST_06.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_PedNonMotoVehIdentifyRR_LST_07.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_PedNonMotoVehIdentifyRR_LST_08.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_TrafficSignalRR_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_TrafficSignalRR_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_TrafficSignalRR_LST_03.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_VehiclesIdentifyRR_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/AD_GBT41798-2022_VehiclesIdentifyRR_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/V2C_ReferenceArchitecture1.0_NsightVulnerableRoadUserCollisionW_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/V2C_ReferenceArchitecture1.0_NsightVulnerableRoadUserCollisionW_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/25-7-31-2/V2C_ReferenceArchitecture1.0_NsightVulnerableRoadUserCollisionW_LST_03.zip'),
+        # 25-08-01
+        # default = Path('/home/三轮预处理数据/25-08-01/AD_GBT41798-2022_RoadTraInfraObstRR_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-08-01/AD_GBT41798-2022_RoadTraInfraObstRR_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/25-08-01/V2C_ReferenceArchitecture1.0_TrafficJamW_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-08-01/V2C_ReferenceArchitecture1.0_TrafficJamW_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/25-08-01/V2I_CSAE157-2020_CooperativeIntersectionPassing_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/25-08-01/V2I_CSAE157-2020_CooperativeIntersectionPassing_LST_03.zip'),
+        # delivery
+        # default = Path('/home/三轮预处理数据/delivery/AD_GBT41798-2022_TrafficSignalRecognitionAndResponse_LST_01.zip'),
+        # default = Path('/home/三轮预处理数据/delivery/AD_GBT41798-2022_TrafficSignalRecognitionAndResponse_LST_02.zip'),
+        # default = Path('/home/三轮预处理数据/delivery/AD_GBT41798-2022_TrafficSignalRecognitionAndResponse_LST_03.zip'),
+        # default = Path('/home/三轮预处理数据/delivery/AD_GBT41798-2022_TrafficSignalRecognitionAndResponse_LST_04.zip'),
+        # default = Path('/home/三轮预处理数据/delivery/AD_GBT41798-2022_TrafficSignalRecognitionAndResponse_LST_05.zip'),
+        # default = Path('/home/三轮预处理数据/delivery/AD_GBT41798-2022_TrafficSignalRecognitionAndResponse_LST_06.zip'),
+        # default = Path('/home/三轮预处理数据/delivery/AD_GBT41798-2022_TrafficSignalRecognitionAndResponse_LST_07.zip'),
+        # default = Path('/home/三轮预处理数据/delivery/AD_GBT41798-2022_TrafficSignalRecognitionAndResponse_LST_08.zip'),
+        # default = Path('/home/三轮预处理数据/delivery/AD_GBT41798-2022_TrafficSignalRecognitionAndResponse_LST_09.zip'),
         help='Path to the input ZIP data file'
     )
 
     parser.add_argument(
         '--trafficlight-json',
         type=Path,
-        default='/home/kevin/kevin/zhaoyuan/sqlite3_demo/docker_build/preprocess_run/data/process_20250421_154131.json',
+        default='/home/predataprocess_report.json',
+        # default='/home/RedLightViolationW_LST_01.json',
         help='Path to the traffic-light JSON configuration file'
     )
 
     parser.add_argument(
         '--output-dir',
         type=Path,
-        default=Path('./output/'),
+        default=Path('/home/output/'),
         help='Base path for the output directory'
     )
 
@@ -63,14 +212,16 @@ def parse_arguments():
     parser.add_argument(
         '--x-offset',
         type=float,
-        default=0.0,
+        default=258106.4554485,
         help='X coordinate offset'
     )
 
     parser.add_argument(
         '--y-offset',
         type=float,
-        default=0.0,
+        default=4149956.271320,
         help='Y coordinate offset'
     )
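+    # These offsets are assumed to re-centre global UTM coordinates near the
+    # origin; a sketch of the assumed downstream use (the actual application
+    # lives in the processors, not here):
+    #   df["posX"] -= args.x_offset
+    #   df["posY"] -= args.y_offset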
 
@@ -205,6 +356,7 @@ def process_plugins(args, output_dir, final_csv_path):
 
     # Process custom data
     print("Processing and merging custom data...")
+    # for zip_path in args.zip_path:
     folders = resource_manager.list_zip_folders(args.zip_path)
 
     for folder in folders: