data_process.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. import os
  2. import numpy as np
  3. import pandas as pd
  4. import yaml
  5. from modules.lib.log_manager import LogManager
  6. class DataPreprocessing:
  7. def __init__(self, data_path, config_path):
  8. self.data_path = data_path
  9. self.case_name = os.path.basename(os.path.normpath(data_path))
  10. self.config_path = config_path
  11. # 初始化配置容器
  12. self.vehicle_config = {}
  13. self.safety_config = {}
  14. self.comfort_config = {}
  15. self.efficient_config = {}
  16. self.function_config = {}
  17. self.traffic_config = {}
  18. # 数据容器
  19. self.object_df = pd.DataFrame()
  20. self.driver_ctrl_df = pd.DataFrame()
  21. self.vehicle_sys_df = pd.DataFrame()
  22. self.ego_data_df = pd.DataFrame()
  23. self.obj_data = {}
  24. self.ego_data = {}
  25. self.obj_id_list = []
  26. self.data_quality_level = 15
  27. # 加载配置并处理数据
  28. self._get_yaml_config()
  29. self._process_mode()
  30. self.report_info = self._get_report_info(self.ego_data)
  31. def _process_mode(self):
  32. """当前支持默认处理方式"""
  33. self._real_process_object_df()
  34. def _get_yaml_config(self):
  35. with open(self.config_path, 'r') as f:
  36. full_config = yaml.safe_load(f)
  37. T_threshold_config = {"T_threshold": full_config.get("T_threshold", {})}
  38. self.vehicle_config = full_config.get("vehicle", {})
  39. self.safety_config = {"safety": full_config.get("safety", {})}
  40. self.safety_config.update(T_threshold_config)
  41. self.comfort_config = {"comfort": full_config.get("comfort", {})}
  42. self.comfort_config.update(T_threshold_config)
  43. self.efficient_config = {"efficient": full_config.get("efficient", {})}
  44. self.efficient_config.update(T_threshold_config)
  45. self.function_config = {"function": full_config.get("function", {})}
  46. self.function_config.update(T_threshold_config)
  47. self.traffic_config = {"traffic": full_config.get("traffic", {})}
  48. self.traffic_config.update(T_threshold_config)
  49. def _plot_coordinate_comparison(self, ego_df):
  50. """绘制全局坐标系与自车坐标系的运动学量对比图"""
  51. try:
  52. import matplotlib.pyplot as plt
  53. from matplotlib.gridspec import GridSpec
  54. import os
  55. plt.figure(figsize=(18, 10))
  56. gs = GridSpec(3, 3, figure=plt.gcf()) # 改为 3 行 3 列布局
  57. # 1. 速度对比 - 纵向分量
  58. ax1 = plt.subplot(gs[0, 0])
  59. # ax1.plot(ego_df['simTime'], ego_df['speedX'], 'r-', label='Global Vx')
  60. # ax1.plot(ego_df['simTime'], ego_df['speedY'], 'g-', label='Global Vy')
  61. ax1.plot(ego_df['simTime'], ego_df['lon_v_vehicle'], 'b-', label='Ego Frame Lon V (2D)')
  62. # ax1.plot(ego_df['simTime'], ego_df['lon_v_vehicle_3D'], 'y--', label='Ego Frame Lon V (3D)')
  63. ax1.set_title('Longitudinal Velocity Comparison')
  64. ax1.set_ylabel('Velocity (m/s)')
  65. ax1.legend()
  66. ax1.grid(True)
  67. # 2. 速度对比 - 横向分量
  68. ax2 = plt.subplot(gs[0, 1])
  69. # ax2.plot(ego_df['simTime'], ego_df['speedX'], 'r-', label='Global Vx')
  70. # ax2.plot(ego_df['simTime'], ego_df['speedY'], 'g-', label='Global Vy')
  71. ax2.plot(ego_df['simTime'], ego_df['lat_v_vehicle'], 'b-', label='Ego Frame Lat V (2D)')
  72. # ax2.plot(ego_df['simTime'], ego_df['lat_v_vehicle_3D'], 'y--', label='Ego Frame Lat V (3D)')
  73. ax2.set_title('Lateral Velocity Comparison')
  74. ax2.legend()
  75. ax2.grid(True)
  76. # 3. 航向角速度(新增子图)
  77. ax3 = plt.subplot(gs[0, 2])
  78. ax3.plot(ego_df['simTime'], ego_df['speedH'], 'm-', label='Heading Rate')
  79. ax3.set_title('Ego Heading Rate')
  80. ax3.set_ylabel('Yaw Rate (deg/s)')
  81. ax3.grid(True)
  82. # 4. 加速度对比 - 纵向分量
  83. ax4 = plt.subplot(gs[1, 0])
  84. # ax4.plot(ego_df['simTime'], ego_df['accelX'], 'r-', label='Global Ax')
  85. # ax4.plot(ego_df['simTime'], ego_df['accelY'], 'g-', label='Global Ay')
  86. ax4.plot(ego_df['simTime'], ego_df['lon_acc_vehicle'], 'b-', label='Ego Frame Lon A (2D)')
  87. # ax4.plot(ego_df['simTime'], ego_df['lon_acc_vehicle_3D'], 'y--', label='Ego Frame Lon A (3D)')
  88. ax4.set_title('Longitudinal Acceleration Comparison')
  89. ax4.set_ylabel('Acceleration (m/s²)')
  90. ax4.legend()
  91. ax4.grid(True)
  92. # 5. 加速度对比 - 横向分量
  93. ax5 = plt.subplot(gs[1, 1])
  94. # ax5.plot(ego_df['simTime'], ego_df['accelX'], 'r-', label='Global Ax')
  95. # ax5.plot(ego_df['simTime'], ego_df['accelY'], 'g-', label='Global Ay')
  96. ax5.plot(ego_df['simTime'], ego_df['lon_acc_rate'], 'b-', label='Ego Frame Lat A (2D)')
  97. # ax5.plot(ego_df['simTime'], ego_df['lat_acc_vehicle_3D'], 'y--', label='Ego Frame Lat A (3D)')
  98. ax5.set_title('Lateral Acceleration Comparison')
  99. ax5.legend()
  100. ax5.grid(True)
  101. # 6. 航向角变化(原图,平移至右侧)
  102. ax6 = plt.subplot(gs[1, 2])
  103. ax6.plot(ego_df['simTime'], ego_df['ego_posH'], 'b-', label='Ego Heading')
  104. ax6.set_title('Ego Vehicle Heading')
  105. ax6.set_ylabel('Heading (deg)')
  106. ax6.grid(True)
  107. plt.tight_layout()
  108. # 保存图像到当前目录
  109. plot_path = os.path.join(os.getcwd(), 'coordinate_transform_comparison.png')
  110. plt.savefig(plot_path)
  111. plt.close()
  112. logger = LogManager().get_logger()
  113. logger.info(f"坐标系转换对比图已保存至: {plot_path}")
  114. except ImportError:
  115. logger = LogManager().get_logger()
  116. logger.warning("Matplotlib未安装,无法生成坐标系转换对比图")
  117. except Exception as e:
  118. logger = LogManager().get_logger()
  119. logger.error(f"生成坐标系转换对比图时出错: {e}", exc_info=True)
  120. def _real_process_object_df(self):
  121. """读取并处理对象数据(含自车)"""
  122. try:
  123. merged_csv_path = os.path.join(self.data_path, "merged_ObjState.csv")
  124. if not os.path.exists(merged_csv_path):
  125. logger = LogManager().get_logger()
  126. logger.error(f"文件不存在: {merged_csv_path}")
  127. raise FileNotFoundError(f"文件不存在: {merged_csv_path}")
  128. df = pd.read_csv(
  129. merged_csv_path,
  130. dtype={"simTime": float},
  131. on_bad_lines="skip",
  132. na_values=["", "NA", "null", "NaN"]
  133. ).drop_duplicates(subset=["simTime", "simFrame", "playerId"])
  134. # 列名标准化
  135. df.columns = [col.replace("+AF8-", "_") for col in df.columns]
  136. df = df.dropna(subset=["type"]).reset_index(drop=True)
  137. # ===== 修正航向角处理 =====
  138. # 确保航向角在-180到180度范围内(0度正北,90度正东)
  139. if 'posH' in df.columns:
  140. df['posH'] = df['posH'] % 360 # 规范化到0-360度
  141. df['posH'] = np.where(df['posH'] > 180, df['posH'] - 360, df['posH'])
  142. # ===== 提取自车数据 =====
  143. # 自车ID固定为1
  144. EGO_PLAYER_ID = 1
  145. ego_mask = df['playerId'] == EGO_PLAYER_ID
  146. # 确保所有车辆都有pitch和roll列
  147. for col in ['pitch', 'roll']:
  148. if col not in df.columns:
  149. df[col] = 0.0
  150. # 提取自车姿态数据
  151. ego_cols = ['simTime', 'posX', 'posY', 'posH', 'pitch', 'roll']
  152. ego_df = df[ego_mask][ego_cols].copy()
  153. ego_df.columns = ['simTime'] + [f'ego_{col}' for col in ego_cols[1:]]
  154. # 合并自车姿态信息到主表
  155. df = pd.merge(df, ego_df, on='simTime', how='left')
  156. # ===== 计算相对位置(在自车坐标系下)=====
  157. # 计算全局坐标差
  158. dx_global = df['posX'] - df['ego_posX']
  159. dy_global = df['posY'] - df['ego_posY']
  160. # 转换为弧度并计算三角函数
  161. ego_heading_rad = np.deg2rad(df['ego_posH'])
  162. cos_h = np.cos(ego_heading_rad)
  163. sin_h = np.sin(ego_heading_rad)
  164. # 向量化计算相对坐标(在自车坐标系下)
  165. df['x_relative_start_dist'] = dx_global * sin_h + dy_global * cos_h
  166. df['y_relative_start_dist'] = -dx_global * cos_h + dy_global * sin_h
  167. # 自车相对位置设为0
  168. df.loc[ego_mask, 'x_relative_start_dist'] = 0.0
  169. df.loc[ego_mask, 'y_relative_start_dist'] = 0.0
  170. # 保存原始数据
  171. self.object_df = df.copy()
  172. # 处理运动学数据(所有车辆在自车坐标系下)
  173. all_processed_dfs = []
  174. self.obj_data = {}
  175. for obj_id, obj_df in df.groupby("playerId"):
  176. # 使用自车姿态计算运动学量
  177. processed_df = self._calculate_kinematics_in_ego_frame(obj_df)
  178. self.obj_data[obj_id] = processed_df
  179. all_processed_dfs.append(processed_df)
  180. # 合并处理后的数据
  181. if all_processed_dfs:
  182. combined_processed = pd.concat(all_processed_dfs)
  183. # 生成唯一键
  184. self.object_df['merge_key'] = self.object_df['playerId'].astype(str) + '_' + self.object_df[
  185. 'simTime'].astype(str)
  186. combined_processed['merge_key'] = combined_processed['playerId'].astype(str) + '_' + combined_processed[
  187. 'simTime'].astype(str)
  188. # 合并新列
  189. new_columns = [col for col in combined_processed.columns
  190. if col not in self.object_df.columns or col == 'merge_key']
  191. self.object_df = pd.merge(
  192. self.object_df,
  193. combined_processed[new_columns],
  194. on='merge_key',
  195. how='left'
  196. ).drop(columns=['merge_key'])
  197. # 设置自车数据
  198. self.obj_id_list = list(self.obj_data.keys())
  199. self.ego_data = self.obj_data.get(EGO_PLAYER_ID, None)
  200. if self.ego_data is not None:
  201. # self._plot_coordinate_comparison(self.ego_data)
  202. pass
  203. except Exception as e:
  204. logger = LogManager().get_logger()
  205. logger.error(f"处理对象数据帧时出错: {e}", exc_info=True)
  206. raise
  207. def _calculate_kinematics_in_ego_frame(self, df):
  208. """计算车辆在自车坐标系下的运动学量,优化版本"""
  209. # 创建副本避免修改原始数据,只复制必要的列以减少内存使用
  210. needed_cols = ['simTime', 'ego_posH', 'ego_pitch', 'ego_roll', 'speedX', 'speedY', 'accelX', 'accelY', 'speedH']
  211. df = df[needed_cols + [col for col in df.columns if col not in needed_cols]].copy()
  212. # 确保有必要的列,使用向量化操作
  213. for col in ['speedZ', 'accelZ']:
  214. if col not in df.columns:
  215. df[col] = 0.0
  216. # 处理时间序列 - 使用向量化操作
  217. df.sort_values(by="simTime", inplace=True)
  218. df.reset_index(drop=True, inplace=True)
  219. df["time_diff"] = df["simTime"].diff().replace(0, np.nan).fillna(method="bfill").fillna(0.01)
  220. # 预计算所有三角函数值,避免重复计算
  221. # ===== 1. 航向角处理 =====
  222. ego_yaw_rad = np.deg2rad(90 - df['ego_posH'].values) # 转换为数学坐标系
  223. ego_pitch_rad = np.deg2rad(df['ego_pitch'].values)
  224. ego_roll_rad = np.deg2rad(df['ego_roll'].values)
  225. yaw_rad = np.deg2rad(df['ego_posH'].values)
  226. # 预计算所有三角函数值
  227. cy_math, sy_math = np.cos(ego_yaw_rad), np.sin(ego_yaw_rad)
  228. cp, sp = np.cos(ego_pitch_rad), np.sin(ego_pitch_rad)
  229. cr, sr = np.cos(ego_roll_rad), np.sin(ego_roll_rad)
  230. cy, sy = np.cos(yaw_rad), np.sin(yaw_rad)
  231. # 获取全局速度/加速度向量
  232. vx = df['speedX'].values # 东
  233. vy = df['speedY'].values # 北
  234. vz = df['speedZ'].values # 天
  235. ax = df['accelX'].values
  236. ay = df['accelY'].values
  237. az = df['accelZ'].values
  238. # ===== 2. 三维转换 =====
  239. # 构建旋转矩阵 - 使用批量计算
  240. r00 = cy_math * cp
  241. r01 = cy_math * sp * sr - sy_math * cr
  242. r02 = cy_math * sp * cr + sy_math * sr
  243. r10 = sy_math * cp
  244. r11 = sy_math * sp * sr + cy_math * cr
  245. r12 = sy_math * sp * cr - cy_math * sr
  246. r20 = -sp
  247. r21 = cp * sr
  248. r22 = cp * cr
  249. # 转换速度向量 - 使用向量化计算代替einsum
  250. df['lon_v_vehicle_3D'] = r00 * vx + r01 * vy + r02 * vz
  251. df['lat_v_vehicle_3D'] = r10 * vx + r11 * vy + r12 * vz
  252. df['vel_z_vehicle_3D'] = r20 * vx + r21 * vy + r22 * vz
  253. # 转换加速度向量 - 使用向量化计算
  254. df['lon_acc_vehicle_3D'] = r00 * ax + r01 * ay + r02 * az
  255. df['lat_acc_vehicle_3D'] = r10 * ax + r11 * ay + r12 * az
  256. df['acc_z_vehicle_3D'] = r20 * ax + r21 * ay + r22 * az
  257. # ===== 3. 二维转换(与3D水平面一致)=====
  258. # 使用原始航向角定义(0°=北,90°=东,顺时针为正)
  259. # 使用向量化计算
  260. df['lon_v_vehicle'] = vx * sy - vy * cy
  261. df['lat_v_vehicle'] = vx * cy + vy * sy
  262. df['lon_acc_vehicle'] = ax * sy - ay * cy
  263. df['lat_acc_vehicle'] = ax * cy + ay * sy
  264. # ===== 5. 计算其他运动学量 =====
  265. # 速度大小 - 使用向量化计算
  266. df["v"] = np.sqrt(vx ** 2 + vy ** 2)
  267. # 加速度变化率 - 使用向量化计算
  268. lat_acc_diff = np.diff(df["lat_acc_vehicle"].values, prepend=df["lat_acc_vehicle"].values[0])
  269. lon_acc_diff = np.diff(df["lon_acc_vehicle"].values, prepend=df["lon_acc_vehicle"].values[0])
  270. yawrate_diff = np.diff(df["speedH"].values, prepend=df["speedH"].values[0])
  271. time_diff = df["time_diff"].values
  272. # 使用numpy操作代替pandas操作
  273. df["lat_acc_rate"] = np.where(time_diff == 0, 0, lat_acc_diff / time_diff)
  274. df["lon_acc_rate"] = np.where(time_diff == 0, 0, lon_acc_diff / time_diff)
  275. df["accelH"] = np.where(time_diff == 0, 0, yawrate_diff / time_diff)
  276. # 替换无穷值
  277. df.replace([np.inf, -np.inf], np.nan, inplace=True)
  278. df.fillna(0, inplace=True)
  279. return df
  280. def _get_report_info(self, df):
  281. # 提取里程与时长
  282. return {
  283. "mileage": self._mileage_cal(df),
  284. "duration": self._duration_cal(df)
  285. }
  286. def _mileage_cal(self, df):
  287. if len(df) < 2:
  288. return 0.0
  289. if df["travelDist"].nunique() == 1:
  290. temp_df = df.copy()
  291. temp_df["time_diff"] = temp_df["simTime"].diff().fillna(0)
  292. temp_df["avg_speed"] = (temp_df["v"] + temp_df["v"].shift()).fillna(0) / 2
  293. temp_df["distance_increment"] = temp_df["avg_speed"] * temp_df["time_diff"] / 3.6
  294. temp_df["travelDist"] = temp_df["distance_increment"].cumsum().fillna(0)
  295. return round(temp_df["travelDist"].iloc[-1] - temp_df["travelDist"].iloc[0], 2)
  296. else:
  297. return round(df["travelDist"].max() - df["travelDist"].min(), 2)
  298. def _duration_cal(self, df):
  299. return df["simTime"].iloc[-1] - df["simTime"].iloc[0]