# data_process.py
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. ##################################################################
  4. #
  5. # Copyright (c) 2024 CICV, Inc. All Rights Reserved
  6. #
  7. ##################################################################
  8. """
  9. @Authors: zhanghaiwen(zhanghaiwen@china-icv.cn)
  10. @Data: 2024/10/17
  11. @Last Modified: 2024/10/17
  12. @Summary: Evaluation functions
  13. """
  14. import os
  15. import sys
  16. import yaml
  17. import traceback
  18. import numpy as np
  19. import pandas as pd
  20. from collections import Counter
  21. from pathlib import Path
  22. root_path = Path(__file__).resolve().parent.parent
  23. sys.path.append(str(root_path))
  24. from models.common import log
  25. from config import config
  26. log_path = config.LOG_PATH # 后边改成python包传递路径
  27. logger = log.get_logger(log_path)
  28. class DataQuality(object):
  29. def __init__(self, df=None):
  30. self.df = df
  31. self.frame_list = []
  32. self.frame_diff_list = []
  33. self.frame_diff_counter = {}
  34. self.total_frame_count = 0
  35. self.max_frame_number = 0
  36. self.frame_loss_count = 0
  37. self.frame_loss_rate = 0
  38. self.frame_loss_max = 0
  39. self.frame_loss_max_start = []
  40. self.result = ""
  41. def quality_detect(self):
  42. self.frame_extract()
  43. if self.frame_list:
  44. self.cal_total_frame_count()
  45. self.cal_max_frame_number()
  46. self.cal_frame_diff()
  47. self.cal_frame_loss_count()
  48. self.cal_frame_loss_rate()
  49. self.cal_frame_loss_max()
  50. self.cal_frame_diff_counter()
  51. self.result_print()
  52. else:
  53. self.result = "No data in this file."
  54. print("No data in this file.")
  55. def frame_extract(self):
  56. self.df = self.df.dropna(subset=["simFrame"])
  57. self.frame_list = sorted(self.df["simFrame"].unique())
  58. def cal_frame_diff(self):
  59. maxx = -1
  60. for i in range(1, len(self.frame_list)):
  61. diff = self.frame_list[i] - self.frame_list[i - 1] - 1
  62. if diff > maxx and diff != 0:
  63. maxx = diff
  64. self.frame_loss_max_start = [self.frame_list[i - 1], self.frame_list[i]]
  65. self.frame_diff_list.append(int(diff))
  66. def cal_total_frame_count(self):
  67. self.total_frame_count = len(self.frame_list) - 1 if self.frame_list else 0
  68. def cal_max_frame_number(self):
  69. self.max_frame_number = self.frame_list[-1] if self.frame_list else 0
  70. def cal_frame_loss_count(self):
  71. self.frame_loss_count = sum(self.frame_diff_list)
  72. def cal_frame_loss_rate(self):
  73. if self.total_frame_count + 1 > 0:
  74. self.frame_loss_rate = round(
  75. self.frame_loss_count / (self.total_frame_count + 1), 4
  76. )
  77. else:
  78. self.frame_loss_rate = 0
  79. def cal_frame_loss_max(self):
  80. self.frame_loss_max = max(self.frame_diff_list) if self.frame_diff_list else 0
  81. def cal_frame_diff_counter(self):
  82. self.frame_diff_counter = dict(Counter(self.frame_diff_list))
  83. sorted_items = sorted(
  84. self.frame_diff_counter.items(), key=lambda x: x[1], reverse=True
  85. )
  86. self.frame_diff_counter = dict(sorted_items[1:]) # 跳过不丢帧次数统计
  87. def result_print(self):
  88. self.result += f"丢帧率: {self.frame_loss_rate * 100:.2f}%, "
  89. self.result += f"总帧数: {self.total_frame_count + 1}, "
  90. self.result += f"丢帧数量: {self.frame_loss_count}, "
  91. self.result += f"最长丢帧数量: {self.frame_loss_max}, "
  92. self.result += f"最长丢帧时起始帧: {self.frame_loss_max_start}, "
  93. self.result += f"丢帧数及次数统计: {self.frame_diff_counter}."
  94. print("此文件总帧数:", self.total_frame_count + 1)
  95. print("此文件最大帧数:", self.max_frame_number)
  96. print("此文件丢帧数量:", self.frame_loss_count)
  97. print("此文件丢帧率:", f"{self.frame_loss_rate * 100:.2f}%")
  98. print("此文件最长丢帧数量:", self.frame_loss_max)
  99. print("此文件最长丢帧时起始帧:", self.frame_loss_max_start)
  100. print("此文件丢帧数及次数统计:", self.frame_diff_counter)
  101. def get_all_files(path):
  102. return [
  103. os.path.join(root, file)
  104. for root, _, files in os.walk(path)
  105. for file in files
  106. if file.endswith(".csv")
  107. ]
  108. def frame_loss_statistic(path):
  109. logger.info(f"Start Check Frame loss statistic: {path}")
  110. file_names = get_all_files(path)
  111. frame_loss_dict = {}
  112. for file in file_names:
  113. file_name = os.path.basename(file)
  114. print(f"\n[{file_name}]")
  115. df = pd.read_csv(file, index_col=False)
  116. d_q = DataQuality(df)
  117. d_q.quality_detect()
  118. frame_loss_dict[file_name] = {
  119. "result": d_q.result,
  120. "frame_loss_rate": d_q.frame_loss_rate,
  121. }
  122. return frame_loss_dict
  123. def data_precheck(file_path, case_name):
  124. """检查数据是否是有效数据,丢帧率是否在阈值内,数据中检查关键信息是否齐全"""
  125. logger.info(f"[case:{case_name}] Check if the data is valid: Start.")
  126. if not os.path.exists(file_path):
  127. logger.error(f"[case:{file_path}] SINGLE_CASE_EVAL: Invalid dataPath!")
  128. exit(1)
  129. frame_loss_dict = {}
  130. try:
  131. frame_loss_dict = frame_loss_statistic(file_path)
  132. except Exception as e:
  133. logger.error(
  134. f"[case:{case_name}] SINGLE_CASE_EVAL: frame loss statistic ERROR: {repr(e)}",
  135. exc_info=True,
  136. )
  137. exit(-1)
  138. # 检查帧丢失率
  139. for key, value in frame_loss_dict.items():
  140. if value["frame_loss_rate"] > config.DATA_QUALITY_LEVEL_1:
  141. logger.error(
  142. f"[case:{case_name}] SINGLE_CASE_EVAL: [{key}] frame loss rate > {config.DATA_QUALITY_LEVEL_1}%: {value['result']}"
  143. )
  144. return False
  145. # 检查关键信息是否齐全(根据实际情况修改)
  146. logger.info(f"[case:{case_name}] Check if the data is valid: End.")
  147. return True
class DataPreprocessing:
    """Load, merge and post-process all per-case simulation CSV files.

    Construction runs the full pipeline immediately:
    merge raw CSVs -> read merged/auxiliary CSVs -> derive kinematic
    columns per player -> extract report info -> mode-specific processing.

    Attributes populated for callers:
        object_df:     merged object-state DataFrame with derived columns.
        obj_data:      dict {playerId: per-player DataFrame}.
        ego_data:      per-player DataFrame for playerId 1 (the ego vehicle).
        obj_id_list:   list of all player ids.
        report_info:   {"mileage": float, "duration": float} for the ego.
    """

    def __init__(self, case_name, mode_label):
        # Base info
        self.data_path = os.path.join(config.PROCESSED_DATA_PATH, case_name)
        self.case_name = case_name
        # Initialize data containers
        self.object_df = pd.DataFrame()
        self.driver_ctrl_df = pd.DataFrame()
        self.vehicle_sys_df = pd.DataFrame()
        self.ego_data_df = pd.DataFrame()
        self.config = config
        self.mode_label = mode_label
        self.obj_data = {}
        self.ego_data = {}
        self.obj_id_list = []
        # Data quality level
        self.data_quality_level = config.DATA_QUALITY_LEVEL_1
        # Load and process data (order matters: _merge_csv writes the file
        # that _read_csv loads; _process_object_df fills obj_data used below).
        self._merge_csv()
        self._read_csv()
        self._process_object_df()
        # playerId 1 is treated as the ego vehicle throughout this class.
        self.report_info = self._get_report_info(self.obj_data.get(1, pd.DataFrame()))
        self._process_mode()

    def _process_mode(self):
        """Dispatch to the mode-specific post-processing step."""
        if self.mode_label == "real_car":
            self._process_real_car()
        elif self.mode_label == "PGVIL":
            self._process_PGVIL()

    def _process_real_car(self):
        # Process real car data (implementation needed)
        pass

    def _process_PGVIL(self):
        """Process PGVIL data: extract driver-control signals for reporting."""
        self.driver_ctrl_data = self._get_driver_ctrl_data(self.driver_ctrl_df)

    @staticmethod
    def cal_velocity(lat_v, lon_v):
        """
        Calculate the resultant magnitude from lateral and longitudinal components.

        Args:
            lat_v: Lateral component (velocity in m/s, or acceleration —
                   this helper is reused for both).
            lon_v: Longitudinal component, same units as lat_v.

        Returns:
            Resultant magnitude in the SAME units as the inputs.
            NOTE(review): the original docstring claimed km/h, but no unit
            conversion is performed here — confirm intended units upstream.
        """
        return np.sqrt(lat_v**2 + lon_v**2)  # Using numpy for vectorized operations

    def _process_object_df(self):
        """Derive per-player kinematic columns (speed, accel, rates of change)."""
        EGO_PLAYER_ID = 1
        data = self.object_df.copy()
        # calculate common parameters
        # NOTE(review): "* 1" is a no-op copy of speedY/speedX — presumably a
        # placeholder for a unit conversion factor; confirm intended scaling.
        data["lat_v"] = data["speedY"] * 1
        data["lon_v"] = data["speedX"] * 1
        data["v"] = data.apply(
            lambda row: self.cal_velocity(row["lat_v"], row["lon_v"]), axis=1
        )
        # NOTE(review): self-assignment is a no-op; the km/h comment is not
        # backed by any conversion in this code.
        data["v"] = data["v"]  # km/h
        # calculate acceleraton components
        data["lat_acc"] = data["accelY"] * 1
        data["lon_acc"] = data["accelX"] * 1
        data["accel"] = data.apply(
            lambda row: self.cal_velocity(row["lat_acc"], row["lon_acc"]), axis=1
        )
        # Rows with no object type are considered invalid and dropped.
        data = data.dropna(subset=["type"])
        data.reset_index(drop=True, inplace=True)
        self.object_df = data.copy()
        # calculate respective parameters
        # Per-player finite differences; first row of each group is NaN.
        for obj_id, obj_data in data.groupby("playerId"):
            self.obj_data[obj_id] = obj_data
            self.obj_data[obj_id]["time_diff"] = self.obj_data[obj_id]["simTime"].diff()
            self.obj_data[obj_id]["lat_acc_diff"] = self.obj_data[obj_id][
                "lat_acc"
            ].diff()
            self.obj_data[obj_id]["lon_acc_diff"] = self.obj_data[obj_id][
                "lon_acc"
            ].diff()
            self.obj_data[obj_id]["yawrate_diff"] = self.obj_data[obj_id][
                "speedH"
            ].diff()
            # Rates of change = delta / delta-t (jerk components, yaw accel).
            self.obj_data[obj_id]["lat_acc_roc"] = (
                self.obj_data[obj_id]["lat_acc_diff"]
                / self.obj_data[obj_id]["time_diff"]
            )
            self.obj_data[obj_id]["lon_acc_roc"] = (
                self.obj_data[obj_id]["lon_acc_diff"]
                / self.obj_data[obj_id]["time_diff"]
            )
            self.obj_data[obj_id]["accelH"] = (
                self.obj_data[obj_id]["yawrate_diff"]
                / self.obj_data[obj_id]["time_diff"]
            )
            # Zero time steps yield +/-inf; clamp to sentinel values +/-9999.
            self.obj_data[obj_id]["lat_acc_roc"] = self.obj_data[obj_id][
                "lat_acc_roc"
            ].replace([np.inf, -np.inf], [9999, -9999])
            self.obj_data[obj_id]["lon_acc_roc"] = self.obj_data[obj_id][
                "lon_acc_roc"
            ].replace([np.inf, -np.inf], [9999, -9999])
            self.obj_data[obj_id]["accelH"] = self.obj_data[obj_id]["accelH"].replace(
                [np.inf, -np.inf], [9999, -9999]
            )
        # get object id list
        self.obj_id_list = list(self.obj_data.keys())
        # NOTE(review): raises KeyError if no rows exist for playerId 1.
        self.ego_data = self.obj_data[EGO_PLAYER_ID]

    def _get_driver_ctrl_data(self, df):
        """
        Process and get driver control information.

        Args:
            df: A DataFrame containing driver control data
                (simTime, simFrame, brakePedal, throttlePedal, steeringWheel).

        Returns:
            A dictionary of driver control info (plain Python lists).
            Pedal values are rescaled to percent when the column maximum is
            below 1 (i.e. data recorded as a 0-1 fraction).
        """
        driver_ctrl_data = {
            "time_list": df["simTime"].round(2).tolist(),
            "frame_list": df["simFrame"].tolist(),
            "brakePedal_list": (
                (df["brakePedal"] * 100).tolist()
                if df["brakePedal"].max() < 1
                else df["brakePedal"].tolist()
            ),
            "throttlePedal_list": (
                (df["throttlePedal"] * 100).tolist()
                if df["throttlePedal"].max() < 1
                else df["throttlePedal"].tolist()
            ),
            "steeringWheel_list": df["steeringWheel"].tolist(),
        }
        return driver_ctrl_data

    def _read_csv(self):
        """Read CSV files into DataFrames (deduplicated on load)."""
        self.driver_ctrl_df = pd.read_csv(
            os.path.join(self.data_path, "DriverCtrl.csv")
        ).drop_duplicates()
        # merged_ObjState.csv is produced by _merge_csv(), which must run first.
        self.object_df = pd.read_csv(
            os.path.join(self.data_path, "merged_ObjState.csv"),
            dtype={"simTime": float},
        ).drop_duplicates(subset=["simTime", "simFrame", "playerId"])
        self.road_mark_df = pd.read_csv(
            os.path.join(self.data_path, "RoadMark.csv"), dtype={"simTime": float}
        ).drop_duplicates()
        self.road_pos_df = pd.read_csv(
            os.path.join(self.data_path, "RoadPos.csv"), dtype={"simTime": float}
        ).drop_duplicates()
        self.traffic_light_df = pd.read_csv(
            os.path.join(self.data_path, "TrafficLight.csv"), dtype={"simTime": float}
        ).drop_duplicates()
        self.traffic_signal_df = pd.read_csv(
            os.path.join(self.data_path, "TrafficSign.csv"), dtype={"simTime": float}
        ).drop_duplicates()
        self.lane_info_new_df = pd.read_csv(
            os.path.join(self.data_path, "LaneInfo_new.csv"), dtype={"simTime": float}
        ).drop_duplicates()
        self.road_info_df = pd.read_csv(
            os.path.join(self.data_path, "RoadInfo.csv"), dtype={"simTime": float}
        ).drop_duplicates()
        self.inter_info_df = pd.read_csv(
            os.path.join(self.data_path, "InterInfo.csv"), dtype={"simTime": float}
        ).drop_duplicates()
        self.cross_walk_df = pd.read_csv(
            os.path.join(self.data_path, "CrosswalkInfo.csv"), dtype={"simTime": float}
        ).drop_duplicates()

    def _get_report_info(self, df):
        """Extract report information (mileage, duration) from the DataFrame."""
        mileage = self._mileage_cal(df)
        duration = self._duration_cal(df)
        return {"mileage": mileage, "duration": duration}

    def _mileage_cal(self, df):
        """Calculate mileage based on the driving data.

        If travelDist is constant (sensor not updating), reconstruct it by
        trapezoidal integration of speed over time (speed / 3.6 assumes the
        "v" column is km/h — NOTE(review): see cal_velocity unit caveat).
        Mutates *df* in place when reconstructing.
        """
        if df["travelDist"].nunique() == 1:
            df["time_diff"] = df["simTime"].diff().fillna(0)
            df["avg_speed"] = (df["v"] + df["v"].shift()).fillna(0) / 2
            df["distance_increment"] = df["avg_speed"] * df["time_diff"] / 3.6
            df["travelDist"] = df["distance_increment"].cumsum().fillna(0)
            mileage = round(df["travelDist"].iloc[-1] - df["travelDist"].iloc[0], 2)
            return mileage
        return 0.0  # Return 0 if travelDist is not valid

    def _duration_cal(self, df):
        """Calculate duration of the driving data (last minus first simTime)."""
        return df["simTime"].iloc[-1] - df["simTime"].iloc[0]

    def _merge_csv(self):
        """Merge CSV files into one consolidated DataFrame (merged_ObjState.csv)."""
        df_object = pd.read_csv(
            os.path.join(self.data_path, "ObjState.csv"), dtype={"simTime": float}
        ).drop_duplicates()
        df_laneinfo = pd.read_csv(
            os.path.join(self.data_path, "LaneInfo.csv"), dtype={"simTime": float}
        ).drop_duplicates()
        df_roadPos = pd.read_csv(
            os.path.join(self.data_path, "RoadPos.csv"), dtype={"simTime": float}
        ).drop_duplicates()
        df_vehicleSys = pd.read_csv(
            os.path.join(self.data_path, "VehicleSystems.csv"), dtype={"simTime": float}
        ).drop_duplicates()
        ego_map_df = pd.read_csv(
            os.path.join(self.data_path, "EgoMap.csv"), dtype={"simTime": float}
        ).drop_duplicates()
        # Rename columns for clarity ("curvHor" mapping is an identity no-op)
        df_laneinfo = df_laneinfo.rename(columns={"curvHor": "curvHor", "id": "laneId"})
        df_laneinfo["curvHor"] = df_laneinfo["curvHor"].round(3)
        # Merge data: join lane info onto road position via the shared laneId.
        combined_df = pd.merge(
            df_roadPos,
            df_laneinfo,
            on=["simTime", "simFrame", "playerId", "laneId"],
            how="inner",
        )
        df_laneinfo_new = combined_df[
            ["simTime", "simFrame", "playerId", "curvHor", "curvHorDot"]
        ].drop_duplicates()
        df_roadPos = df_roadPos[
            ["simTime", "simFrame", "playerId", "laneOffset", "rollRel", "pitchRel"]
        ].copy()
        df_vehicleSys = df_vehicleSys[
            ["simTime", "simFrame", "playerId", "lightMask", "steering"]
        ].copy()
        # Final merge to create complete DataFrame (left joins keep every
        # object-state row even when auxiliary data is missing).
        merged_df = pd.merge(
            df_object, df_vehicleSys, on=["simTime", "simFrame", "playerId"], how="left"
        )
        merged_df = pd.merge(
            merged_df,
            df_laneinfo_new,
            on=["simTime", "simFrame", "playerId"],
            how="left",
        )
        merged_df = pd.merge(
            merged_df, df_roadPos, on=["simTime", "simFrame", "playerId"], how="left"
        )
        # Columns to copy from ego map
        columns_to_copy = [
            "simTime",
            "simFrame",
            "playerId",
            "road_link_id",
            "road_fc",
            "road_type",
            "road_speed_max",
            "road_speed_min",
        ]
        # Merge EGO data with map data
        merged_df = pd.merge(
            merged_df,
            ego_map_df[columns_to_copy],
            on=["simTime", "simFrame", "playerId"],
            how="left",
        )
        # Clean up and save (frame 0 and below is considered invalid).
        merged_df.drop_duplicates(inplace=True)
        merged_df = merged_df[merged_df.simFrame > 0].copy()
        merged_df.to_csv(
            os.path.join(self.data_path, "merged_ObjState.csv"), index=False
        )
        logger.info("merged_ObjState.csv has been saved.")