@@ -64,15 +64,15 @@ class ZipCSVProcessor:
     # Define column mappings more clearly
     EGO_COLS_NEW = [
         "simTime", "simFrame", "playerId", "v", "speedX", "speedY",
-        "posH", "speedH", "posX", "posY", "accelX", "accelY",
-        "travelDist", "composite_v", "relative_dist", "type"  # Added type
+        "posH", "pitch", "roll", "speedH", "posX", "posY", "accelX", "accelY", "accelZ",
+        "travelDist", "composite_v", "relative_dist", "x_relative_dist", "y_relative_dist", "type"  # Added type
     ]
     OBJ_COLS_OLD_SUFFIXED = [
-        "v_obj", "speedX_obj", "speedY_obj", "posH_obj", "speedH_obj",
-        "posX_obj", "posY_obj", "accelX_obj", "accelY_obj", "travelDist_obj"
+        "v_obj", "speedX_obj", "speedY_obj", "posH_obj", "pitch_obj", "roll_obj", "speedH_obj",
+        "posX_obj", "posY_obj", "accelX_obj", "accelY_obj", "accelZ_obj", "travelDist_obj"
     ]
     OBJ_COLS_MAPPING = {old: new for old, new in
-                        zip(OBJ_COLS_OLD_SUFFIXED, EGO_COLS_NEW[3:13])}  # Map suffixed cols to standard names
+                        zip(OBJ_COLS_OLD_SUFFIXED, EGO_COLS_NEW[3:16])}  # Map suffixed cols to standard names

     def __init__(self, config: Config):
         self.config = config
@@ -109,15 +109,20 @@ class ZipCSVProcessor:
                 "speedY": "y_speed",
                 "speedX": "x_speed",
                 "posH": "yaw",
+                "pitch": "pitch",
+                "roll": "roll",
                 "speedH": "yaw_rate",
                 "posX": "latitude_dd",  # Source before projection
                 "posY": "longitude_dd",  # Source before projection
                 "accelX": "x_acceleration",
                 "accelY": "y_acceleration",
+                "accelZ": "z_acceleration",
                 "travelDist": "total_distance",
                 # composite_v/relative_dist might not be direct fields in GNSS, handle later if needed
                 "composite_v": "speed",  # Placeholder, adjust if needed
-                "relative_dist": None,  # Placeholder, likely not in GNSS data
+                "relative_dist": "distance",  # Placeholder, likely not in GNSS data
+                "x_relative_dist": "x_distance",
+                "y_relative_dist": "y_distance",
                 "type": None  # Will be set later
             },
             "db_columns": ["ID", "second", "usecond", "speed", "y_speed", "x_speed",
@@ -137,8 +142,11 @@ class ZipCSVProcessor:
                 "posX": "VUT_GPS_Latitude",  # Source before projection
                 "posY": "VUT_GPS_Longitude",  # Source before projection
                 "posH": "VUT_Heading",
+                "pitch": "VUT_Pitch",
+                "roll": "VUT_Roll",
                 "accelX": "VUT_Acc_X",
                 "accelY": "VUT_Acc_Y",
+                "accelZ": "VUT_Acc_Z",
                 # OBJ mappings (UFO = Unidentified Flying Object / Other Vehicle)
                 "v_obj": "Speed_mps",
                 "speedX_obj": "UFO_Speed_x_mps",
@@ -147,11 +155,16 @@ class ZipCSVProcessor:
                 "posX_obj": "GPS_Latitude",  # Source before projection
                 "posY_obj": "GPS_Longitude",  # Source before projection
                 "posH_obj": "Heading",
+                "pitch_obj": None,
+                "roll_obj": None,
                 "accelX_obj": "Acc_X",
                 "accelY_obj": "Acc_Y",
+                "accelZ_obj": "Acc_Z",
                 # Relative Mappings
                 "composite_v": "VUT_Rel_speed_long_mps",
                 "relative_dist": "VUT_Dist_MRP_Abs",
+                "x_relative_dist": "VUT_Dist_MRP_X",
+                "y_relative_dist": "VUT_Dist_MRP_Y",
                 # travelDist often calculated, not direct CAN signal
                 "travelDist": None,  # Placeholder
                 "travelDist_obj": None  # Placeholder
@@ -352,7 +365,8 @@ class ZipCSVProcessor:
                 processed_data.append(record)

         if processed_data:
-            df_final = pd.DataFrame(processed_data)[output_columns].iloc[::4].reset_index(drop=True)  # Ensure column order
+            df_final = pd.DataFrame(processed_data)[output_columns].iloc[::4].reset_index(
+                drop=True)  # Ensure column order
             df_final['simFrame'] = np.arange(1, len(df_final) + 1)
             df_final.to_csv(output_path, index=False, encoding="utf-8")
             print(f"Successfully wrote GNSS data to {output_path}")
@@ -413,8 +427,8 @@ class ZipCSVProcessor:
         df_obj = df_obj.reindex(columns=final_columns).iloc[::4]

         # Reindex simFrame of ego and obj
-        df_ego['simFrame'] = np.arange(1, len(df_ego)+1)
-        df_obj['simFrame'] = np.arange(1, len(df_obj)+1)
+        df_ego['simFrame'] = np.arange(1, len(df_ego) + 1)
+        df_obj['simFrame'] = np.arange(1, len(df_obj) + 1)

         # Merge EGO and OBJ dataframes
         df_merged = pd.concat([df_ego, df_obj], ignore_index=True)
@@ -567,7 +581,9 @@ class ZipCSVProcessor:
         # df_vehicle.dropna(subset=[col for col in required_pos if col in df_vehicle.columns], inplace=True)

         try:
-            df_vehicle["simTime"] = np.round(np.linspace(df_raw["simTime"].tolist()[0]+28800, df_raw["simTime"].tolist()[0]+28800 + 0.01*(len(df_vehicle)), len(df_vehicle)), 2)
+            df_vehicle["simTime"] = np.round(np.linspace(df_raw["simTime"].tolist()[0] + 28800,
+                                                         df_raw["simTime"].tolist()[0] + 28800 + 0.01 * (
+                                                             len(df_vehicle)), len(df_vehicle)), 2)
             df_vehicle["simFrame"] = np.arange(1, len(df_vehicle) + 1)
             df_vehicle["playerId"] = int(player_id)
             df_vehicle['playerId'] = pd.to_numeric(df_vehicle['playerId']).astype(int)
@@ -1064,23 +1080,23 @@ class FinalDataProcessor:
     def _merge_optional_data(self, df_object: pd.DataFrame) -> pd.DataFrame:
         """Load and merge optional data."""
         df_merged = df_object.copy()
-
+
         # Helper function to find and drop duplicated columns
        def clean_duplicate_columns(df):
            # Find columns with an _x or _y suffix
            duplicate_cols = []
            base_cols = {}
-
+
            # Print column names before cleanup
            print(f"Columns before duplicate cleanup: {df.columns.tolist()}")
-
+
            for col in df.columns:
                if col.endswith('_x') or col.endswith('_y'):
                    base_name = col[:-2]  # Strip the suffix
                    if base_name not in base_cols:
                        base_cols[base_name] = []
                    base_cols[base_name].append(col)
-
+
            # For each group of duplicates, keep only one column if the data is identical
            for base_name, cols in base_cols.items():
                if len(cols) > 1:
@@ -1091,7 +1107,7 @@ class FinalDataProcessor:
                        if not df[first_col].equals(df[col]):
                            is_identical = False
                            break
-
+
                    if is_identical:
                        # Data is identical; keep the first column and rename it to the base name
                        df = df.rename(columns={first_col: base_name})
@@ -1108,7 +1124,7 @@ class FinalDataProcessor:
                        # Drop the other simTime-related columns
                        for col in cols[1:]:
                            duplicate_cols.append(col)
-
+
            # Drop duplicate columns
            if duplicate_cols:
                # Make sure the simTime column is not dropped
@@ -1120,13 +1136,13 @@ class FinalDataProcessor:
                        duplicate_cols.remove(col)
                        print(f"Renamed {col} to simTime")
                        break
-
+
                df = df.drop(columns=duplicate_cols)
                print(f"Dropped duplicate columns: {duplicate_cols}")
-
+
            # Print column names after cleanup
            print(f"Columns after duplicate cleanup: {df.columns.tolist()}")
-
+
            return df

        # --- Merge EgoMap ---
@@ -1134,6 +1150,9 @@ class FinalDataProcessor:
        if egomap_path.exists() and egomap_path.stat().st_size > 0:
            try:
                df_ego = pd.read_csv(egomap_path, dtype={"simTime": float})
+                ego_column = ['posX', 'posY', 'posH']
+                ego_new_column = ['posX_map', 'posY_map', 'posH_map']
+                df_ego = df_ego.rename(columns=dict(zip(ego_column, ego_new_column)))
                # Drop the simFrame column since the main data's simFrame is used
                if 'simFrame' in df_ego.columns:
                    df_ego = df_ego.drop(columns=['simFrame'])
@@ -1155,10 +1174,10 @@ class FinalDataProcessor:
                    direction='nearest',
                    tolerance=0.01  # 10 ms tolerance
                )
-
+
                # Print column names after the merge
                print(f"Columns of df_merged after merging EgoMap: {df_merged.columns.tolist()}")
-
+
                # Make sure the simTime column exists
                if 'simTime' not in df_merged.columns:
                    if 'simTime_x' in df_merged.columns:
@@ -1166,7 +1185,9 @@ class FinalDataProcessor:
                        print("Renamed simTime_x to simTime")
                    else:
                        print("Warning: simTime column not found after merging EgoMap!")
-
+
+                df_merged = df_merged.drop(columns=['posX_map', 'posY_map', 'posH_map'])
+
                print("EgoMap data merged.")
            except Exception as e:
                print(f"Warning: Could not merge EgoMap data from {egomap_path}: {e}")
@@ -1191,7 +1212,7 @@ class FinalDataProcessor:
            heading = heading % 360
            if heading > 180:
                heading -= 360
-
+
            # Determine the direction: north (N), east (E), south (S), west (W)
            if -45 <= heading <= 45:  # Northbound
                return 'N'
@@ -1201,7 +1222,7 @@ class FinalDataProcessor:
                return 'W'
            else:  # Southbound (135 < heading <= 180 or -180 <= heading < -135)
                return 'S'
-
+
        # Check whether the posH column exists; if not, fall back to posH_x
        heading_col = 'posH'
        if heading_col not in df_merged.columns:
@@ -1211,10 +1232,10 @@ class FinalDataProcessor:
            else:
                print(f"Warning: heading column posH or posH_x not found")
                return df_merged
-
+
        # Add the direction column
        df_merged['vehicle_direction'] = df_merged[heading_col].apply(get_direction_from_heading)
-
+
        # Build the phaseId-to-direction mapping
        phase_to_direction = {
            1: 'S',  # South, straight
@@ -1226,37 +1247,37 @@ class FinalDataProcessor:
            7: 'S',  # South, left turn
            8: 'W',  # West, left turn
            9: 'N',  # North, left turn
-            10: 'E',  # East, left turn
-            11: 'N',  # North, pedestrian
-            12: 'E',  # East, pedestrian
-            13: 'S',  # South, right turn
-            14: 'W',  # West, right turn
-            15: 'N',  # North, right turn
+            10: 'E',  # East, left turn
+            11: 'N',  # North, pedestrian
+            12: 'E',  # East, pedestrian
+            13: 'S',  # South, right turn
+            14: 'W',  # West, right turn
+            15: 'N',  # North, right turn
            16: 'E'  # East, right turn
        }
-
+
        # Build the trafficlight_id-to-direction mapping
        trafficlight_to_direction = {
            # Traffic lights for the south-to-north direction
-            48100017: 'S',
-            48100038: 'S',
+            48100017: 'S',
+            48100038: 'S',
            48100043: 'S',
            48100030: 'S',
            # Traffic lights for the west-to-east direction
-            48100021: 'W',
+            48100021: 'W',
            48100039: 'W',
            # Traffic lights for the east-to-west direction
-            48100041: 'E',
+            48100041: 'E',
            48100019: 'E',
            # Traffic lights for the north-to-south direction
-            48100033: 'N',
-            48100018: 'N',
+            48100033: 'N',
+            48100018: 'N',
            48100022: 'N'
        }
-
+
        # Add a time column used for merging
        df_traffic['time'] = df_traffic['simTime'].round(2).astype(float)
-
+
        # Check whether df_merged has a simTime column
        if 'simTime' not in df_merged.columns:
            print("Warning: simTime column not found in df_merged before merging Traffic!")
@@ -1267,15 +1288,15 @@ class FinalDataProcessor:
            else:
                print("Fatal: no simTime-related column found; merging cannot continue!")
                return df_merged
-
+
        df_merged['time'] = df_merged['simTime'].round(2).astype(float)
-
+
        # Merge the Traffic data
        df_merged = pd.merge(df_merged, df_traffic, on=["time"], how="left")
-
+
        # Handle possible duplicated column names again
        df_merged = clean_duplicate_columns(df_merged)
-
+
        # Check whether the trafficlight_id column exists
        trafficlight_col = 'trafficlight_id'
        if trafficlight_col not in df_merged.columns:
@@ -1284,39 +1305,40 @@ class FinalDataProcessor:
                print(f"Using {trafficlight_col} instead of trafficlight_id")
            else:
                print(f"Warning: traffic light ID column trafficlight_id or trafficlight_id_x not found")
-
+
        # Filter the traffic lights relevant to the vehicle's direction of travel
        def filter_relevant_traffic_light(row):
            if 'phaseId' not in row or pd.isna(row['phaseId']):
                return np.nan
-
+
            # Get the direction for this phaseId
            phase_id = int(row['phaseId']) if not pd.isna(row['phaseId']) else None
            if phase_id is None:
                return np.nan
-
+
            phase_direction = phase_to_direction.get(phase_id, None)
-
+
            # If the phaseId direction matches the vehicle direction
            if phase_direction == row['vehicle_direction']:
                # Find all traffic light IDs for that direction
-                relevant_ids = [tid for tid, direction in trafficlight_to_direction.items()
-                                if direction == phase_direction]
-
+                relevant_ids = [tid for tid, direction in trafficlight_to_direction.items()
+                                if direction == phase_direction]
+
                # If trafficlight_id is in the EgoMap and the direction matches
-                if trafficlight_col in row and not pd.isna(row[trafficlight_col]) and row[trafficlight_col] in relevant_ids:
+                if trafficlight_col in row and not pd.isna(row[trafficlight_col]) and row[
+                    trafficlight_col] in relevant_ids:
                    return row[trafficlight_col]
-
+
            return np.nan
-
+
        # Apply the filter function
        df_merged['filtered_trafficlight_id'] = df_merged.apply(filter_relevant_traffic_light, axis=1)
-
+
        # Clean up the temporary column
        print(f"Columns of df_merged before dropping time: {df_merged.columns.tolist()}")
        df_merged.drop(columns=['time'], inplace=True)
        print(f"Columns of df_merged after dropping time: {df_merged.columns.tolist()}")
-
+
        # Make sure the simTime column exists
        if 'simTime' not in df_merged.columns:
            if 'simTime_x' in df_merged.columns:
@@ -1324,7 +1346,7 @@ class FinalDataProcessor:
                print("Renamed simTime_x to simTime")
            else:
                print("Warning: simTime column not found after processing Traffic data!")
-
+
            print("Traffic light data merged and filtered.")
        except Exception as e:
            print(f"Warning: Could not merge Traffic data from {traffic_path}: {e}")
@@ -1341,7 +1363,7 @@ class FinalDataProcessor:
            print(f"Reading Function data: {function_path}")
            df_function = pd.read_csv(function_path, low_memory=False).drop_duplicates()
            print(f"Function data columns: {df_function.columns.tolist()}")
-
+
            # Drop the simFrame column
            if 'simFrame' in df_function.columns:
                df_function = df_function.drop(columns=['simFrame'])
@@ -1353,19 +1375,19 @@ class FinalDataProcessor:
                df_function['simTime'] = pd.to_numeric(df_function['simTime'], errors='coerce')
                df_function = df_function.dropna(subset=['simTime'])  # Drop rows that cannot be converted
                df_function['time'] = df_function['simTime'].round(2)
-
+
                # Safely handle the simTime column of df_merged
                if 'simTime' in df_merged.columns:
                    print(f"Type of df_merged['simTime']: {df_merged['simTime'].dtype}")
                    print(f"First 5 values of df_merged['simTime']: {df_merged['simTime'].head().tolist()}")
-
+
                    df_merged['time'] = pd.to_numeric(df_merged['simTime'], errors='coerce').round(2)
                    # Drop NaN values from the time column
                    nan_count = df_merged['time'].isna().sum()
                    if nan_count > 0:
                        print(f"Warning: {nan_count} NaN values after conversion; dropping these rows")
                        df_merged = df_merged.dropna(subset=['time'])
-
+
                    # Make sure both DataFrames' time columns have the same type
                    df_function['time'] = df_function['time'].astype(float)
                    df_merged['time'] = df_merged['time'].astype(float)
@@ -1402,7 +1424,7 @@ class FinalDataProcessor:
            print(f"Reading OBU data: {obu_path}")
            df_obu = pd.read_csv(obu_path, low_memory=False).drop_duplicates()
            print(f"OBU data columns: {df_obu.columns.tolist()}")
-
+
            # Drop the simFrame column
            if 'simFrame' in df_obu.columns:
                df_obu = df_obu.drop(columns=['simFrame'])
@@ -1414,19 +1436,19 @@ class FinalDataProcessor:
                df_obu['simTime'] = pd.to_numeric(df_obu['simTime'], errors='coerce')
                df_obu = df_obu.dropna(subset=['simTime'])  # Drop rows that cannot be converted
                df_obu['time'] = df_obu['simTime'].round(2)
-
+
                # Safely handle the simTime column of df_merged
                if 'simTime' in df_merged.columns:
                    print(f"Type of df_merged['simTime'] before merging OBU: {df_merged['simTime'].dtype}")
                    print(f"First 5 values of df_merged['simTime'] before merging OBU: {df_merged['simTime'].head().tolist()}")
-
+
                    df_merged['time'] = pd.to_numeric(df_merged['simTime'], errors='coerce').round(2)
                    # Drop NaN values from the time column
                    nan_count = df_merged['time'].isna().sum()
                    if nan_count > 0:
                        print(f"Warning: {nan_count} NaN values after conversion; dropping these rows")
                        df_merged = df_merged.dropna(subset=['time'])
-
+
                    # Make sure both DataFrames' time columns have the same type
                    df_obu['time'] = df_obu['time'].astype(float)
                    df_merged['time'] = df_merged['time'].astype(float)
@@ -1457,9 +1479,8 @@ class FinalDataProcessor:

        # After all merges are complete, clean duplicate columns once more
        df_merged = clean_duplicate_columns(df_merged)
-
-        return df_merged

+        return df_merged

    def _process_trafficlight_data(self) -> pd.DataFrame:
        """Processes traffic light JSON data if available."""
@@ -1540,12 +1561,12 @@ class FinalDataProcessor:
            print(f"Processed {len(df_trafficlights)} unique traffic light state entries.")
            # Sort by time in ascending order - fixes the reversed-order issue
            df_trafficlights = df_trafficlights.sort_values('simTime', ascending=True)
-
+
            # Add debug info
            print(f"Traffic light data time range: {df_trafficlights['simTime'].min()} to {df_trafficlights['simTime'].max()}")
            print(f"First 5 traffic light timestamps: {df_trafficlights['simTime'].head().tolist()}")
            print(f"Last 5 traffic light timestamps: {df_trafficlights['simTime'].tail().tolist()}")
-
+
            return df_trafficlights

        except json.JSONDecodeError as e:
@@ -1555,6 +1576,7 @@ class FinalDataProcessor:
            print(f"Unexpected error processing traffic light data: {e}")
            return pd.DataFrame()

+
# --- Rosbag Processing ---
class RosbagProcessor:
    """Extracts data from Rosbag files within a ZIP archive."""
@@ -1617,7 +1639,6 @@ class RosbagProcessor:
        if not target_rostopic:
            print("Skipping Rosbag processing as no target topic was identified.")

-
        with tempfile.TemporaryDirectory() as tmp_dir_str:
            tmp_dir = Path(tmp_dir_str)
            bag_files_extracted = []
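
---
Note (not part of the patch): a minimal sanity check that the widened EGO_COLS_NEW[3:16] slice in the first hunk still lines up one-to-one with OBJ_COLS_OLD_SUFFIXED after the pitch/roll/accelZ and relative-distance additions. The column lists are copied from the hunk above; the standalone snippet and its asserts are illustrative only, not code from the repository.

    EGO_COLS_NEW = [
        "simTime", "simFrame", "playerId", "v", "speedX", "speedY",
        "posH", "pitch", "roll", "speedH", "posX", "posY", "accelX", "accelY", "accelZ",
        "travelDist", "composite_v", "relative_dist", "x_relative_dist", "y_relative_dist", "type",
    ]
    OBJ_COLS_OLD_SUFFIXED = [
        "v_obj", "speedX_obj", "speedY_obj", "posH_obj", "pitch_obj", "roll_obj", "speedH_obj",
        "posX_obj", "posY_obj", "accelX_obj", "accelY_obj", "accelZ_obj", "travelDist_obj",
    ]
    # The 13 suffixed object columns must pair with EGO_COLS_NEW indices 3..15
    assert len(OBJ_COLS_OLD_SUFFIXED) == len(EGO_COLS_NEW[3:16]) == 13
    # Each suffixed name should be its standard counterpart plus "_obj"
    assert all(old == f"{new}_obj" for old, new in zip(OBJ_COLS_OLD_SUFFIXED, EGO_COLS_NEW[3:16]))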