# 有两个 Excel 表格数据:其中一个是从遥感数据中提取的变量数据,另一个是站点数据。依据站点 ID(Station_Id_d)、数据获取日期(Date)和数据获取时间(Time)对两个表格的数据进行匹配,将匹配好的数据存入新的表格,作为训练样本数据。
import pandas as pd
# Columns that together identify one observation in both tables.
KEY_COLS = ("Date", "Time", "Station_Id_d")
# Station measurement columns attached to every matched factor row.
selected_cols = ["QRA_Avg_Hour", "DRA_Avg_Hour", "SRA_Avg_Hour"]


def match_station_data(factor_df, station_df, key_cols=None, value_cols=None):
    """Return the rows of *factor_df* that have a matching station record.

    Rows are matched on *key_cols*; the *value_cols* of the matching
    *station_df* row are appended to each matched *factor_df* row.  Rows that
    have no match, or that contain a missing value in any output column, are
    left out.

    The previous version copied the merged columns back into the factor
    table with ``df1[cols] = merged[cols]``.  That assignment aligns on the
    row index, and the merge result carries a fresh index unrelated to the
    factor table's, so values could end up on the wrong rows.  Returning the
    merge result itself keeps every value on the row it was matched to.

    Args:
        factor_df: DataFrame holding the extracted variables; must contain
            every column in *key_cols*.
        station_df: DataFrame holding the station records; must contain every
            column in *key_cols* and *value_cols*.
        key_cols: Column names to match on. Defaults to ``KEY_COLS``.
        value_cols: Station columns to attach. Defaults to ``selected_cols``.

    Returns:
        A new DataFrame with the columns of *factor_df* followed by
        *value_cols*, in the row order of *factor_df*, with a fresh
        0..n-1 index.  If *station_df* holds several rows for one key, the
        factor row appears once per matching station row.

    Raises:
        KeyError: If a key or value column is missing from an input frame.
    """
    keys = list(KEY_COLS if key_cols is None else key_cols)
    values = list(selected_cols if value_cols is None else value_cols)

    # If the factor table already has columns named like the station values,
    # replace them (as the old assignment did) instead of letting the merge
    # create suffixed "_x"/"_y" duplicates.
    features = factor_df.drop(columns=values, errors="ignore")

    # Only carry over the station columns that end up in the output, so that
    # gaps in unrelated station columns do not discard usable samples.
    targets = station_df[keys + values]

    matched = pd.merge(features, targets, on=keys, how="inner")
    return matched.dropna().reset_index(drop=True)


if __name__ == "__main__":
    # Read the extracted-variable table.
    df1 = pd.read_excel(r"F:\Solar_Radiance\BSRN_SSR\EXCEL_data\Factor_Extraction_data\54221_interpolation_NYR_Timechange.xlsx")
    # Read the station table.
    df2 = pd.read_excel(r"F:\Solar_Radiance\BSRN_SSR\EXCEL_data\Station_data\finaldata\station_data.xlsx")
    # Keep only the rows present in both tables, with station values attached.
    df_merged = match_station_data(df1, df2)
    # Save the matched rows as the training sample table.
    df_merged.to_excel("F:/Solar_Radiance/BSRN_SSR/Train_Validation_data/54221.xlsx", index=False)