通过 Python 对多个文件进行遍历处理,主要思路如下:
'''
#质量控制 + 核查经纬度 + 筛选站点 + 得到相同站点后导入 MySQL
'''
# Load every input into its own DataFrame.  `l` is defined outside this
# excerpt; each element is passed straight to pd.read_table (presumably a
# file path -- confirm against the code that builds `l`).
# The separator is a raw string so that `\s` is not treated as an (invalid)
# string escape sequence.
data = [pd.read_table(path, sep=r'\s+', header=None) for path in l]
print(l)

# NOTE(review): `station` is never filled in this excerpt; it is presumably
# consumed by code further down the file.
station = []
for raw in data:
    # Quality control: keep only the rows whose code in column 8 equals 0.
    dat1 = raw[raw[8] == 0]  # 1852 (original author's note, meaning not shown)
    # Fill remaining gaps with the per-column mean, then drop any row that
    # still contains a missing value and renumber the rows from 0.
    data1 = dat1.fillna(dat1.mean())
    data2 = data1.dropna(axis=0, how='any').reset_index(drop=True)

    # Combine the year / month / day columns (4, 5, 6) into a single
    # 'YYYY-MM-DD' string column.
    data2['dateYM'] = [
        datetime(year, month, day).strftime('%Y-%m-%d')
        for year, month, day in zip(data2[4], data2[5], data2[6])
    ]
    data2 = data2.drop(columns=[4, 5, 6])

    # Put the date first and give the remaining positional columns names.
    order = ['dateYM', 0, 1, 2, 3, 7, 8]
    df = data2[order].rename(columns={0: '区站号', 1: 'lat', 2: 'lon', 3: '观测场拔海高度', 7: '日照时数', 8: '质量控制码'})

    # Rescale the raw fields (vectorised; same arithmetic as before).
    df['lat'] = df['lat'] / 100
    df['lon'] = df['lon'] / 100
    df['日照时数'] = df['日照时数'] * 0.1
    df['观测场拔海高度'] = df['观测场拔海高度'] * 0.1

    # Coordinate check: give every row the first lat / lon recorded for its
    # station number, so each station ends up with one consistent position.
    # One grouped pass replaces the former per-row re-filtering of the frame.
    by_station = df.groupby('区站号')
    df['lat'] = by_station['lat'].transform('first')
    df['lon'] = by_station['lon'].transform('first')
注:经纬度需要先进行换算(原始值除以 100),并检查同一区站号对应的经纬度是否一致。
建议参考上述思路自行编写代码,效果会更好;本人能力有限,以上仅为主要的参考代码。