# “臭氧8小时”是一个滑动平均值:在 0-23 时的小时值上,以 0-7 时的均值作为 7 时的值,以 1-8 时的均值作为 8 时的值,依此类推逐时滑动计算。
gpb1 = df1.groupby(groupbylist)['O3']
for g_name, g_data in gpb1:
# -*- coding: utf-8 -*-
'''
@Description: 批量读取txt文件,计算指定字段的均值,臭氧8小时滑动平均值
小时值 :维度(年、月、日、时),四个指标,几个站的平均
日值:维度(站、年、月、日),四个指标,0-23时的平均(NO、NO2、NOX、O3),8小时滑动平均(O3),日最大8小时滑动平均(O3)
'''
import glob
import os

import numpy as np
import pandas as pd
# another one,速度更快
# 输出结果 不与原始数据等长,假设原数据为m,平滑步长为t,则输出数据为m-t+1
'''
def movingaverage(data, window_size):
cumsum_vec = np.cumsum(np.insert(data, 0, 0))
ma_vec = (cumsum_vec[window_size:] - cumsum_vec[:-window_size]) / window_size
return ma_vec
'''
#修改为: 传入序列index和数据data,以及窗口大小window_size,以便计算出对应序列的滑动平均值。
#这是为了应对有些数据丢失的情况,序列不是从0开始的情况。
#输出按窗口大小偏移后的序列值及其对应的滑动平均值
def movingaverage(index, data, window_size):
    """Return each window-end index entry paired with its moving average.

    A window of ``window_size`` consecutive samples is slid over ``data``.
    The mean of samples ``i .. i + window_size - 1`` is attached to the index
    entry of the window's last sample, so the first ``window_size - 1``
    entries produce no output row.

    Args:
        index: Sequence of per-sample labels (each an iterable of key
            values), aligned one-to-one with ``data``. Entries are copied,
            never modified.
        data: One-dimensional sequence of numeric samples.
        window_size: Number of samples per window; must be at least 1.

    Returns:
        A list with ``len(data) - window_size + 1`` rows (empty when there
        are fewer samples than ``window_size``). Each row is a new list
        holding the label values followed by the window mean.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
        IndexError: If ``index`` has fewer entries than ``data``.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    values = np.asarray(data, dtype=float).ravel()
    # np.convolve(mode='valid') silently swaps its operands when the kernel is
    # the longer one, so too-short input has to be handled before calling it.
    if values.size < window_size:
        return []

    # Every window is summed on its own (rather than differencing a running
    # cumulative sum), so a NaN sample only blanks the windows containing it
    # instead of every window that follows.
    ma_vec = np.convolve(values, np.ones(window_size), mode='valid') / window_size

    # Build fresh rows so the caller's index entries are left untouched.
    offset = window_size - 1
    return [list(index[offset + i]) + [mean] for i, mean in enumerate(ma_vec)]
# ---------------------------------------------------------------------------
# Script body: read every *.txt file in the chosen folder and export the
# statistics to O3_statistic.xlsx in that same folder. Two sheets are written
# per input file:
#   <file>小时 : hourly means of NO/NO2/NOX/O3 grouped by year/month/day/hour,
#               and, starting at column 10, the 8-sample moving average of O3
#               per station and day.
#   <file>日   : daily means of NO/NO2/NOX/O3 per station, and, starting at
#               column 10, the daily maximum of that moving average.
# ---------------------------------------------------------------------------
filesDir = input("请输入要计算的txt文件所在的路径(如e:\\data1):")
if filesDir == '':
    filesDir = r'E:\xiaohong\O3'

# Joining with os.sep yields exactly the previous backslash-built strings on
# Windows while also producing valid paths on other platforms.
filewr = filesDir + os.sep + 'O3_statistic.xlsx'
txt_files = glob.glob(filesDir + os.sep + '*.txt')

if not txt_files:
    # Nothing to export; opening a fresh workbook and closing it without any
    # sheet would make openpyxl fail on save.
    print('未找到txt文件:', filesDir)
else:
    print('创建输出文件:', filewr)
    # Append mode requires the workbook to exist already; fall back to write
    # mode on the first run so the file really is created.
    writer_mode = 'a' if os.path.exists(filewr) else 'w'
    # NOTE(review): with pandas >= 1.3, append mode refuses to write twice into
    # the same sheet unless if_sheet_exists='overlay' (pandas >= 1.4) is passed;
    # confirm the pandas version this script is meant to run on.

    pollutants = ['NO', 'NO2', 'NOX', 'O3']
    side_table_col = 10  # column where the second table of each sheet starts
    g_name = None  # keeps the error report below valid even before any group was read

    # The context manager saves and closes the workbook on every exit path, so
    # no explicit writer.save() / writer.close() calls are needed.
    with pd.ExcelWriter(filewr, mode=writer_mode, engine='openpyxl') as writer:  # pylint: disable=abstract-class-instantiated
        try:
            for file in txt_files:
                fileName = os.path.basename(file)
                hour_sheet = fileName + '小时'
                day_sheet = fileName + '日'
                print('处理文件:', file)
                df1 = pd.read_table(file, encoding='GBK')

                print('计算到小时维度的均值')
                groupbylist = ['年', '月', '日', '时']
                pi_hour = pd.pivot_table(
                    df1, index=groupbylist, values=pollutants, aggfunc='mean'
                ).reset_index()
                print('写入小时excel')
                pi_hour.to_excel(writer, sheet_name=hour_sheet, startrow=0)

                print('计算各站每日O3的滑动平均')
                groupbylist = ['站点', '年', '月', '日']
                smoothO3_data = []
                # For every station/day group, label each row with
                # [station, year, month, day, hour] and let movingaverage()
                # return the labels of the window-end rows with their means.
                # NOTE(review): windows cover 8 consecutive rows of the group in
                # file order; presumably rows are hourly and sorted - confirm.
                for g_name, g_data in df1.groupby(groupbylist)[['时', 'O3']]:
                    smoothO3_index = [list(g_name) + [hour] for hour in g_data['时'].values]
                    smoothO3_data.extend(
                        movingaverage(smoothO3_index, g_data['O3'].values, 8)
                    )
                df_smoothO3 = pd.DataFrame(
                    smoothO3_data,
                    columns=['站点', '年', '月', '日', '时', 'O3滑动平均'],
                )
                print('写入小时excel')
                df_smoothO3.to_excel(
                    writer, sheet_name=hour_sheet, startrow=0, startcol=side_table_col
                )

                print('计算到各站日维度的均值')
                pi_day = pd.pivot_table(
                    df1, index=groupbylist, values=pollutants, aggfunc='mean'
                ).reset_index()
                print('写入日excel')
                pi_day.to_excel(writer, sheet_name=day_sheet, startrow=0)

                print('计算各站每日O3的滑动平均最大值')
                pi_O3_max = pd.pivot_table(
                    df_smoothO3, index=groupbylist, values=['O3滑动平均'], aggfunc='max'
                ).reset_index()
                pi_O3_max.rename(columns={'O3滑动平均': 'O3滑动平均最大'}, inplace=True)
                print('写入excel')
                pi_O3_max.to_excel(
                    writer, sheet_name=day_sheet, startrow=0, startcol=side_table_col
                )
        except IndexError as e1:
            # Same best-effort behaviour as before: report the group being
            # processed and stop; sheets written so far are still saved.
            print('***Error happend at:', g_name, e1)