os-walk|reset_index|slice

Python中os.walk()的使用方法 - 晓伟的文章 - 知乎
https://zhuanlan.zhihu.com/p/149824829
如何重置index?

# Replace the existing index of `data` with a regularly spaced timestamp index.
# `data` must already exist and have exactly as many rows as `time_list` has
# timestamps; otherwise set_index() raises ValueError.

# Drop the previous index first (it is discarded, not kept as a column).
data.reset_index(drop=True, inplace=True)

# One timestamp every 0.2 s. "200ms" is the same step as "0.2S", spelled
# without the upper-case "S" alias that newer pandas releases deprecate.
time_list = pd.date_range(
    start="2019/12/10 17:30",
    end="2019/12/24 21:59:59.8",
    freq="200ms",
)
data = data.set_index(time_list)

# Name the index only after it has been installed: set_index() swaps in a new
# index object, so a name assigned to the old index beforehand would be lost.
data.index.name = "time_"

如果index是日期加时间,列是通量(Flux),那么如何按小时(一天中的第几个小时)分组,得到每个小时的平均值以及分位数等统计量?

# Per-hour-of-day summary of the "Flux" column, written to Flux.csv.
# Assumes `data` has a datetime-like index and a "Flux" column -- TODO confirm.

# Tag every row with the hour (0-23) taken from its index timestamp.
data["hour"] = pd.to_datetime(data.index).hour
# NOTE(review): `transform` is defined here but never used in the visible code.
transform = lambda x:x.to_pydatetime().replace(hour=0)
# Rebind `data` to a frame with a two-level index: (original index, hour).
data = data.set_index([data.index,data['hour']]) 

# describe() with the 5th/25th/50th/75th/95th percentiles requested.
cal_q=lambda x:x.describe(percentiles=[0.05,0.25,.5,.75, .95])
# unstack() pivots the last index level (hour) into columns; each hour's
# column is then summarised by cal_q and the table is saved as CSV.
data.loc[:,"Flux"].unstack().apply(cal_q).to_csv("Flux.csv")

如果想对长数据进行切片并输出?

import numpy as np
import pandas as pd
import matplotlib as mpl
import os
import math
import glob
import datetime
import matplotlib.dates as mdates
from matplotlib import pyplot as plt
from datetime import timedelta

# One entry per panel, top to bottom:
# (column name, matplotlib format string, y-axis label, band half-width in std units)
_PANEL_SPECS = (
    ("SCsize_tmp", "k-o", "SC num conc.(#/(cm$^3$))", 3.5),
    ("SizeBC_tmp", "b-o", "BC num conc.(#/(cm$^3$))", 3.5),
    ("massBC", "r-o", "BC mass conc.(ng/m$^3$)", 3.5),
    ("Ux_12m", "k-o", "u(m/s)", 3.5),
    ("Uy_12m", "b-o", "v(m/s)", 3.5),
    ("Uz_12m", "r-o", "w(m/s)", 5),
)


def _plot_window(sub_df, out_png):
    """Draw the six-panel figure for one non-empty window and save it to *out_png*."""
    fig, axs = plt.subplots(nrows=len(_PANEL_SPECS), ncols=1, figsize=(30, 20))
    try:
        for ax, (column, fmt, ylabel, n_sigma) in zip(axs, _PANEL_SPECS):
            series = sub_df[column]
            ax.plot(sub_df.index, series, fmt, lw=1, markerfacecolor='w')

            # Dashed red lines at mean +/- n_sigma * std of this window.
            center = series.mean()
            spread = n_sigma * series.std()
            for level in (center + spread, center - spread):
                ax.axhline(level, c="r", ls="--", lw=1.5)

            ax.set_ylabel(ylabel)
            ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d %H:%M"))
            ax.set_xlim(sub_df.index[0], sub_df.index[-1])

        axs[-1].set_xlabel("time")
        fig.savefig(out_png, bbox_inches='tight')
    finally:
        # Always release the figure; otherwise every window leaves a
        # 30x20-inch figure alive in pyplot's registry.
        plt.close(fig)


def control_data_level(data, n):
    """Export 30-minute slices of *data* as CSV + PNG, then half-hourly statistics.

    For every 30-minute window between 2019-12-24 22:00 and 2020-01-01 06:30
    the matching rows are written to ``D:\\python\\inter-1-<stamp>.csv`` and
    plotted to ``D:\\python\\18000-02s-pic-<stamp>.png`` (one panel per column
    listed in ``_PANEL_SPECS``). Windows that contain no rows are skipped
    instead of raising ``IndexError``.

    Afterwards the per-half-hour count, mean, std and median of the whole
    frame are written to separate sheets of
    ``D:\\python\\18000_02s-describe_<n>.xlsx``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns named in ``_PANEL_SPECS``. It is sliced by
        timestamp and resampled, so it is expected to carry a sorted
        DatetimeIndex -- TODO confirm against the caller.
    n
        Only used (via ``str(n)``) as the suffix of the Excel file name.

    Returns
    -------
    None
    """
    # Global font settings, applied once before any figure is created.
    mpl.rcParams['font.size'] = 15
    mpl.rcParams['font.weight'] = 'bold'
    mpl.rcParams['font.sans-serif'] = ['Arial']

    # '30min' is the same step as '30T', without the deprecated alias.
    time_list = pd.date_range(start='2019-12-24 22:00', end='2020-1-1 6:30', freq='30min')
    # .loc slicing is inclusive at both ends, so stop 0.2 s short of the
    # next window's start to avoid sharing a row between two windows.
    window = timedelta(minutes=30) - timedelta(seconds=0.2)

    for star_t in time_list:
        end_t = star_t + window
        print(star_t, end_t)

        sub_df = data.loc[star_t:end_t]
        if sub_df.empty:
            # Nothing to export or plot; index[0] below would raise IndexError.
            continue

        stamp = star_t.strftime('%Y-%m-%d_%H%M')
        sub_df.to_csv(r"D:\python\inter-1-" + stamp + '.csv')
        _plot_window(sub_df, r'D:\python\18000-02s-pic-' + stamp + '.png')

    # Build the half-hour resampler once and reuse it for all four statistics.
    half_hourly = data.resample("30min")
    sheets = (
        ("nan_count", half_hourly.count()),
        ("first_mean", half_hourly.mean()),
        ("first_std", half_hourly.std()),
        ("first_median", half_hourly.median()),
    )

    # The context manager saves and closes the workbook; ExcelWriter.save()
    # no longer exists in pandas 2.x.
    with pd.ExcelWriter(r'D:\python\18000_02s-describe_' + str(n) + '.xlsx') as writer:
        for sheet_name, frame in sheets:
            frame.to_excel(writer, sheet_name=sheet_name)

你可能感兴趣的:(os-walk|reset_index|slice)