pandas读取csv,按指定字段筛选数据

@[TOC](pandas读取csv,按指定字段筛选数据)

需求

读取csv内容,按指定字段筛选数据
把数据写入模板里,输出到csv文件

代码说明

1.数据清洗
dataframe格式数据清洗:
data_extract_level4 = data_read[(data_read['linkMaxRate'] > 0.5) & (data_read['linkMaxRate'] <= 0.6)]
2.字符串去除空格
strip()方法
比如 NEName = str(row['NEName']).strip()
3.遍历dataframe的行数据

        # Walk the DataFrame one row at a time; `row` is a Series keyed by column name.
        # No trailing commas after the assignments: `x = value,` binds a 1-tuple,
        # not the value itself.
        for index, row in data_cleaning.iterrows():
            record_id = row['id']  # avoid shadowing the builtin `id`
            reportTime = row['reportTime']

4.获取今天
today = time.strftime('%Y%m%d', time.localtime(time.time()))

5.写入脚本所在路径

# Resolve the script directory from an absolute path: a bare
# os.path.dirname(__file__) can be '' (e.g. Python < 3.9 started as
# `python script.py`), which would turn "<dir>/name.csv" into "/name.csv".
script_path = os.path.dirname(os.path.abspath(__file__))
self.filename = os.path.join(script_path, "OLT上联端口CRC越限告警明细%s.csv" % (today))
# The companion file is the data file name with ".ok" appended.
self.filenameOK = self.filename + ".ok"

6.把文件生成到脚本目录下
import shutil

# Copy with shutil instead of a shell `cp` command string, so paths that
# contain spaces or shell metacharacters are handled correctly.
try:
    shutil.copyfile(self.filename, self.filenameOK)
except OSError as exc:
    # Keep the original non-fatal behaviour: record the failure and carry on.
    self.logger.info("复制文件失败: %s" % exc)
else:
    self.logger.info("已生成文件: %s" % self.filenameOK)
7.拼接成dataframe格式数据,输出到excel

# `statistics_choice` is the DataFrame filtered in an earlier step (not shown here);
# `ds`, `d` and `dt` are likewise defined earlier.
col = ['序号','日期','发生时间','专业','问题省份','主题','问题描述','隐患分类','隐患重要性','来源','地市','网元名称']
data_lists = []
# Write the result
if len(statistics_choice) == 0:
    print('无符合条件输出!')
else:
    # enumerate(..., start=1) supplies the 1-based sequence number for the order id.
    for seq, (index, row) in enumerate(statistics_choice.iterrows(), start=1):
        alarm_title = row['AlarmTitle']
        data_lists.append([
            'YH-' + '002-' + ds + '-%04d' % seq,
            d,
            dt,
            row['SpecialtyLv2'],
            row['AlarmProvince'],
            '《%s》超频性能告警' % alarm_title,
            '%s性能告警本周期内频发%d次' % (alarm_title, row['quantity']),
            '性能类',
            '重要',
            '告警',
            row['AlarmRegion'],
            row['NeName'],
        ])
    # One output row per input row, in the column order given by `col`.
    result = pd.DataFrame(data_lists, columns=col)
    result.to_excel(excel_writer=r"超频性能告警输出结果%s.xlsx" % ds, index=False)
    print('结果输出成功!')

全部代码

import csv
import os
import shutil
import time
from datetime import date
from datetime import datetime
from datetime import timedelta

import pandas as pd

from getLog import FinalLogger

# Module-level accumulators (not referenced by the class in this listing).
date_list = []
data_lists = []
# Today's date as YYYYMMDD. Straight quotes are required here: the
# typographic quotes in the original are a SyntaxError.
s = date.today().strftime("%Y%m%d")

class warningProcess():
    """Generate OLT uplink bandwidth-utilisation alarms from a CSV export.

    ``read_data`` loads the CSV and keeps the rows whose ``linkMaxRate``
    crosses an alarm threshold, ``alarmListingTemplate`` renders each row into
    a fixed ``key:value`` text record, and ``clientToServer`` appends that
    record to a dated CSV file (plus a ``.ok`` companion) next to this script.
    """

    # (exclusive lower bound, inclusive upper bound, alarm title, severity,
    #  threshold label shown in the alarm text).
    # The bounds mirror the filters in read_data so that every selected row is
    # rendered with the level it was selected for.
    _ALARM_LEVELS = (
        (0.6, 1.0, "OLT上联链路单端口带宽利用率高于一级门限", "三级告警", "60%"),
        (0.5, 0.6, "OLT上联链路单端口带宽利用率高于二级门限", "四级告警", "50%"),
    )

    # Columns copied by data_to_excel, in output order.
    _EXCEL_COLUMNS = ('id', 'reportTime', 'manufacturerName', 'startTime',
                      'oltIp', 'upLinkPort', 'CRC', 'oltName')

    def __init__(self):
        self.logger = FinalLogger.getLogger()
        self.fileName = ""  # legacy spelling, kept for backward compatibility
        self.filename = ""  # data file path, set by clientToServer
        self.filenameOK = ""  # ".ok" companion path, set by clientToServer
        self.totalWarningCnt = 0

    @classmethod
    def _classify(cls, rate):
        """Return ``(title, severity, threshold_label)`` for *rate*, or ``None``."""
        for lower, upper, title, severity, label in cls._ALARM_LEVELS:
            if lower < rate <= upper:
                return title, severity, label
        return None

    def clientToServer(self, strData):
        """Append *strData* as a single-cell CSV row to today's output files.

        Sets ``self.filename`` and ``self.filenameOK`` as a side effect.
        """
        today = time.strftime('%Y%m%d', time.localtime(time.time()))
        # abspath first: a bare dirname(__file__) can be '' (e.g. Python < 3.9
        # started as `python script.py`), which would send the output to "/".
        script_path = os.path.dirname(os.path.abspath(__file__))
        self.filename = os.path.join(
            script_path, "OLT上联单端口带宽利用率越限明细%s.csv" % (today))
        self.filenameOK = self.filename + ".ok"

        # NOTE(review): the record is also appended to the .ok file, which
        # read_data later overwrites with a full copy — confirm the early
        # write is wanted.
        for path in (self.filename, self.filenameOK):
            # newline="" is what the csv module requires, so the "\r\n"
            # embedded in the record is written through untranslated.
            with open(path, "a", newline="", encoding="utf-8") as csvfile:
                csv.writer(csvfile).writerow([strData])

    def alarmListingTemplate(self, data_cleaning):
        """Render every row of *data_cleaning* as an alarm record and store it.

        Args:
            data_cleaning: DataFrame with the columns ``id``, ``dataTime``,
                ``NEName``, ``NEIP``, ``devicePort``, ``linkMaxRate``,
                ``manufacturerName`` and ``alarmClass``.

        Rows whose ``linkMaxRate`` matches no alarm level are logged and
        skipped. Each stored record increments ``self.totalWarningCnt``.
        """
        for _, row in data_cleaning.iterrows():
            rate = row['linkMaxRate']
            level = self._classify(rate)
            if level is None:
                # Without this guard the level variables would be unbound on
                # the first row, or silently reuse the previous row's level.
                self.logger.info("linkMaxRate=%s 不在告警门限范围内,已跳过" % rate)
                continue
            alarm_title, severity, threshold = level

            ne_name = str(row['NEName']).strip()
            ne_ip = row['NEIP']
            vendor_name = row['manufacturerName']
            alarm_text = "%s,%s设备:%s端口:%s,OLT上联链路带宽利用率高于%s,产生此告警,峰值利用率为:%5.3f" % (
                ne_name, vendor_name, ne_ip, row['devicePort'], threshold, rate)
            vendor = "中兴" if vendor_name == "中兴" else "华为贝尔"

            # NOTE(review): NeName carries the device IP and NeIp stays empty,
            # exactly as in the original template — confirm that this is what
            # the receiving system expects.
            fields = [
                "",
                "IntVersion:V1.0.0",
                "MsgSerial:",
                "AlarmUniqueId:%s" % row['id'],
                "AlarmUniqueClearId:",
                "NeName:%s" % ne_ip,
                "NeIp:",
                "SystemName:集中性能系统",
                "EquipmentClass:OLT",
                "Version:V1.0",
                "LocateNeName:%s" % row['alarmClass'],
                "LocateNeType:",
                "LocateInfo:000000000",
                "EventTime:%s" % row['dataTime'],
                "CancelTime:",
                "VendorSeverity:%s" % severity,
                "VendorAlarmId:000-000-50-900039",
                "AlarmTitle:" + alarm_title,
                "ProbableCauseTxt:",
                "AlarmText:" + alarm_text,
                "AlarmStatus:1",
                "Vendor:" + vendor,
                "maintain_group:" + vendor + "设备维护组",
                "",
            ]
            # Every field sits on its own CRLF-terminated line, with one
            # leading CRLF before the first (empty) field.
            message = "\r\n" + "\r\n".join(fields) + "\r\n"

            self.logger.info("生成告警模块")
            self.logger.info(message)
            self.clientToServer(message)
            self.totalWarningCnt = self.totalWarningCnt + 1

    def data_to_excel(self, data_cleaning, data_lists):
        """Append one list per row of *data_cleaning* to *data_lists* (in place).

        Each appended list holds the row's ``id``, ``reportTime``,
        ``manufacturerName``, ``startTime``, ``oltIp``, ``upLinkPort``, ``CRC``
        and ``oltName`` values, in that order. Despite the name, nothing is
        written to Excel here; the caller can build a DataFrame from
        *data_lists* and export it.
        """
        for _, row in data_cleaning.iterrows():
            data_lists.append([row[column] for column in self._EXCEL_COLUMNS])

    def read_data(self, source_csv=r'OLT上联单端口带宽利用率越限明细.csv'):
        """Load the CSV, emit alarms for over-threshold rows, finalise the .ok file.

        Args:
            source_csv: Path of the CSV to read. Defaults to the file name
                used by the original script (resolved against the current
                working directory).
        """
        # Read the data
        data_read = pd.read_csv(source_csv, engine='python')
        # Filter: keep the rows that fall inside one of the two alarm bands
        rate = data_read['linkMaxRate']
        data_extract_level4 = data_read[(rate > 0.5) & (rate <= 0.6)]
        data_extract_level3 = data_read[(rate > 0.6) & (rate <= 1)]

        self.alarmListingTemplate(data_extract_level4)
        self.alarmListingTemplate(data_extract_level3)

        if self.totalWarningCnt > 0:
            print(self.filename)
            # shutil instead of a shell `cp` string: the paths contain
            # non-ASCII characters and may contain spaces.
            try:
                shutil.copyfile(self.filename, self.filenameOK)
            except OSError as exc:
                # The original only logged the copy status; stay non-fatal.
                self.logger.info("复制文件失败: %s" % exc)
            else:
                self.logger.info("已生成文件: %s" % self.filenameOK)
        else:
            self.logger.info("当前告警数为:%d" % self.totalWarningCnt)
if __name__ == '__main__':
    # Script entry point: build the processor and run a single pass.
    warningProcess().read_data()

数据

在这里插入图片描述

你可能感兴趣的:(pandas,python,python,数据挖掘,机器学习)