Python:用 pandas 读取 csv 文件(排序 / toList / 获取指定行),并用 xlwt 写入 Excel

1.用 pandas 读取 csv 文件,按日期列升序排序,转成 list([[xx],[xx]]),取最后一行(即日期最新的一行)的数据,写入 excel。

2.代码:

# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup
import pandas as pd
from urllib import urlencode
from xml.dom import minidom
from xlwt import *
import numpy as np
import MySQLdb
import datetime
import time
import sys
reload(sys)
sys.setdefaultencoding('utf8')

# 生成excel 写入指定内容
def write_excel():
    w = Workbook(encoding='utf-8')
    # 第一個sheet頁
    ws = w.add_sheet('N. China Ports In&Out')
    # 第一行 煤的种类
    ws.write(0, 0, 'Inbound Rail & Shipment at N. China Ports')
    ws.write(1, 0, 'Unit: 10k tons')
    ws.write(2, 0, '铁路调入量(万吨)')
    ws.write(2, 13, '吞吐量(万吨)')
    # 第4行
    ws.write(3, 0, 'Date')
    ws.write(3, 1, 'QHD')
    ws.write(3, 2, 'SDIC JT')
    ws.write(3, 3, 'Old JT  ')
    ws.write(3, 4, '36-40 JT')
    ws.write(3, 5, 'SDIC CFD')
    ws.write(3, 6, 'CFD2')
    ws.write(3, 7, 'Huaneng CFD')
    ws.write(3, 8, 'HH')
    ws.write(3, 9, 'TOTAL')
    ws.write(3, 10, 'Inbound\nRail\n5dma')
    ws.write(3, 11, ' ')

    ws.write(3, 12, 'Date')
    ws.write(3, 13, 'QHD')
    ws.write(3, 14, 'SDIC JT')
    ws.write(3, 15, 'Old JT  ')
    ws.write(3, 16, '36-40 JT')
    ws.write(3, 17, 'SDIC CFD')
    ws.write(3, 18, 'CFD2')
    ws.write(3, 19, 'Huaneng CFD')
    ws.write(3, 20, 'HH')
    ws.write(3, 21, 'TOTAL')
    ws.write(3, 22, 'Inbound\nRail\n5dma')
    ws.write(3, 23, ' ')

    ws.write(3, 24, 'Date')
    ws.write(3, 25, 'INBOUND RAIL')
    ws.write(3, 26, 'SHIPMENT')
    ws.write(3, 27, 'YEAR')
    ws.write(3, 28, 'MONTH')
    ws.write(3, 29, 'WEEK')
    ws.write(3, 30, 'DAY')
    ws.write(3, 31, ' ')
    ws.write(3, 32, 'SHIPMENT\n5dma')
    ws.write(3, 33, ' ')
    ws.write(3, 34, 'Date')
    ws.write(3, 35, 'Implied Daqin\nRailway')
    ws.write(3, 36, 'Implied Mengji\nRailway')
    ws.write(3, 37, 'Implied Shuohuang\nRailway')
    ws.write(3, 38, 'Daqin Railway\n5dma')
    ws.write(3, 39, 'Mengji Railway\n5dma')
    ws.write(3, 40, 'Shuohuang Railway\n5dma')

    # 第五行 中文描述
    ws.write(4, 1, '秦皇岛港')
    ws.write(4, 2, '国投京唐港')
    ws.write(4, 3, '京唐老港')
    ws.write(4, 4, '36-40码头')
    ws.write(4, 5, '国投曹妃甸港')
    ws.write(4, 6, '曹妃甸二期')
    ws.write(4, 7, '华能曹妃甸')
    ws.write(4, 8, '黄骅港')
    ws.write(4, 9, '')

    ws.write(4, 13, '秦皇岛港')
    ws.write(4, 14, '国投京唐港')
    ws.write(4, 15, '京唐老港')
    ws.write(4, 16, '36-40码头')
    ws.write(4, 17, '国投曹妃甸港')
    ws.write(4, 18, '曹妃甸二期')
    ws.write(4, 19, '华能曹妃甸')
    ws.write(4, 20, '黄骅港')
    ws.write(4, 21, '')
    # 獲取當天日期
    # 寫入數據
    today = datetime.date.today()
    qinghuangdaoList = read_cvs()
    guotoujingtanggangList = read_guotoujingtanggangcvs()
    jingtanglaogangList = read_jingtanglaogangcvs()
    read_36_40Lsit = read_36_40cvs()
    guotoucaofeiList = read_guotoucaofeicvs()
    caofeierqiList = read_caofeierqicvs()
    huanengcaofeierqiLsit = read_huanengcaofeierqicvs()
    huanghuaList = read_huanghuacvs()
    # 铁路调入量(万吨)
    ws.write(5, 0, today.__str__())
    ws.write(5, 1, qinghuangdaoList[8])
    ws.write(5, 2, guotoujingtanggangList[6])
    ws.write(5, 3, jingtanglaogangList[5])
    ws.write(5, 4, read_36_40Lsit[5])
    ws.write(5, 5, guotoucaofeiList[7])
    ws.write(5, 6, caofeierqiList[6])
    ws.write(5, 7, huanengcaofeierqiLsit[6])
    ws.write(5, 8, huanghuaList[6])
    ws.write(5, 9, float(qinghuangdaoList[8])+float(guotoujingtanggangList[6])+float(jingtanglaogangList[5])+float(read_36_40Lsit[5])+float(guotoucaofeiList[7])+float(caofeierqiList[6])+float(huanengcaofeierqiLsit[6])+float(huanghuaList[6]))
    # 吞吐量(万吨)
    ws.write(5, 12, today.__str__())
    ws.write(5, 13, qinghuangdaoList[7])
    ws.write(5, 14, guotoujingtanggangList[5])
    ws.write(5, 15, jingtanglaogangList[4])
    ws.write(5, 16, read_36_40Lsit[4])
    ws.write(5, 17, guotoucaofeiList[6])
    ws.write(5, 18, caofeierqiList[5])
    ws.write(5, 19, huanengcaofeierqiLsit[5])
    ws.write(5, 20, huanghuaList[5])
    ws.write(5, 21, float(qinghuangdaoList[7])+float(guotoujingtanggangList[5])+float(jingtanglaogangList[4])+float(read_36_40Lsit[4])+float(guotoucaofeiList[6])+float(caofeierqiList[5])+float(huanengcaofeierqiLsit[5])+float(huanghuaList[5]))
    # 第二個sheet頁
    ws = w.add_sheet('N. China Ports Vessels')
    # 第一行 煤的种类
    ws.write(0, 0, 'Vessels at N. China Ports')
    ws.write(1, 0, 'Unit: 10k tons')
    ws.write(2, 0, '锚地船舶(艘)')
    ws.write(2, 13, '预到船舶(艘)')
    # 第4行
    ws.write(3, 0, 'Date')
    ws.write(3, 1, 'QHD')
    ws.write(3, 2, 'SDIC JT')
    ws.write(3, 3, 'Old JT  ')
    ws.write(3, 4, '36-40 JT')
    ws.write(3, 5, 'SDIC CFD')
    ws.write(3, 6, 'CFD2')
    ws.write(3, 7, 'Huaneng CFD')
    ws.write(3, 8, 'HH')
    ws.write(3, 9, 'TOTAL')
    ws.write(3, 10, 'Inbound\nRail\n5dma')
    ws.write(3, 11, ' ')

    ws.write(3, 12, 'Date')
    ws.write(3, 13, 'QHD')
    ws.write(3, 14, 'SDIC JT')
    ws.write(3, 15, 'Old JT  ')
    ws.write(3, 16, '36-40 JT')
    ws.write(3, 17, 'SDIC CFD')
    ws.write(3, 18, 'CFD2')
    ws.write(3, 19, 'Huaneng CFD')
    ws.write(3, 20, 'HH')
    ws.write(3, 21, 'TOTAL')
    ws.write(3, 22, 'Inbound\nRail\n5dma')
    ws.write(3, 23, ' ')

    ws.write(3, 24, 'Date')
    ws.write(3, 25, 'INBOUND RAIL')
    ws.write(3, 26, 'SHIPMENT')
    ws.write(3, 27, 'YEAR')
    ws.write(3, 28, 'MONTH')
    ws.write(3, 29, 'WEEK')
    ws.write(3, 30, 'DAY')
    ws.write(3, 31, ' ')
    ws.write(3, 32, 'SHIPMENT\n5dma')
    ws.write(3, 33, ' ')
    ws.write(3, 34, 'Date')
    ws.write(3, 35, 'Implied Daqin\nRailway')
    ws.write(3, 36, 'Implied Mengji\nRailway')
    ws.write(3, 37, 'Implied Shuohuang\nRailway')
    ws.write(3, 38, 'Daqin Railway\n5dma')
    ws.write(3, 39, 'Mengji Railway\n5dma')
    ws.write(3, 40, 'Shuohuang Railway\n5dma')

    # 第五行 中文描述
    ws.write(4, 1, '秦皇岛港')
    ws.write(4, 2, '国投京唐港')
    ws.write(4, 3, '京唐老港')
    ws.write(4, 4, '36-40码头')
    ws.write(4, 5, '国投曹妃甸港')
    ws.write(4, 6, '曹妃甸二期')
    ws.write(4, 7, '华能曹妃甸')
    ws.write(4, 8, '黄骅港')
    ws.write(4, 9, '')

    ws.write(4, 13, '秦皇岛港')
    ws.write(4, 14, '国投京唐港')
    ws.write(4, 15, '京唐老港')
    ws.write(4, 16, '36-40码头')
    ws.write(4, 17, '国投曹妃甸港')
    ws.write(4, 18, '曹妃甸二期')
    ws.write(4, 19, '华能曹妃甸')
    ws.write(4, 20, '黄骅港')
    ws.write(4, 21, '')
    # 獲取當天日期
    # 寫入數據

    # 锚地船舶(艘)
    ws.write(5, 0, today.__str__())
    ws.write(5, 1, qinghuangdaoList[0])
    ws.write(5, 2, guotoujingtanggangList[1])
    ws.write(5, 3, jingtanglaogangList[2])
    ws.write(5, 4, read_36_40Lsit[2])
    ws.write(5, 5, guotoucaofeiList[0])
    ws.write(5, 6, caofeierqiList[0])
    ws.write(5, 7, huanengcaofeierqiLsit[0])
    ws.write(5, 8, huanghuaList[0])
    ws.write(5, 9, float(qinghuangdaoList[0]) + float(guotoujingtanggangList[1]) + float(jingtanglaogangList[2]) + float(read_36_40Lsit[2]) + float(guotoucaofeiList[0]) + float(caofeierqiList[0]) + float(huanengcaofeierqiLsit[0]) + float(huanghuaList[0]))
    # 预到船舶(艘)
    ws.write(5, 12, today.__str__())
    ws.write(5, 13, qinghuangdaoList[1])
    ws.write(5, 14, guotoujingtanggangList[2])
    ws.write(5, 15, jingtanglaogangList[3])
    ws.write(5, 16, read_36_40Lsit[3])
    ws.write(5, 17, guotoucaofeiList[1])
    ws.write(5, 18, caofeierqiList[1])
    ws.write(5, 19, huanengcaofeierqiLsit[1])
    ws.write(5, 21, float(qinghuangdaoList[1]) + float(guotoujingtanggangList[2]) + float(jingtanglaogangList[3]) + float(read_36_40Lsit[3]) + float(guotoucaofeiList[1]) + float(caofeierqiList[1]) + float(huanengcaofeierqiLsit[1]))

    w.save('china_coal.xls')
# NOTE(review): module-level list that no code visible in this file reads or
# writes; presumably a leftover — confirm before removing.
dataList = []
# 读取本地cvs文件 获取指定内容 (讀取秦皇島)
def read_cvs():
    # 1. 用pandas读取csv ;把哪一列的数据转换成日期类型 parse_dates=[哪一列,从0开始],infer_datetime_format=True
    data = pd.read_csv(u'C:/pythonProject/port/20190621/秦皇岛港日调度量历史数据_data.csv',parse_dates=[5],infer_datetime_format=True)
    print (data)
    # 按日期排序 升序
    data.sort_values('日期',inplace=True)
    print '====================='
    print data
    print '------------------------'
    # 读取CSV,并将数据转化为数组
    list=data.values.tolist()
    listLen = len(list)
    print listLen
    # 获取最后一行的数据
    endCol =  list[listLen-1]
    print '最後一列的list : ' ,endCol
    # 獲取當天日期
    today = datetime.date.today()
    # 把日期格式成所需格式 .strftime("%Y/%m/%d")
    todays = today.strftime("%Y/%m/%d")
    print 'today : ',todays
    yesterday = today - datetime.timedelta(days=1)
    yesterdays = yesterday.strftime("%Y/%m/%d")
    print 'yesterday : ' ,yesterdays
    print 'read day : ' ,endCol[5].strftime("%Y/%m/%d")
    if endCol[5].strftime("%Y/%m/%d") == todays:
        print '1'
        return endCol
    else:
        print '2'
        return endCol


# 读取本地cvs文件 获取指定内容 (讀取国投京唐港)
def read_guotoujingtanggangcvs():
    # 1. 用pandas读取csv
    data = pd.read_csv(u'C:/pythonProject/port/20190621/国投京唐港日调度量历史数据_data.csv',parse_dates=[4],infer_datetime_format=True)
    print (data)
    # 按日期降序
    data.sort_values('日期',inplace=True)
    print '====================='
    print data
    print '------------------------'
    # 读取CSV,并将数据转化为数组
    list=data.values.tolist()
    listLen = len(list)
    print listLen
    endCol =  list[listLen-1]
    print '最後一列的list : ' ,endCol
    # 獲取當天日期
    today = datetime.date.today()
    todays = today.strftime("%Y/%m/%d")
    print 'today : ', todays
    yesterday = today - datetime.timedelta(days=1)
    yesterdays = yesterday.strftime("%Y/%m/%d")
    print 'yesterday : ', yesterdays
    print 'read day : ', endCol[4].strftime("%Y/%m/%d")
    if endCol[4].strftime("%Y/%m/%d") == todays:
        print '1'
        return endCol
    else:
        print '2'
        return endCol

# 读取本地cvs文件 获取指定内容 (讀取京唐老港)
def read_jingtanglaogangcvs():
    # 1. 用pandas读取csv
    data = pd.read_csv(u'C:/pythonProject/port/20190621/京唐港老港日调度量历史数据_data.csv',parse_dates=[0],infer_datetime_format=True)
    print (data)
    # 按日期降序
    data.sort_values('日期',inplace=True)
    print '====================='
    print data
    print '------------------------'
    # 读取CSV,并将数据转化为数组
    list=data.values.tolist()
    listLen = len(list)
    print listLen
    endCol =  list[listLen-1]
    print '最後一列的list : ' ,endCol
    # 獲取當天日期
    today = datetime.date.today()
    todays = today.strftime("%Y/%m/%d")
    print 'today : ', todays
    yesterday = today - datetime.timedelta(days=1)
    yesterdays = yesterday.strftime("%Y/%m/%d")
    print 'yesterday : ', yesterdays
    print 'read day : ', endCol[0].strftime("%Y/%m/%d")
    if endCol[0].strftime("%Y/%m/%d") == todays:
        print '1'
        return endCol
    else:
        print '2'
        return endCol

# 读取本地cvs文件 获取指定内容 (讀取36-40码头)
def read_36_40cvs():
    # 1. 用pandas读取csv
    data = pd.read_csv(u'C:/pythonProject/port/20190621/京唐港专业码头日调度_data.csv',parse_dates=[0],infer_datetime_format=True)
    print (data)
    # 按日期降序
    data.sort_values('日期',inplace=True)
    print '====================='
    print data
    print '------------------------'
    # 读取CSV,并将数据转化为数组
    list=data.values.tolist()
    listLen = len(list)
    print listLen
    endCol =  list[listLen-1]
    print '最後一列的list : ' ,endCol
    # 獲取當天日期
    today = datetime.date.today()
    todays = today.strftime("%Y/%m/%d")
    print 'today : ', todays
    yesterday = today - datetime.timedelta(days=1)
    yesterdays = yesterday.strftime("%Y/%m/%d")
    print 'yesterday : ', yesterdays
    print 'read day : ', endCol[0].strftime("%Y/%m/%d")
    if endCol[0].strftime("%Y/%m/%d") == todays:
        print '1'
        return endCol
    else:
        print '2'
        return endCol


# 读取本地cvs文件 获取指定内容 (讀取国投曹妃甸港)
def read_guotoucaofeicvs():
    # 1. 用pandas读取csv
    data = pd.read_csv(u'C:/pythonProject/port/20190621/国投曹妃甸港日调度量历史数据_data.csv',parse_dates=[4],infer_datetime_format=True)
    print (data)
    # 按日期降序
    data.sort_values('日期',inplace=True)
    print '====================='
    print data
    print '------------------------'
    # 读取CSV,并将数据转化为数组
    list=data.values.tolist()
    listLen = len(list)
    print listLen
    endCol =  list[listLen-1]
    print '最後一列的list : ' ,endCol
    # 獲取當天日期
    today = datetime.date.today()
    todays = today.strftime("%Y/%m/%d")
    print 'today : ', todays
    yesterday = today - datetime.timedelta(days=1)
    yesterdays = yesterday.strftime("%Y/%m/%d")
    print 'yesterday : ', yesterdays
    print 'read day : ', endCol[4].strftime("%Y/%m/%d")
    if endCol[4].strftime("%Y/%m/%d") == todays:
        print '1'
        return endCol
    else:
        print '2'
        return endCol
# 读取本地cvs文件 获取指定内容 (讀取曹妃甸二期)
def read_caofeierqicvs():
    # 1. 用pandas读取csv
    data = pd.read_csv(u'C:/pythonProject/port/20190621/曹妃甸港二期日调度量历史数据_data.csv',parse_dates=[3],infer_datetime_format=True)
    print (data)
    # 按日期降序
    data.sort_values('日期',inplace=True)
    print '====================='
    print data
    print '------------------------'
    # 读取CSV,并将数据转化为数组
    list=data.values.tolist()
    listLen = len(list)
    print listLen
    endCol =  list[listLen-1]
    print '最後一列的list : ' ,endCol
    # 獲取當天日期
    today = datetime.date.today()
    todays = today.strftime("%Y/%m/%d")
    print 'today : ', todays
    yesterday = today - datetime.timedelta(days=1)
    yesterdays = yesterday.strftime("%Y/%m/%d")
    print 'yesterday : ', yesterdays
    print 'read day : ', endCol[3].strftime("%Y/%m/%d")
    if endCol[3].strftime("%Y/%m/%d") == todays:
        print '1'
        return endCol
    else:
        print '2'
        return endCol

# 读取本地cvs文件 获取指定内容 (讀取华能曹妃甸)
def read_huanengcaofeierqicvs():
    # 1. 用pandas读取csv
    data = pd.read_csv(u'C:/pythonProject/port/20190621/华能曹妃甸港二期日调度量历史数据_data.csv',parse_dates=[3],infer_datetime_format=True)
    print (data)
    # 按日期降序
    data.sort_values('日期',inplace=True)
    print '====================='
    print data
    print '------------------------'
    # 读取CSV,并将数据转化为数组
    list=data.values.tolist()
    listLen = len(list)
    print listLen
    endCol =  list[listLen-1]
    print '最後一列的list : ' ,endCol
    # 獲取當天日期
    today = datetime.date.today()
    todays = today.strftime("%Y/%m/%d")
    print 'today : ', todays
    yesterday = today - datetime.timedelta(days=1)
    yesterdays = yesterday.strftime("%Y/%m/%d")
    print 'yesterday : ', yesterdays
    print 'read day : ', endCol[3].strftime("%Y/%m/%d")
    if endCol[3].strftime("%Y/%m/%d") == todays:
        print '1'
        return endCol
    else:
        print '2'

# 读取本地cvs文件 获取指定内容 (讀取黄骅港日调度量历史数据_data.csv)
def read_huanghuacvs():
    # 1. 用pandas读取csv
    data = pd.read_csv(u'C:/pythonProject/port/20190621/黄骅港日调度量历史数据_data.csv',parse_dates=[3],infer_datetime_format=True)
    print (data)
    # 按日期降序
    data.sort_values('日期',inplace=True)
    print '====================='
    print data
    print '------------------------'
    # 读取CSV,并将数据转化为数组
    list=data.values.tolist()
    listLen = len(list)
    print listLen
    endCol =  list[listLen-1]
    print '最後一列的list : ' ,endCol
    # 獲取當天日期
    today = datetime.date.today()
    todays = today.strftime("%Y/%m/%d")
    print 'today : ', todays
    yesterday = today - datetime.timedelta(days=1)
    yesterdays = yesterday.strftime("%Y/%m/%d")
    print 'yesterday : ', yesterdays
    print 'read day : ', endCol[3].strftime("%Y/%m/%d")
    if endCol[3].strftime("%Y/%m/%d") == todays:
        print '1'
        return endCol
    else:
        print '2'
        return endCol
    
if __name__ == "__main__":
    # Script entry point: build the workbook (write_excel itself calls every
    # per-port CSV reader), then report completion on stdout.
    write_excel()
    print("爬虫结束...")


你可能感兴趣的:(python)