来源:《Python编程:从入门到实践》
要在文本文件中存储数据,最简单的方式是将数据作为一系列以逗号分隔的值(CSV)写入文件,这样的文件称为CSV文件
2014-1-5,61,44,26,18,7,-1,56,30,9,30.34,30.27,30.15,,,,10,4,,0.00,0,,195
这个项目使用的天气数据是从http://www.wunderground.com/history下载而来的
csv模块包含在Python标准库中,可用于分析CSV文件中的数据行,让我们能够快速提取感兴趣的值。
highs_lows.py
import csv
# Open the CSV file and print its header row.
filename = 'sitka_weather_07-2014.csv'
with open(filename) as f:
    # csv.reader wraps the open file object; next() returns the first row,
    # which is the header row.
    reader = csv.reader(f)
    header_row = next(reader)
    print(header_row)
调用csv.reader(),并将前面存储的文件对象作为实参传递给它,从而创建一个与该文件相关联的阅读器(reader)对象。
调用next()并将阅读器对象传递给它,它将返回文件中的下一行。
reader处理文件中以逗号分隔的第一行数据,并将每项数据都作为一个元素存储到列表中
highs_lows.py
--snip--
with open(filename) as f:
    reader = csv.reader(f)
    header_row = next(reader)

    # Print every column header together with its index in the row.
    for index, column_header in enumerate(header_row):
        print(index, column_header)
highs_lows.py
import csv
# Get the high temperatures from the file.
filename = 'sitka_weather_07-2014.csv'
with open(filename) as f:
    reader = csv.reader(f)
    # Consume the header row so the loop below starts at the first data row.
    header_row = next(reader)

    # Collect the value at index 1 of every remaining row (still a string).
    highs = []
    for row in reader:
        highs.append(row[1])

    print(highs)
reader从其停留的地方继续往下读取CSV文件,每次都自动返回当前位置的下一行
highs_lows.py
--snip--
highs = []
for row in reader:
    # Convert the string read from the CSV to an int before storing it.
    high = int(row[1])
    highs.append(high)

print(highs)
highs_lows.py
import csv
import matplotlib.pyplot as plt
# 从文件中获取最高气温
--snip--
# 根据数据绘制图形
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(highs, c='red')
# 设置图形的格式
plt.title("Daily high temperatures, July 2014", fontsize=24)
plt.xlabel('', fontsize=16)
plt.ylabel("Temperature (F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)
plt.show()
2014-7-1,64,56,50,53,51,48,--snip--
使用模块datetime中datetime类的方法strptime(),将表示日期的字符串(如'2014-7-1')转换为datetime对象
highs_lows.py
import csv
from datetime import datetime
from matplotlib import pyplot as plt
# Get dates and high temperatures from the file.
filename = 'sitka_weather_07-2014.csv'
with open(filename) as f:
    reader = csv.reader(f)
    # Consume the header row so the loop below starts at the first data row.
    header_row = next(reader)

    dates, highs = [], []
    for row in reader:
        # Column 0 is parsed as a date with the format "%Y-%m-%d".
        current_date = datetime.strptime(row[0], "%Y-%m-%d")
        dates.append(current_date)

        high = int(row[1])
        highs.append(high)

# Plot the data.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red')

# Format the plot.
plt.title("Daily high temperatures, July 2014", fontsize=24)
plt.xlabel('', fontsize=16)
fig.autofmt_xdate()
plt.ylabel("Temperature (F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)

plt.show()
highs_lows.py
--snip--
# 从文件中获取日期 & 最高气温
filename = 'sitka_weather_2014.csv'
--snip--
# 设置图形的格式
plt.title("Daily high temperatures - 2014", fontsize=24)
--snip--
highs_lows.py
--snip--
# Get dates, high temperatures and low temperatures from the file.
filename = 'sitka_weather_2014.csv'
with open(filename) as f:
    reader = csv.reader(f)
    # Consume the header row so the loop below starts at the first data row.
    header_row = next(reader)

    dates, highs, lows = [], [], []
    for row in reader:
        current_date = datetime.strptime(row[0], "%Y-%m-%d")
        dates.append(current_date)

        # Index 1 is used for the high and index 3 for the low.
        high = int(row[1])
        highs.append(high)

        low = int(row[3])
        lows.append(low)
# 根据数据绘制图形
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red')
plt.plot(dates, lows, c='blue')
# 设置图形的格式
plt.title("Daily high and low temperatures - 2014", fontsize=24)
--snip--
highs_lows.py
--snip--
# 根据数据绘制图形
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red', alpha=0.5)
plt.plot(dates, lows, c='blue', alpha=0.5)
plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.1)
--snip--
highs_lows.py
--snip--
# 从文件中获取日期 & 最高气温 & 最低气温
filename = 'death_valley_2014.csv'
with open(filename) as f:
--snip--
highs_lows.py
--snip--
# 从文件中获取日期 & 最高气温 & 最低气温
filename = 'death_valley_2014.csv'
with open(filename) as f:
--snip--
for row in reader:
    # csv.reader yields an empty list for a blank line; nothing to parse.
    if not row:
        continue

    # Parse the date on its own: if it fails, current_date would be unbound
    # (first row) or left over from the previous row, so report the raw field.
    try:
        current_date = datetime.strptime(row[0], "%Y-%m-%d")
    except ValueError:
        print(row[0], 'invalid date')
        continue

    # An empty temperature field raises ValueError; a row with too few
    # columns raises IndexError. Either way the row is reported and skipped.
    try:
        high = int(row[1])
        low = int(row[3])
    except (ValueError, IndexError):
        print(current_date, 'missing data')
    else:
        # Append only when all three values parsed, so the lists stay aligned.
        dates.append(current_date)
        highs.append(high)
        lows.append(low)
# 根据数据绘制图形
--snip--
# 设置图形的格式
title = "Daily high and low temperatures - 2014\nDeath Valley, CA"
plt.title(title, fontsize=20)
--snip--
try-except-else代码块
这部分内容可以去《Python:文件和异常》这篇文章复习一下。
solution:
import csv
from datetime import datetime
from matplotlib import pyplot as plt
def get_weather_data(filename, dates, highs, lows):
    """Append dates and high/low temperatures read from a CSV file.

    The first row of the file is treated as a header and skipped. For every
    following row, column 0 is parsed as a date ("%Y-%m-%d"), column 1 as the
    integer high and column 3 as the integer low. Rows that cannot be parsed
    are reported with print() and skipped, so the three lists always stay the
    same length.

    Args:
        filename: Path of the CSV file to read.
        dates: List that receives one datetime per valid row (mutated).
        highs: List that receives one int high per valid row (mutated).
        lows: List that receives one int low per valid row (mutated).

    Returns:
        None. Results are delivered by appending to the lists passed in.
    """
    # newline='' is what the csv module documentation asks for when opening
    # a file that is handed to csv.reader.
    with open(filename, newline='') as f:
        reader = csv.reader(f)
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)

        for row in reader:
            # csv.reader yields an empty list for a blank line.
            if not row:
                continue

            # Parse the date on its own: if it fails, current_date would be
            # unbound (first row) or left over from the previous row, so the
            # raw field is reported instead.
            try:
                current_date = datetime.strptime(row[0], "%Y-%m-%d")
            except ValueError:
                print(row[0], 'invalid date')
                continue

            # An empty temperature field raises ValueError; a row with too
            # few columns raises IndexError.
            try:
                high = int(row[1])
                low = int(row[3])
            except (ValueError, IndexError):
                print(current_date, 'missing data')
            else:
                dates.append(current_date)
                highs.append(high)
                lows.append(low)
# Load the Sitka data; get_weather_data appends to the lists passed in.
dates, highs, lows = [], [], []
get_weather_data('sitka_weather_2014.csv', dates, highs, lows)
# Plot the Sitka highs (red) and lows (blue) and shade the area between them.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red', alpha=0.6)
plt.plot(dates, lows, c='blue', alpha=0.6)
plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.15)
# Load the Death Valley data into fresh lists so it is not appended to the
# Sitka series.
dates, highs, lows = [], [], []
get_weather_data('death_valley_2014.csv', dates, highs, lows)
# Add the Death Valley data to the current plot, using lower alpha values
# than the Sitka series.
plt.plot(dates, highs, c='red', alpha=0.3)
plt.plot(dates, lows, c='blue', alpha=0.3)
plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.05)
# Format the plot.
title = "Daily high and low temperatures - 2014"
title += "\nSitka AK and Death Valley, CA"
plt.title(title, fontsize=20)
plt.xlabel('', fontsize=16)
fig.autofmt_xdate()
plt.ylabel("Temperature (F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)
# Pin the y-axis to the range 10 to 120.
plt.ylim(10, 120)
plt.show()