现有大约160份水文年鉴数据Excel表格,包含水位、降水量、水文要素摘录表和逐日表,格式如下图所示。
需将其存储到数据库中,如下图所示。
如此大量的数据,如果采用手动复制粘贴的方法,需要耗费大量时间。考虑到水文年鉴数据格式统一,可以采用Python对Excel表格数据进行提取,大致分为3步。
import xlrd
import xlwt
import calendar
import arrow
import os
# List every file found below a directory
def get_file_name(file_dir):
    """Collect the path of every file located under ``file_dir``.

    The tree is walked recursively with ``os.walk`` and each file name is
    joined onto the directory it was found in.

    :param file_dir: directory to start walking from
    :return: list of file paths, in the order ``os.walk`` yields them
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(file_dir)
        for name in names
    ]
def isLeapYear(years):
    """Return the number of days in the year ``years``.

    Despite the name, the result is not a boolean: it is 366 for a
    Gregorian leap year and 365 for any other year.

    :param years: the year, as an int
    :return: 366 or 365
    :raises AssertionError: if ``years`` is not an int
    """
    assert isinstance(years, int), "请输入整数年,如 2018"
    # Leap year: divisible by 400, or divisible by 4 but not by 100.
    if years % 400 == 0:
        return 366
    if years % 100 == 0:
        return 365
    return 366 if years % 4 == 0 else 365
def getAllDayPerYear(years):
    """Return every calendar date of one year as ``YYYY-MM-DD`` strings.

    :param years: the year, as an int or a numeric string such as ``"2018"``
    :return: list of 365 date strings (366 in a leap year), running from
        January 1st to December 31st in order
    :raises ValueError: if ``years`` cannot be converted to a valid year
    """
    # Local import: stdlib date arithmetic is sufficient here, so the start
    # date no longer has to be re-parsed from text on every iteration.
    import datetime

    year = int(years)
    first_day = datetime.date(year, 1, 1)
    days_in_year = (datetime.date(year, 12, 31) - first_day).days + 1
    return [
        (first_day + datetime.timedelta(days=offset)).isoformat()
        for offset in range(days_in_year)
    ]
# Drop the entries at the given positions from a list
def remove_space(list_of_data, index_of_space):
    """Return a copy of ``list_of_data`` without the listed positions.

    :param list_of_data: sequence to filter
    :param index_of_space: iterable of zero-based positions to drop
    :return: new list holding the remaining items in their original order
    """
    # Build the lookup set once so each membership test is O(1) instead of
    # scanning the whole index list for every element.
    positions_to_drop = set(index_of_space)
    return [
        item
        for position, item in enumerate(list_of_data)
        if position not in positions_to_drop
    ]
# Convert a raw daily-precipitation sheet into the layout ArcHIGH accepts
def transform(file_name):
    """Rewrite one daily-precipitation workbook as ``<name>NewStyle.xls``.

    The first sheet is read: the year comes from cell (1, 2), the station
    code from cell (1, 5), and each month occupies one column (January in
    column 2) with its daily values starting at row 4. Days whose value is
    an empty string are left out of the output.

    :param file_name: path of the source ``.xls`` workbook
    :return: None; the result is saved next to the source file
    """
    table = xlrd.open_workbook(file_name).sheets()[0]
    year = int(table.cell(1, 2).value)
    station_code = table.cell(1, 5).value

    # Gather the whole year, month by month, taking only as many rows as
    # the month has days.
    raw_values = []
    for month in range(1, 13):
        month_length = calendar.monthrange(year, month)[1]
        raw_values.extend(table.col_values(1 + month)[4:4 + month_length])

    # Strip the "·" and "*" marks from the values.
    list_of_precipitation = [
        value.replace("·", '').replace("*", '') for value in raw_values
    ]

    # Every record starts on its own day and ends on the following day.
    start_date = getAllDayPerYear(str(year))
    end_date = start_date[1:] + [str(year + 1) + "-01-01"]

    # Positions of the blank values; the same rows are dropped from all
    # three parallel lists.
    index_of_space = [
        index
        for index, value in enumerate(list_of_precipitation)
        if value == ""
    ]
    start_date = remove_space(start_date, index_of_space)
    end_date = remove_space(end_date, index_of_space)
    list_of_precipitation = remove_space(list_of_precipitation, index_of_space)

    output = xlwt.Workbook()
    sheet1 = output.add_sheet("逐日降水量")
    for col, title in enumerate(("测站编码", "开始日期", "结束日期", "降水量(mm)")):
        sheet1.write(0, col, title)

    def write_column(col, values):
        # Data rows begin at row 1, directly below the header row.
        for row, value in enumerate(values, 1):
            sheet1.write(row, col, value)

    write_column(0, [station_code] * len(list_of_precipitation))
    write_column(1, start_date)
    write_column(2, end_date)
    write_column(3, list_of_precipitation)
    output.save(os.path.splitext(file_name)[0] + "NewStyle" + ".xls")
# Script entry point
if __name__ == "__main__":
    print("===本程序将逐日降水量表转换为ArcHIGH能接受的形式NewStyle===")
    # Every file below the current working directory
    list_of_files = get_file_name(os.getcwd())
    # Only the daily-precipitation workbooks are converted
    targets = [path for path in list_of_files if "逐日降水量表.xls" in path]
    for file_name in targets:
        transform(file_name)
    print("===转换完毕,请查看...逐日降水量表NewStyle.xls文件===")
import xlrd
import xlwt
import calendar
import arrow
import os
# List every file found below a directory
def get_file_name(file_dir):
    """Return the paths of all files in ``file_dir`` and its subdirectories.

    :param file_dir: directory to start walking from
    :return: list of file paths (directory joined with file name), in the
        order ``os.walk`` yields them
    """
    collected = []
    for folder, _subdirs, names in os.walk(file_dir):
        collected.extend(os.path.join(folder, name) for name in names)
    return collected
def isLeapYear(years):
    """Return how many days the year ``years`` has.

    The name suggests a boolean, but the value returned is the day count:
    366 for a Gregorian leap year, otherwise 365.

    :param years: the year, as an int
    :return: 366 or 365
    :raises AssertionError: if ``years`` is not an int
    """
    assert isinstance(years, int), "请输入整数年,如 2018"
    divisible_by_4 = years % 4 == 0
    divisible_by_100 = years % 100 == 0
    divisible_by_400 = years % 400 == 0
    leap = divisible_by_400 or (divisible_by_4 and not divisible_by_100)
    return 366 if leap else 365
def getAllDayPerYear(years):
    """Return every calendar date of one year as ``YYYY-MM-DD`` strings.

    :param years: the year, as an int or a numeric string such as ``"2018"``
    :return: list of 365 date strings (366 in a leap year), running from
        January 1st to December 31st in order
    :raises ValueError: if ``years`` cannot be converted to a valid year
    """
    # Local import: stdlib date arithmetic is sufficient here, so the start
    # date no longer has to be re-parsed from text on every iteration.
    import datetime

    year = int(years)
    first_day = datetime.date(year, 1, 1)
    days_in_year = (datetime.date(year, 12, 31) - first_day).days + 1
    return [
        (first_day + datetime.timedelta(days=offset)).isoformat()
        for offset in range(days_in_year)
    ]
# Convert a raw daily-mean-stage sheet into the layout ArcHIGH accepts
def transform(file_name):
    """Rewrite one daily-mean-stage workbook as ``<name>NewStyle.xls``.

    The first sheet is read. Row 2 carries the header values: year in
    column 1, station code in column 4, and the datum-conversion text in
    columns 6, 9, 10 and 11, where column 9 is the offset added to each
    stage to obtain the 85-datum stage. Each month occupies one column
    (January in column 2) with its daily values starting at row 5.

    :param file_name: path of the source ``.xls`` workbook
    :return: None; the result is saved next to the source file
    """
    table = xlrd.open_workbook(file_name).sheets()[0]
    year = int(table.cell(2, 1).value)
    station_code = table.cell(2, 4).value

    offset_text = table.cell(2, 9).value
    # NOTE(review): eval() executes whatever text the workbook holds. If
    # this cell only ever contains a plain signed number, float() would be
    # the safe replacement - confirm against the real data first.
    stage_change = eval(offset_text)
    datum = table.cell(2, 6).value + offset_text \
        + table.cell(2, 10).value + table.cell(2, 11).value

    raw_stages = []
    for month in range(1, 13):
        month_length = calendar.monthrange(year, month)[1]
        raw_stages.extend(table.col_values(1 + month)[5:5 + month_length])

    # Values written without an integer part reuse the integer part of the
    # most recent value that had one; the "X" mark is stripped afterwards.
    stage_of_year = []
    stage_int = '0'
    for stage in raw_stages:
        if '.' in stage:
            stage_int = stage.split('.')[0]
        else:
            stage = stage_int + '.' + stage
        stage_of_year.append(stage.replace("X", ''))

    # Stage relative to the 85 datum, kept as text rounded to 3 decimals.
    list_of_stage_85 = [
        str(round(eval(stage) + stage_change, 3)) for stage in stage_of_year
    ]

    output = xlwt.Workbook()
    sheet1 = output.add_sheet("逐日平均水位")
    titles = ("测站编码", "日期", "85基准水位(m)", "表内水位(m)", "基准面转换关系")
    for col, title in enumerate(titles):
        sheet1.write(0, col, title)
    # The datum description is written once, on the first data row.
    sheet1.write(1, 4, datum)

    columns = (
        [station_code] * len(stage_of_year),
        getAllDayPerYear(str(year)),
        list_of_stage_85,
        stage_of_year,
    )
    for col, values in enumerate(columns):
        for row, value in enumerate(values, 1):
            sheet1.write(row, col, value)
    output.save(os.path.splitext(file_name)[0] + "NewStyle" + ".xls")
# Script entry point
if __name__ == "__main__":
    print("===本程序将逐日平均水位表转换为ArcHIGH能接受的形式NewStyle===")
    # Every file below the current working directory
    list_of_files = get_file_name(os.getcwd())
    # Only the daily-mean-stage workbooks are converted
    targets = [path for path in list_of_files if "逐日平均水位表.xls" in path]
    for file_name in targets:
        # Show which workbook is being processed
        print(file_name)
        transform(file_name)
    print("===转换完毕,请查看...逐日平均水位表NewStyle.xls文件===")
import xlrd
import xlwt
import calendar
import arrow
import os
import re
# List every file found below a directory
def get_file_name(file_dir):
    """Collect the path of every file located under ``file_dir``.

    :param file_dir: directory to start walking from
    :return: list of file paths built by joining each file name onto the
        directory ``os.walk`` reported it in
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(file_dir)
        for name in names
    ]
def isLeapYear(years):
    """Return the number of days in the year ``years``.

    Despite the name, the result is not a boolean: it is 366 for a
    Gregorian leap year and 365 for any other year.

    :param years: the year, as an int
    :return: 366 or 365
    :raises AssertionError: if ``years`` is not an int
    """
    assert isinstance(years, int), "请输入整数年,如 2018"
    # Leap year: divisible by 400, or divisible by 4 but not by 100.
    if years % 400 == 0:
        return 366
    if years % 100 == 0:
        return 365
    return 366 if years % 4 == 0 else 365
def getAllDayPerYear(years):
    """Return every calendar date of one year as ``YYYY-MM-DD`` strings.

    :param years: the year, as an int or a numeric string such as ``"2018"``
    :return: list of 365 date strings (366 in a leap year), running from
        January 1st to December 31st in order
    :raises ValueError: if ``years`` cannot be converted to a valid year
    """
    # Local import: stdlib date arithmetic is sufficient here, so the start
    # date no longer has to be re-parsed from text on every iteration.
    import datetime

    year = int(years)
    first_day = datetime.date(year, 1, 1)
    days_in_year = (datetime.date(year, 12, 31) - first_day).days + 1
    return [
        (first_day + datetime.timedelta(days=offset)).isoformat()
        for offset in range(days_in_year)
    ]
# Convert a raw daily water-surface-evaporation sheet into the layout ArcHIGH accepts
def transform(file_name):
    """Rewrite one daily-evaporation workbook as ``<name>NewStyle.xls``.

    The first sheet is read. Year and station code are both parsed out of
    the text in cell (1, 0); each month occupies one column (January in
    column 2) with its daily values starting at row 4.

    :param file_name: path of the source ``.xls`` workbook
    :return: None; the result is saved next to the source file
    """
    table = xlrd.open_workbook(file_name).sheets()[0]

    # The header text sits in the left-most cell of a merged range. After
    # splitting on ':' and ' ', the year is item 1 and the station code
    # is item 3.
    header_parts = re.split('[: ]', table.cell(1, 0).value)
    year = int(header_parts[1])
    station_code = int(header_parts[3])

    evaporation_of_year = []
    for month in range(1, 13):
        month_length = calendar.monthrange(year, month)[1]
        evaporation_of_year.extend(table.col_values(1 + month)[4:4 + month_length])

    output = xlwt.Workbook()
    sheet1 = output.add_sheet("逐日水面蒸发量")
    for col, title in enumerate(("测站编码", "日期", "水面蒸发量(mm)")):
        sheet1.write(0, col, title)

    columns = (
        [station_code] * len(evaporation_of_year),
        getAllDayPerYear(str(year)),
        evaporation_of_year,
    )
    # Data rows begin at row 1, directly below the header row.
    for col, values in enumerate(columns):
        for row, value in enumerate(values, 1):
            sheet1.write(row, col, value)
    output.save(os.path.splitext(file_name)[0] + "NewStyle" + ".xls")
# Script entry point
if __name__ == "__main__":
    print("===本程序将逐日水面蒸发量表转换为ArcHIGH能接受的形式NewStyle===")
    # Every file below the current working directory
    list_of_files = get_file_name(os.getcwd())
    # Only the daily-evaporation workbooks are converted
    targets = [path for path in list_of_files if "逐日水面蒸发量表.xls" in path]
    for file_name in targets:
        transform(file_name)
    print("===转换完毕,请查看...逐日水面蒸发量表NewStyle.xls文件===")
import xlrd
import xlwt
import os
import datetime
# List every file found below a directory
def get_file_name(file_dir):
    """Return the paths of all files in ``file_dir`` and its subdirectories.

    :param file_dir: directory to start walking from
    :return: list of file paths (directory joined with file name), in the
        order ``os.walk`` yields them
    """
    collected = []
    for folder, _subdirs, names in os.walk(file_dir):
        collected.extend(os.path.join(folder, name) for name in names)
    return collected
# Drop the entries at the given positions from a list
def remove_space(list_of_data, index_of_space):
    """Return a copy of ``list_of_data`` without the listed positions.

    :param list_of_data: sequence to filter
    :param index_of_space: iterable of zero-based positions to drop
    :return: new list holding the remaining items in their original order
    """
    # Build the lookup set once so each membership test is O(1) instead of
    # scanning the whole index list for every element.
    positions_to_drop = set(index_of_space)
    return [
        item
        for position, item in enumerate(list_of_data)
        if position not in positions_to_drop
    ]
# An end time of 24:00 is not a valid timestamp; express it as 00:00 of the next day
def change_time_type(year, month, day):
    """Return midnight of the day after the given date as text.

    :param year: calendar year, as an int
    :param month: month number, as an int
    :param day: day of the month, as an int
    :return: timestamp formatted as ``YYYY-MM-DD HH:MM:SS``
    """
    midnight = datetime.datetime(year, month, day)
    following_midnight = midnight + datetime.timedelta(days=1)
    return following_midnight.strftime("%Y-%m-%d %H:%M:%S")
# Convert a precipitation-excerpt sheet into the layout ArcHIGH accepts
def transform(file_name):
    """Rewrite one precipitation-excerpt workbook as ``<name>NewStyle.xls``.

    The first sheet is read: year from cell (1, 2), station code from cell
    (1, 5) and the page count from the second whitespace-separated token
    of cell (1, 17). Each page spans 54 rows, of which 50 (starting at row
    4 of the page) hold data, laid out as four side-by-side groups of five
    columns: month, day, start hour, end hour, precipitation.

    :param file_name: path of the source ``.xls`` workbook
    :return: None; the result is saved next to the source file
    """
    table = xlrd.open_workbook(file_name).sheets()[0]
    year = table.cell(1, 2).value
    station_code = table.cell(1, 5).value
    pages = int(table.cell(1, 17).value.split()[1])

    # Five parallel lists, in sheet column order:
    # month, day, start hour, end hour, precipitation.
    fields = [[] for _ in range(5)]
    for page in range(pages):
        first_row = 4 + 54 * page
        for group in range(4):
            for offset, values in enumerate(fields):
                values.extend(
                    table.col_values(offset + 5 * group)[first_row:first_row + 50]
                )
    (list_of_month, list_of_day, list_of_start_time,
     list_of_end_time, list_of_precipitation) = fields

    # Rows with no precipitation value (the unused tail of the last page)
    # are dropped from every list.
    index_of_space = [
        index
        for index, value in enumerate(list_of_precipitation)
        if value == ""
    ]
    list_of_month = remove_space(list_of_month, index_of_space)
    list_of_day = remove_space(list_of_day, index_of_space)
    list_of_start_time = remove_space(list_of_start_time, index_of_space)
    list_of_end_time = remove_space(list_of_end_time, index_of_space)
    list_of_precipitation = remove_space(list_of_precipitation, index_of_space)
    # The "·" / "*" marks are not stripped from these values; that step
    # was already disabled in the original script.

    list_of_start_date_time = []
    list_of_end_date_time = []
    # Month and day are only filled in on the row where they change, so
    # the latest seen values are carried forward.
    current_month = "0"
    current_day = "0"
    rows = zip(list_of_month, list_of_day, list_of_start_time, list_of_end_time)
    for month, day, start_time, end_time in rows:
        # The month can only change on a row that also gives a new day.
        if day != "":
            current_day = day
            if month != "":
                current_month = month
        date_parts = (int(year), int(current_month), int(current_day))
        list_of_start_date_time.append(
            "{}-{}-{} {}:00:00".format(*date_parts, int(start_time))
        )
        if end_time == 24:
            # Hour 24 becomes 00:00 of the following day.
            list_of_end_date_time.append(change_time_type(*date_parts))
        else:
            list_of_end_date_time.append(
                "{}-{}-{} {}:00:00".format(*date_parts, int(end_time))
            )

    output = xlwt.Workbook()
    sheet1 = output.add_sheet("降水量摘录")
    for col, title in enumerate(("测站编码", "开始时间", "结束时间", "降水量(mm)")):
        sheet1.write(0, col, title)

    columns = (
        [station_code] * len(list_of_precipitation),
        list_of_start_date_time,
        list_of_end_date_time,
        list_of_precipitation,
    )
    # Data rows begin at row 1, directly below the header row.
    for col, values in enumerate(columns):
        for row, value in enumerate(values, 1):
            sheet1.write(row, col, value)
    output.save(os.path.splitext(file_name)[0] + "NewStyle" + ".xls")
# Script entry point
if __name__ == "__main__":
    print("===本程序将降水量摘录表转换为ArcHIGH能接受的形式NewStyle===")
    # Every file below the current working directory
    list_of_files = get_file_name(os.getcwd())
    # Only the precipitation-excerpt workbooks are converted
    targets = [path for path in list_of_files if "降水量摘录表.xls" in path]
    for file_name in targets:
        transform(file_name)
    print("===转换完毕,请查看...降水量摘录表NewStyle.xls文件===")
import xlrd
import xlwt
import os
import re
# List every file found below a directory
def get_file_name(file_dir):
    """Collect the path of every file located under ``file_dir``.

    :param file_dir: directory to start walking from
    :return: list of file paths built by joining each file name onto the
        directory ``os.walk`` reported it in
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(file_dir)
        for name in names
    ]
# Drop the entries at the given positions from a list
def remove_space(list_of_data, index_of_space):
    """Return a copy of ``list_of_data`` without the listed positions.

    :param list_of_data: sequence to filter
    :param index_of_space: iterable of zero-based positions to drop
    :return: new list holding the remaining items in their original order
    """
    # Build the lookup set once so each membership test is O(1) instead of
    # scanning the whole index list for every element.
    positions_to_drop = set(index_of_space)
    return [
        item
        for position, item in enumerate(list_of_data)
        if position not in positions_to_drop
    ]
# Convert a flood-stage-excerpt sheet into the layout ArcHIGH accepts
def transform(file_name):
    """Rewrite one flood-stage-excerpt workbook as ``<name>NewStyle.xls``.

    The first sheet is read: year from cell (1, 2), station code from cell
    (1, 5) and the page count from the first run of digits in the first
    whitespace-separated token of cell (1, 17). Each page spans 54 rows,
    of which 50 (starting at row 4 of the page) hold data, laid out as
    five side-by-side groups of four columns: month, day, time, stage.

    The datum conversion is taken from the matching daily-mean-stage
    workbook, whose path is derived by replacing "洪水水位摘录表" with
    "逐日平均水位表" in ``file_name``; that workbook must exist.

    :param file_name: path of the source ``.xls`` workbook
    :return: None; the result is saved next to the source file
    """
    table = xlrd.open_workbook(file_name).sheets()[0]
    year = table.cell(1, 2).value
    station_code = table.cell(1, 5).value

    # Row 2 of the daily-mean-stage sheet holds the datum conversion:
    # column 9 is the offset, columns 6, 9, 10, 11 form its description.
    data1 = xlrd.open_workbook(file_name.replace("洪水水位摘录表", "逐日平均水位表"))
    table1 = data1.sheets()[0]
    offset_text = table1.cell(2, 9).value
    # NOTE(review): eval() executes whatever text the workbook holds. If
    # this cell only ever contains a plain signed number, float() would be
    # the safe replacement - confirm against the real data first.
    stage_change = eval(offset_text)
    datum = table1.cell(2, 6).value + offset_text \
        + table1.cell(2, 10).value + table1.cell(2, 11).value

    # Raw string: "\d" in a plain literal is an invalid escape sequence
    # and triggers a SyntaxWarning on recent Python versions.
    pages = int(re.findall(r"\d+", table.cell(1, 17).value.split()[0])[0])

    # Four parallel lists, in sheet column order: month, day, time, stage.
    fields = [[] for _ in range(4)]
    for page in range(pages):
        first_row = 4 + 54 * page
        for group in range(5):
            for offset, values in enumerate(fields):
                values.extend(
                    table.col_values(offset + 4 * group)[first_row:first_row + 50]
                )
    list_of_month, list_of_day, list_of_time, list_of_stage = fields

    # Rows with no stage value (the unused tail of the last page) are
    # dropped from every list.
    index_of_space = [
        index for index, value in enumerate(list_of_stage) if value == ""
    ]
    list_of_month = remove_space(list_of_month, index_of_space)
    list_of_day = remove_space(list_of_day, index_of_space)
    list_of_time = remove_space(list_of_time, index_of_space)
    list_of_stage = remove_space(list_of_stage, index_of_space)

    list_of_date_time = []
    # Month and day are only filled in on the row where they change, so
    # the latest seen values are carried forward.
    current_month = "0"
    current_day = "0"
    for month, day, time in zip(list_of_month, list_of_day, list_of_time):
        # The month can only change on a row that also gives a new day.
        if day != "":
            current_day = day
            if month != "":
                current_month = month
        # A numeric cell is a whole hour; other values are used as given.
        if isinstance(time, float):
            time = str(int(time)) + ":00"
        list_of_date_time.append(
            "{}-{}-{} {}:00".format(
                int(year), int(current_month), int(current_day), time
            )
        )

    # Values written without an integer part reuse the integer part of the
    # most recent value that had one.
    stage_int = '0'
    for index, stage in enumerate(list_of_stage):
        if '.' in stage:
            stage_int = stage.split('.')[0]
        else:
            list_of_stage[index] = stage_int + '.' + stage

    # Stage relative to the 85 datum, kept as text rounded to 3 decimals.
    list_of_stage_85 = [
        str(round(eval(stage) + stage_change, 3)) for stage in list_of_stage
    ]

    output = xlwt.Workbook()
    sheet1 = output.add_sheet("洪水水位摘录")
    titles = ("测站编码", "日期时间", "85基准水位(m)", "表内水位(m)", "基准面转换关系")
    for col, title in enumerate(titles):
        sheet1.write(0, col, title)
    # The datum description is written once, on the first data row.
    sheet1.write(1, 4, datum)

    columns = (
        [station_code] * len(list_of_stage),
        list_of_date_time,
        list_of_stage_85,
        list_of_stage,
    )
    # Data rows begin at row 1, directly below the header row.
    for col, values in enumerate(columns):
        for row, value in enumerate(values, 1):
            sheet1.write(row, col, value)
    output.save(os.path.splitext(file_name)[0] + "NewStyle" + ".xls")
# Script entry point
if __name__ == "__main__":
    print("===本程序将洪水水位摘录表转换为ArcHIGH能接受的形式NewStyle===")
    # Every file below the current working directory
    list_of_files = get_file_name(os.getcwd())
    # Only the flood-stage-excerpt workbooks are converted
    targets = [path for path in list_of_files if "洪水水位摘录表.xls" in path]
    for file_name in targets:
        transform(file_name)
    print("===转换完毕,请查看...洪水水位摘录表NewStyle.xls文件===")
import xlrd
import xlwt
import os
import re
# List every file found below a directory
def get_file_name(file_dir):
    """Return the paths of all files in ``file_dir`` and its subdirectories.

    :param file_dir: directory to start walking from
    :return: list of file paths (directory joined with file name), in the
        order ``os.walk`` yields them
    """
    collected = []
    for folder, _subdirs, names in os.walk(file_dir):
        collected.extend(os.path.join(folder, name) for name in names)
    return collected
# Drop the entries at the given positions from a list
def remove_space(list_of_data, index_of_space):
    """Return a copy of ``list_of_data`` without the listed positions.

    :param list_of_data: sequence to filter
    :param index_of_space: iterable of zero-based positions to drop
    :return: new list holding the remaining items in their original order
    """
    # Build the lookup set once so each membership test is O(1) instead of
    # scanning the whole index list for every element.
    positions_to_drop = set(index_of_space)
    return [
        item
        for position, item in enumerate(list_of_data)
        if position not in positions_to_drop
    ]
# Convert a flood hydrological-element excerpt sheet into the layout ArcHIGH accepts
def transform(file_name):
    """Rewrite one two-element flood excerpt workbook as ``<name>NewStyle.xls``.

    The first sheet is read: year from cell (2, 2), station code from cell
    (2, 6) and the page count from the first run of digits in the first
    whitespace-separated token of cell (2, 18). Each page spans 55 rows,
    of which 50 (starting at row 5 of the page) hold data, laid out as
    four side-by-side groups of five columns: month, day, time, stage,
    discharge.

    The datum conversion is taken from the matching daily-mean-stage
    workbook, whose path is derived by replacing
    "洪水水文要素摘录表(二要素)" with "逐日平均水位表" in ``file_name``;
    that workbook must exist.

    :param file_name: path of the source ``.xls`` workbook
    :return: None; the result is saved next to the source file
    """
    table = xlrd.open_workbook(file_name).sheets()[0]
    year = table.cell(2, 2).value
    station_code = table.cell(2, 6).value

    # Row 2 of the daily-mean-stage sheet holds the datum conversion:
    # column 9 is the offset, columns 6, 9, 10, 11 form its description.
    data1 = xlrd.open_workbook(file_name.replace("洪水水文要素摘录表(二要素)", "逐日平均水位表"))
    table1 = data1.sheets()[0]
    offset_text = table1.cell(2, 9).value
    # NOTE(review): eval() executes whatever text the workbook holds. If
    # this cell only ever contains a plain signed number, float() would be
    # the safe replacement - confirm against the real data first.
    stage_change = eval(offset_text)
    datum = table1.cell(2, 6).value + offset_text \
        + table1.cell(2, 10).value + table1.cell(2, 11).value

    # Raw string: "\d" in a plain literal is an invalid escape sequence
    # and triggers a SyntaxWarning on recent Python versions.
    pages = int(re.findall(r"\d+", table.cell(2, 18).value.split()[0])[0])

    # Five parallel lists, in sheet column order:
    # month, day, time, stage, discharge.
    fields = [[] for _ in range(5)]
    for page in range(pages):
        first_row = 5 + 55 * page
        for group in range(4):
            for offset, values in enumerate(fields):
                values.extend(
                    table.col_values(offset + 5 * group)[first_row:first_row + 50]
                )
    (list_of_month, list_of_day, list_of_time,
     list_of_stage, list_of_discharge) = fields

    # Rows with no stage value (the unused tail of the last page) are
    # dropped from every list.
    index_of_space = [
        index for index, value in enumerate(list_of_stage) if value == ""
    ]
    list_of_month = remove_space(list_of_month, index_of_space)
    list_of_day = remove_space(list_of_day, index_of_space)
    list_of_time = remove_space(list_of_time, index_of_space)
    list_of_stage = remove_space(list_of_stage, index_of_space)
    list_of_discharge = remove_space(list_of_discharge, index_of_space)

    list_of_date_time = []
    # Month and day are only filled in on the row where they change, so
    # the latest seen values are carried forward.
    current_month = "0"
    current_day = "0"
    for month, day, time in zip(list_of_month, list_of_day, list_of_time):
        # The month can only change on a row that also gives a new day.
        if day != "":
            current_day = day
            if month != "":
                current_month = month
        # A numeric cell is a whole hour; other values are used as given.
        if isinstance(time, float):
            time = str(int(time)) + ":00"
        list_of_date_time.append(
            "{}-{}-{} {}:00".format(
                int(year), int(current_month), int(current_day), time
            )
        )

    # Values written without an integer part reuse the integer part of the
    # most recent value that had one.
    stage_int = '0'
    for index, stage in enumerate(list_of_stage):
        if '.' in stage:
            stage_int = stage.split('.')[0]
        else:
            list_of_stage[index] = stage_int + '.' + stage

    # Stage relative to the 85 datum, kept as text rounded to 3 decimals.
    list_of_stage_85 = [
        str(round(eval(stage) + stage_change, 3)) for stage in list_of_stage
    ]

    output = xlwt.Workbook()
    sheet1 = output.add_sheet("洪水水文要素摘录")
    titles = (
        "测站编码", "日期时间", "85基准水位(m)",
        "流量(m3/s)", "表内水位(m)", "基准面转换关系",
    )
    for col, title in enumerate(titles):
        sheet1.write(0, col, title)
    # The datum description is written once, on the first data row.
    sheet1.write(1, 5, datum)

    columns = (
        [station_code] * len(list_of_stage),
        list_of_date_time,
        list_of_stage_85,
        list_of_discharge,
        list_of_stage,
    )
    # Data rows begin at row 1, directly below the header row.
    for col, values in enumerate(columns):
        for row, value in enumerate(values, 1):
            sheet1.write(row, col, value)
    output.save(os.path.splitext(file_name)[0] + "NewStyle" + ".xls")
# Script entry point
if __name__ == "__main__":
    print("===本程序将洪水水文要素摘录表(二要素)转换为ArcHIGH能接受的形式NewStyle===")
    # Every file below the current working directory
    list_of_files = get_file_name(os.getcwd())
    # Only the two-element flood excerpt workbooks are converted
    targets = [
        path for path in list_of_files if "洪水水文要素摘录表(二要素).xls" in path
    ]
    for file_name in targets:
        transform(file_name)
    # The completion message previously named the output file with a
    # duplicated first character; it now matches the real file name.
    print("===转换完毕,请查看...洪水水文要素摘录表(二要素)NewStyle.xls文件===")
import xlrd
import xlwt
import os
#====将所有的NewStyle提取到一张汇总表上====
# List every file found below a directory
def get_file_name(file_dir):
    """Collect the path of every file located under ``file_dir``.

    :param file_dir: directory to start walking from
    :return: list of file paths built by joining each file name onto the
        directory ``os.walk`` reported it in
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(file_dir)
        for name in names
    ]
def extract_to_summary(file_name, sheet, cols, sheet_num):
    """Append the data rows of one NewStyle workbook to a summary sheet.

    The first ``cols`` columns of the workbook's first sheet are copied,
    skipping its header row, to the rows following those already written.
    The running row count is kept in the module-level ``rows_sum`` list,
    which this function reads and updates.

    :param file_name: path of the NewStyle workbook to read
    :param sheet: xlwt worksheet that receives the rows
    :param cols: number of leading columns to copy
    :param sheet_num: index of this sheet's counter in ``rows_sum``
    :return: None
    """
    table = xlrd.open_workbook(file_name).sheets()[0]
    # Row 0 of the summary sheet is its header, hence the extra 1.
    first_free_row = 1 + rows_sum[sheet_num]
    for col in range(cols):
        for offset, value in enumerate(table.col_values(col)[1:]):
            sheet.write(first_free_row + offset, col, value)
    # Count every source row except its header.
    rows_sum[sheet_num] += table.nrows - 1
# Script entry point
if __name__ == "__main__":
    print("===本程序将所有的NewStyle提取到一张水文数据汇总表上===")
    # Every file below the current working directory
    list_of_files = get_file_name(os.getcwd())

    # One entry per summary sheet:
    # (source table name, summary sheet title, header fields).
    # The number of header fields is also the number of columns copied.
    layout = [
        ("逐日降水量表", "逐日降水量", ("STCD", "TMBEG", "TMEND", "R")),
        ("逐日平均水位表", "逐日平均水位", ("STCD", "TM", "Z")),
        ("逐日水面蒸发量表", "逐日水面蒸发量", ("STCD", "TM", "E")),
        ("降水量摘录表", "降水量摘录", ("STCD", "TMBEG", "TMEND", "R")),
        ("洪水水位摘录表", "洪水水位摘录", ("STCD", "TM", "Z")),
        ("洪水水文要素摘录表(二要素)", "洪水水文要素摘录", ("STCD", "TM", "Z", "Q")),
    ]

    # Rows written so far to each summary sheet; updated by
    # extract_to_summary.
    rows_sum = [0] * len(layout)

    output = xlwt.Workbook()
    list_of_sheets = []
    for _source_name, title, header in layout:
        sheet = output.add_sheet(title)
        for col, field in enumerate(header):
            sheet.write(0, col, field)
        list_of_sheets.append(sheet)

    # Route each NewStyle workbook to the sheet of its table type.
    for file_name in list_of_files:
        for index, (source_name, _title, header) in enumerate(layout):
            if source_name + "NewStyle" in file_name:
                extract_to_summary(
                    file_name, list_of_sheets[index], len(header), index
                )
    output.save("水文数据汇总表.xls")
    print("===转换完毕,请查看水文数据汇总表.xls文件===")
import xlrd
import xlwt
import os
# List the name of every file found below a directory
def get_file_name(file_dir):
    """Collect the bare name of every file located under ``file_dir``.

    Only the file names are returned; the directory each file was found
    in is not included.

    :param file_dir: directory to start walking from
    :return: list of file names, in the order ``os.walk`` yields them
    """
    return [
        name
        for _folder, _subdirs, names in os.walk(file_dir)
        for name in names
    ]
# Script entry point
if __name__ == "__main__":
    print("===打印目录中所有子文件===")
    # Names of every file below the current working directory
    list_of_files = get_file_name(os.getcwd())
    # Converted outputs are excluded; only the original files are listed
    list_of_origin_files = [
        name for name in list_of_files if "NewStyle" not in name
    ]
    print('\n'.join(list_of_origin_files))

    # Save the same listing as a one-column workbook
    output = xlwt.Workbook()
    sheet1 = output.add_sheet("目录名")
    sheet1.write(0, 0, "目录名")
    row = 1
    for name in list_of_origin_files:
        sheet1.write(row, 0, name)
        row += 1
    output.save("目录名.xls")
本地目录:笔记本电脑 D:\Projects\数据清洗\杭州南排\第一次任务\提供杭州林水局;办公室电脑 E:\Projects\数据清洗\杭州南排。