开发环境:
mac10.12+python3.6
我这里示例代码是excel形式的省区市转成json的,excel文件以及代码打包请移步
简单粗暴,直接上代码~
# coding=utf-8
# # 文件位置
# fileLocation = "/Users/luoxiaohui/Desktop/2016.xls"
#
# import xlrd
# import xlwt
#
# bk = xlrd.open_workbook(fileLocation)
# sh = bk.sheet_by_name("2016年行政区划")
# nrows = sh.nrows
# ncols = sh.ncols
# # print(nrows)
# # print(ncols)
#
# filename = xlwt.Workbook()
# sheet = filename.add_sheet("精简版")
#
# for row in range(3, nrows):
# # print("第" + str(row) + "行的数据:" + sh.cell(row, 1).value)
# if str(sh.cell(row, 3).value) == '5.0':
# print("你被剔除了。。。")
# else :
# print("准备写入数据到新表格中。。。")
# for col in range(4):
# sheet.write(row, col, sh.cell(row, col).value)
#
# filename.save("/Users/luoxiaohui/Desktop/new.xls")
# ====================================================狠严肃的分割线============================================================
# 文件位置
fileLocation = "/Users/luoxiaohui/Desktop/new.xls"
import xlrd
bk = xlrd.open_workbook(fileLocation)
sh = bk.sheet_by_name("精简版")
allRows = sh.nrows
allCols = sh.ncols
# 所有省级的行数数组
proviceRows = []
# # 所有市级的行数数组
# cityRows = []
# # 所有区级的行数数组
# areaRows = []
# 构造字典 装载所有数据
allJsonData = []
# 构造字典 装载省级数据
allProvinceData = {}
# 构造字典 装载市级数据
allCityData = {}
# 构造字典 装载县级数据
allAreaData = {}
nrows = 3630
# 获取所有省级,市级,区级的行数数组
for row in range(0, nrows):
# print("第" + str(row) + "行的数据:" + sh.cell(row, 1).value)
areaCode = str(sh.cell(row, 0).value).replace(".0","")
areaType = str(sh.cell(row, 3).value).replace(".0","")
# 首先,获取所有areaType为2的行数
if areaType == "2":
proviceRows.append(row)
for proviceIndex in range(0, len(proviceRows)):
print("============================以下是" + str(
sh.cell(proviceRows[proviceIndex], 1).value) + "的数据=========================")
itemData = {}
provinceCode = str(sh.cell(proviceRows[proviceIndex], 0).value).replace(".0","")
provinceName = str(sh.cell(proviceRows[proviceIndex], 1).value)
provinceType = str(sh.cell(proviceRows[proviceIndex], 3).value).replace(".0","")
province = {}
itemData["provinceCode"] = provinceCode
itemData["provinceName"] = provinceName
itemData["provinceType"] = provinceType
# itemData["province"] = province
# 获取每个省里面省,市,区三个级别所有的总数
if proviceIndex + 1 != len(proviceRows):
count = proviceRows[proviceIndex + 1] - proviceRows[proviceIndex]
print("count-->" + str(count))
else:
count = nrows - proviceRows[proviceIndex]
# 保存每个省份中所有市级所在的行数索引
cityRows = []
# 获取每个省级中所有areaType是3,也即市级的索引,城市名,areaType数据
for row in range(proviceRows[proviceIndex], proviceRows[proviceIndex] + count):
areaType = str(sh.cell(row, 3).value).replace(".0","")
areaCode = str(sh.cell(row, 0).value).replace(".0","")
cityName = str(sh.cell(row, 1).value)
print("areaType-->" + areaType + ";row-->" + str(row))
if areaType == "3":
cityRows.append(row)
cities = []
for cityIndex in range(len(cityRows)):
print(sh.cell(cityRows[cityIndex], 1).value + "下的县区级数据有:")
cityType = str(sh.cell(cityRows[cityIndex], 3).value).replace(".0","")
cityCode = str(sh.cell(cityRows[cityIndex], 0).value).replace(".0","")
cityName = str(sh.cell(cityRows[cityIndex], 1).value)
city = {}
city["cityType"] = cityType
city["cityCode"] = cityCode
city["cityName"] = cityName
if (cityIndex + 1) != len(cityRows):
areaCount = cityRows[cityIndex + 1] - cityRows[cityIndex] - 1
else:
# 如果是最后一个的话:
if len(cityRows) == 1:
# 如果只有一个市级城市,用城市总数减去一个省行数,减去一个市行数
areaCount = count - 2
else:
# 最后一个市级中的区级城市总数=下一个省级城市的索引减去本省中最后一个市级城市的索引
if proviceIndex != len(proviceRows) - 1:
areaCount = proviceRows[proviceIndex + 1] - cityRows[len(cityRows) - 1] - 1
areas = []
# 保存每个市级中所有区县级所在的行数索引
areaRows = []
# 获取每个市级中所有areaType是4,也即区县级的索引,城市名,areaType的数据
for row in range(cityRows[cityIndex], cityRows[cityIndex] + areaCount + 1):
if row < nrows:
areaType = str(sh.cell(row, 3).value).replace(".0","")
areaCode = str(sh.cell(row, 0).value).replace(".0","")
areaName = str(sh.cell(row, 1).value)
if areaType == "4":
print(areaName)
area = {}
area["areaType"] = areaType
area["areaCode"] = areaCode
area["areaName"] = areaName
areas.append(area)
city["areas"] = areas
cities.append(city)
itemData["cities"] = cities
allJsonData.append(itemData)
# 对香港和澳门特殊处理,因为他俩只有3,4两级,木有2级
proviceRows = []
# 获取所有省级,市级,区级的行数数组
for row in range(nrows, allRows):
areaCode = str(sh.cell(row, 0).value).replace(".0","")
areaType = str(sh.cell(row, 3).value).replace(".0","")
# 首先,获取所有areaType为3的行数
if areaType == "3":
print("row-->" + str(row))
proviceRows.append(row)
for proviceIndex in range(0, len(proviceRows)):
itemData = {}
provinceCode = str(sh.cell(proviceRows[proviceIndex], 0).value).replace(".0","")
provinceName = str(sh.cell(proviceRows[proviceIndex], 1).value)
provinceType = str(sh.cell(proviceRows[proviceIndex], 3).value).replace(".0","")
print("============================以下是" + str(
sh.cell(proviceRows[proviceIndex], 1).value) + "的数据=========================")
province = {}
itemData["provinceCode"] = provinceCode
itemData["provinceName"] = provinceName
itemData["provinceType"] = provinceType
# itemData["province"] = province
# 获取每个省里面省,市两个级别所有的总数
if proviceIndex + 1 != len(proviceRows):
count = proviceRows[proviceIndex + 1] - proviceRows[proviceIndex]
else:
count = allRows - proviceRows[proviceIndex]
# 保存每个省份中所有市级所在的行数索引
cities = []
city = {}
city["cityType"] = provinceType
city["cityCode"] = provinceCode
city["cityName"] = provinceName
areas = []
# 获取每个省级中所有areaType是3,也即市级的索引,城市名,areaType数据
for row in range(proviceRows[proviceIndex], proviceRows[proviceIndex] + count):
area = {}
areaType = str(sh.cell(row, 3).value).replace(".0","")
areaCode = str(sh.cell(row, 0).value).replace(".0","")
areaName = str(sh.cell(row, 1).value)
if areaType == "4":
print(areaName)
area["areaType"] = areaType
area["areaCode"] = areaCode
area["areaName"] = areaName
areas.append(area)
# city其实只有一个,所以放在这里
city["areas"] = areas
cities.append(city)
itemData["cities"] = cities
allJsonData.append(itemData)
import json
data = json.dumps(allJsonData, ensure_ascii=False, default="utf-8")
def save(filename, contents):
fh = open(filename, 'w')
fh.write(contents)
fh.close()
save('/Users/luoxiaohui/Desktop/new.txt', data)