- 你应该只处理两种类型的顶级标记:“节点”和“道路”
- “节点”和“道路”应该转换为常规键值对,以下情况除外:
- CREATED 数组中的属性应该添加到键“created”下
- 经纬度属性应该添加到“pos”数组中,以用于地理空间索引编制。确保“pos”数组中的值是浮点型,不是字符串。
- 如果二级标记“k”值包含存在问题的字符,则应忽略
- 如果二级标记“k”值以“addr:”开头,则应添加到字典“address”中
- 如果二级标记“k”值不是以“addr:”开头,但是包含“:”,你可以按照自己认为最合适的方式进行处理。例如,你可以像处理“addr:”那样将其拆分为二级字典,或者转换“:”以创建有效的键。
- 如果有第二个用于区分街道类型/方向的“:”,则应该忽略该标记,例如“addr:street:name”。
# Keys made up only of lowercase ASCII letters and underscores.
lower = re.compile(r'^([a-z]|_)*$')
# Two such lowercase/underscore segments separated by exactly one colon.
lower_colon = re.compile(r'^([a-z]|_)*:([a-z]|_)*$')
# Characters that make a tag key unusable as a document key.
problemchars = re.compile(r'[=\+/&<>;\'"\?%#$@\,\. \t\r\n]')

# Element attributes that are grouped under the "created" sub-document.
CREATED = [ "version", "changeset", "timestamp", "user", "uid"]

# Prefix identifying address tags, and the address sub-keys that are kept.
_ADDRESS_PREFIX = "addr:"
_ADDRESS_FIELDS = ("housenumber", "postcode", "street")
# Plain tag keys copied verbatim onto shaped "node" documents.
_NODE_TAG_FIELDS = ("amenity", "cuisine", "name", "phone")


def _usable_tag_items(element):
    """Yield (key, value) for each child that has a clean ``k`` attribute.

    Children without a ``k`` attribute (for example ``<nd>`` references) and
    children whose key contains a problematic character are skipped.
    """
    for child in element:
        key = child.get('k')
        if key is None or problemchars.search(key):
            continue
        yield key, child.get('v')


def _address_field(key):
    """Return the address sub-key for a kept ``addr:<field>`` key, else None.

    Keys with a second colon (e.g. ``addr:street:name``) never equal one of
    the kept fields and are therefore ignored.
    """
    if not key.startswith(_ADDRESS_PREFIX):
        return None
    field = key[len(_ADDRESS_PREFIX):]
    return field if field in _ADDRESS_FIELDS else None


def shape_element(element):
    """Convert an OSM ``node`` or ``way`` element into a plain dictionary.

    The result contains:

    * ``id`` and ``type`` (the element tag);
    * ``visible`` when the element carries that attribute;
    * ``pos`` as ``[lat, lon]`` floats (nodes only);
    * ``created`` holding every attribute listed in ``CREATED``;
    * ``amenity`` / ``cuisine`` / ``name`` / ``phone`` from tags whose key is
      exactly one of those names (nodes only);
    * ``node_refs`` listing the ``ref`` of each ``<nd>`` child (ways only);
    * ``address`` with ``housenumber`` / ``postcode`` / ``street`` taken from
      ``addr:<field>`` tags, present only when at least one was found.

    Tags whose key contains a character matched by ``problemchars`` are
    ignored for both nodes and ways.

    Args:
        element: An ElementTree-style element exposing ``tag``, ``attrib``,
            iteration over children and ``get``.

    Returns:
        The shaped dictionary, or ``None`` when the element is neither a
        ``node`` nor a ``way``.

    Raises:
        KeyError: If ``id``, any ``CREATED`` attribute, or (for nodes)
            ``lat`` / ``lon`` is missing.
        ValueError: If ``lat`` or ``lon`` is not a valid float.
    """
    if element.tag not in ("node", "way"):
        return None

    is_node = element.tag == "node"
    attrs = element.attrib

    node = {'id': attrs['id'], 'type': element.tag}
    # "visible" is optional; leave the key out rather than fail when absent.
    if 'visible' in attrs:
        node['visible'] = attrs['visible']
    if is_node:
        node['pos'] = [float(attrs['lat']), float(attrs['lon'])]
    # Each CREATED attribute is stored under its own name, so "user" and
    # "uid" are kept as separate entries.
    node['created'] = {field: attrs[field] for field in CREATED}

    address = {}
    for key, value in _usable_tag_items(element):
        field = _address_field(key)
        if field is not None:
            address[field] = value
        elif is_node and key in _NODE_TAG_FIELDS:
            # Exact key match: a key such as "gnis:county_name" must not
            # overwrite the real "name".
            node[key] = value

    if not is_node:
        node['node_refs'] = [
            child.get('ref') for child in element if child.tag == 'nd'
        ]
    if address:
        node['address'] = address
    return node
def process_map(file_in, pretty=False):
    """Shape every node/way in an OSM XML file and write them as JSON.

    Each element yielded by ``ET.iterparse`` is passed to ``shape_element``;
    every truthy result is written as one JSON document followed by a newline
    to ``"<file_in>.json"`` and also collected in memory.

    Args:
        file_in: Path of the OSM XML file to parse.
        pretty: When truthy, documents are serialized with ``indent=2``.

    Returns:
        A list of all shaped documents, in parse order.
    """
    out_path = "{0}.json".format(file_in)
    # indent=None is json.dumps' default, i.e. compact single-line output.
    indent = 2 if pretty else None
    shaped = []
    with codecs.open(out_path, "w") as sink:
        for _event, elem in ET.iterparse(file_in):
            doc = shape_element(elem)
            if not doc:
                continue
            shaped.append(doc)
            sink.write(json.dumps(doc, indent=indent) + "\n")
    return shaped
def test():
    """Run ``process_map`` on 'example.osm' and check the first and last documents.

    Pretty-prints the last shaped document, then asserts the exact content of
    the first document and the ``address`` / ``node_refs`` of the last one.
    """
    # NOTE: when running against a larger dataset, call process_map with
    # pretty=False. pretty=True adds extra whitespace to the output file and
    # makes it significantly larger.
    shaped = process_map('example.osm', True)
    final_doc = shaped[-1]
    pprint.pprint(final_doc)

    expected_created = {
        "changeset": "11129782",
        "user": "bbmiller",
        "version": "7",
        "uid": "451048",
        "timestamp": "2012-03-28T18:31:23Z",
    }
    expected_first = {
        "id": "261114295",
        "visible": "true",
        "type": "node",
        "pos": [41.9730791, -87.6866303],
        "created": expected_created,
    }
    assert shaped[0] == expected_first

    expected_address = {
        "street": "West Lexington St.",
        "housenumber": "1412",
    }
    assert final_doc["address"] == expected_address

    expected_refs = [
        "2199822281", "2199822390", "2199822392", "2199822369",
        "2199822370", "2199822284", "2199822281",
    ]
    assert final_doc["node_refs"] == expected_refs
# Run the self-test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test()