栈操作实例--整理SUMO分类数据
tree_map.py提取原理:
树的结构如下
,实体
, , , , , , ,物质的
, , , , , , , , , , , ,物体
, , , , , , , , , , , , , , , , ,自身连续物体
, , , , , , , , , , , , , , , , , , , , , ,物质
, , , , , , , , , , , , , , , , , , , , , , , , , , ,纯物质
, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,基本物质
, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,金属
, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,原子
, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,次原子粒子
, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,原子核
, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,电子
, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,质子
, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,中子
, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,化合物
, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,水
, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,养份
1.建一个列表,将所有的节点存入treenodeList中;
2.逗号表示树的深度,当深度与大于前一个的时候,将当前层次数和当前所在的ID压入到栈中,并将栈顶元素和当前比较元素添加到tupleList中;
3.当下一个进栈的层次数大于或等于栈顶元素,弹出栈顶元素,直到栈顶元素小于当前层次数时,将当前层次数和当前所在的ID压入到栈中,并将栈顶元素和当前比较元素添加到tupleList中;
4.输出树的最底两层数据即为self.tupleList[i][len(self.tupleList[i])-1][0],self.tupleList[i][len(self.tupleList[i])-1][1]。
#
!/usr/bin/env python
# coding:utf8
#
# Finename: tree_map.py
# Function: 将大分类树中的底层数据
#
# Current version: 1.0
# author: Chen Yu
# Date: 25/9/2012
#
# 栈操作类
class Stack(object):
def __init__(self):
self.stack = []
def push(self,object):
self.stack.append(object)
def top(self):
return self.stack[len(self.stack)-1]
def pop(self):
return self.stack.pop()
def length(self):
return len(self.stack)
class ExtractTreeunder:
def __init__(self):
self.treedataList = []
self.undertreeDir = {}
self.treenodeList = []
self.spacenumList = []
self.tupleList = [[]]
# 数据的加载和预处理
def load(self,infile,nodefile):
try:
# 加载整棵层次树
rfile = open(infile, ' r ')
self.treedataList = rfile.read().split( ' \n ')
rfile.close()
for i in range(len(self.treedataList)):
self.treedataList[i] = self.treedataList[i].split( ' , ')
self.tupleList.append([[]])
# 加载所有的叶子节点
rfile = open(nodefile, ' r ')
self.treenodeList = rfile.read().split( ' \n ')
rfile.close()
except IOError:
return None
def extract_tree(self):
# 统计空格数目
i = 0
lens = len(self.treenodeList)
while(1):
if i == lens:
break
nowflag = 0
for j in range(len(self.treedataList[i])):
if self.treedataList[i][j] == ' ':
nowflag += 1
else:
break
self.spacenumList.append(nowflag)
i+=1
# 获取的二元组
s = Stack()
i = 1
# 标记层次
tuplenum = 0
s.push([self.spacenumList[0],0])
while(1):
if i == lens:
break
if i + 2 < lens and self.spacenumList[i-1] < self.spacenumList[i]:
topdata = s.top()
# print self.treenodeList[topdata[1]],'i =',i,self.treenodeList[i]
s.push([self.spacenumList[i],i])
self.tupleList[tuplenum].append([self.treenodeList[topdata[1]],self.treenodeList[i]])
elif i + 2 < lens and self.spacenumList[i-1] >= self.spacenumList[i]:
tuplenum += 1
while(1):
print ' 2 '
s.pop()
topdata = s.top()
if topdata[0] < self.spacenumList[i]:
s.push([self.spacenumList[i],i])
print self.treenodeList[topdata[1]], ' i = ',i,self.treenodeList[i]
self.tupleList[tuplenum].append([self.treenodeList[topdata[1]],self.treenodeList[i]])
break
i+=1
# 分别生成所有的组合,叶子节点,最底两层的数据
def output(self,outfile,outfile2,outfile3):
# 取出最低层两层节点
wfile = open(outfile, ' wa+ ')
wfile2 = open(outfile2, ' wa+ ')
wfile3 = open(outfile3, ' wa+ ')
for i in range(len(self.tupleList)):
if len(self.tupleList[i][len(self.tupleList[i])-1]) >= 2:
wfile2.write(self.tupleList[i][len(self.tupleList[i])-1][1] + ' \n ')
wfile3.write(self.tupleList[i][len(self.tupleList[i])-1][0]+ ' ' + self.tupleList[i][len(self.tupleList[i])-1][1] + ' \n ')
for j in range(len(self.tupleList[i])):
print self.tupleList[i][j]
if len(self.tupleList[i][j])>=2:
wfile.write(self.tupleList[i][j][0] + ' ' + self.tupleList[i][j][1] + ' \n ')
wfile.close()
wfile2.close()
if __name__ == ' __main__ ':
t = ExtractTreeunder()
t.load( ' chinatree.txt ', ' chinatreenode.txt ')
t.extract_tree()
t.output( ' all_chinanode.txt ', ' treeunder.txt ', ' treeundertuple.txt ')
# coding:utf8
#
# Finename: tree_map.py
# Function: 将大分类树中的底层数据
#
# Current version: 1.0
# author: Chen Yu
# Date: 25/9/2012
#
# 栈操作类
class Stack(object):
def __init__(self):
self.stack = []
def push(self,object):
self.stack.append(object)
def top(self):
return self.stack[len(self.stack)-1]
def pop(self):
return self.stack.pop()
def length(self):
return len(self.stack)
class ExtractTreeunder:
def __init__(self):
self.treedataList = []
self.undertreeDir = {}
self.treenodeList = []
self.spacenumList = []
self.tupleList = [[]]
# 数据的加载和预处理
def load(self,infile,nodefile):
try:
# 加载整棵层次树
rfile = open(infile, ' r ')
self.treedataList = rfile.read().split( ' \n ')
rfile.close()
for i in range(len(self.treedataList)):
self.treedataList[i] = self.treedataList[i].split( ' , ')
self.tupleList.append([[]])
# 加载所有的叶子节点
rfile = open(nodefile, ' r ')
self.treenodeList = rfile.read().split( ' \n ')
rfile.close()
except IOError:
return None
def extract_tree(self):
# 统计空格数目
i = 0
lens = len(self.treenodeList)
while(1):
if i == lens:
break
nowflag = 0
for j in range(len(self.treedataList[i])):
if self.treedataList[i][j] == ' ':
nowflag += 1
else:
break
self.spacenumList.append(nowflag)
i+=1
# 获取的二元组
s = Stack()
i = 1
# 标记层次
tuplenum = 0
s.push([self.spacenumList[0],0])
while(1):
if i == lens:
break
if i + 2 < lens and self.spacenumList[i-1] < self.spacenumList[i]:
topdata = s.top()
# print self.treenodeList[topdata[1]],'i =',i,self.treenodeList[i]
s.push([self.spacenumList[i],i])
self.tupleList[tuplenum].append([self.treenodeList[topdata[1]],self.treenodeList[i]])
elif i + 2 < lens and self.spacenumList[i-1] >= self.spacenumList[i]:
tuplenum += 1
while(1):
print ' 2 '
s.pop()
topdata = s.top()
if topdata[0] < self.spacenumList[i]:
s.push([self.spacenumList[i],i])
print self.treenodeList[topdata[1]], ' i = ',i,self.treenodeList[i]
self.tupleList[tuplenum].append([self.treenodeList[topdata[1]],self.treenodeList[i]])
break
i+=1
# 分别生成所有的组合,叶子节点,最底两层的数据
def output(self,outfile,outfile2,outfile3):
# 取出最低层两层节点
wfile = open(outfile, ' wa+ ')
wfile2 = open(outfile2, ' wa+ ')
wfile3 = open(outfile3, ' wa+ ')
for i in range(len(self.tupleList)):
if len(self.tupleList[i][len(self.tupleList[i])-1]) >= 2:
wfile2.write(self.tupleList[i][len(self.tupleList[i])-1][1] + ' \n ')
wfile3.write(self.tupleList[i][len(self.tupleList[i])-1][0]+ ' ' + self.tupleList[i][len(self.tupleList[i])-1][1] + ' \n ')
for j in range(len(self.tupleList[i])):
print self.tupleList[i][j]
if len(self.tupleList[i][j])>=2:
wfile.write(self.tupleList[i][j][0] + ' ' + self.tupleList[i][j][1] + ' \n ')
wfile.close()
wfile2.close()
if __name__ == ' __main__ ':
t = ExtractTreeunder()
t.load( ' chinatree.txt ', ' chinatreenode.txt ')
t.extract_tree()
t.output( ' all_chinanode.txt ', ' treeunder.txt ', ' treeundertuple.txt ')