栈操作实例－－整理SUMO分类数据

tree_map.py提取原理：

树的结构如下

,实体

, , , , , , ,物质的

, , , , , , , , , , , ,物体

, , , , , , , , , , , , , , , , ,自身连续物体

, , , , , , , , , , , , , , , , , , , , , ,物质

, , , , , , , , , , , , , , , , , , , , , , , , , , ,纯物质

, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,基本物质

, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,金属

, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,原子

, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,次原子粒子

, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,原子核

, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,电子

, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,质子

, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,中子

, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,化合物

, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,水

, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,养份

1.建一个列表，将所有的节点存入treenodeList中；

2.逗号表示树的深度，当深度与大于前一个的时候，将当前层次数和当前所在的ID压入到栈中，并将栈顶元素和当前比较元素添加到tupleList中；

3.当下一个进栈的层次数大于或等于栈顶元素，弹出栈顶元素，直到栈顶元素小于当前层次数时，将当前层次数和当前所在的ID压入到栈中，并将栈顶元素和当前比较元素添加到tupleList中；

4.输出树的最底两层数据即为self.tupleList[i][len(self.tupleList[i])-1][0]，self.tupleList[i][len(self.tupleList[i])-1][1]。

# !/usr/bin/env python
# coding:utf8

#
# Finename: tree_map.py
# Function: 将大分类树中的底层数据
#
# Current version: 1.0
# author: Chen Yu
# Date: 25/9/2012
#

# 栈操作类
class Stack(object):
   def __init__(self):
    self.stack = []
   def push(self,object):
    self.stack.append(object)
   def top(self):
     return self.stack[len(self.stack)-1]
   def pop(self):
     return self.stack.pop()
   def length(self):
     return len(self.stack)

class ExtractTreeunder:
   def __init__(self):
    self.treedataList = []
    self.undertreeDir = {}
    self.treenodeList = []
    self.spacenumList = []
    self.tupleList = [[]]

# 数据的加载和预处理
   def load(self,infile,nodefile):
     try:
       # 加载整棵层次树
      rfile = open(infile, ' r ')
      self.treedataList = rfile.read().split( ' \n ')
      rfile.close()
       for i in range(len(self.treedataList)):
        self.treedataList[i] = self.treedataList[i].split( ' , ')
        self.tupleList.append([[]])
       # 加载所有的叶子节点
      rfile = open(nodefile, ' r ')
      self.treenodeList = rfile.read().split( ' \n ')
      rfile.close()

     except IOError:
       return None

   def extract_tree(self):
     # 统计空格数目
    i = 0
    lens = len(self.treenodeList)
     while(1):
       if i == lens:
         break
      nowflag = 0
       for j in range(len(self.treedataList[i])):
         if self.treedataList[i][j] == ' ':
          nowflag += 1
         else:
           break
      self.spacenumList.append(nowflag)
      i+=1
     # 获取的二元组
    s = Stack()
    i = 1
     # 标记层次
    tuplenum = 0
    s.push([self.spacenumList[0],0])
     while(1):
       if i == lens:
         break
       if i + 2 < lens and self.spacenumList[i-1] < self.spacenumList[i]:
        topdata = s.top()
       #   print self.treenodeList[topdata[1]],'i =',i,self.treenodeList[i]
        s.push([self.spacenumList[i],i])
        self.tupleList[tuplenum].append([self.treenodeList[topdata[1]],self.treenodeList[i]])
       elif i + 2 < lens and self.spacenumList[i-1] >= self.spacenumList[i]:
        tuplenum += 1
         while(1):
           print ' 2 '
          s.pop()
          topdata = s.top()
           if topdata[0] < self.spacenumList[i]:
            s.push([self.spacenumList[i],i])
             print self.treenodeList[topdata[1]], ' i = ',i,self.treenodeList[i]
            self.tupleList[tuplenum].append([self.treenodeList[topdata[1]],self.treenodeList[i]])
             break
      i+=1

   # 分别生成所有的组合，叶子节点，最底两层的数据
   def output(self,outfile,outfile2,outfile3):
     # 取出最低层两层节点
    wfile = open(outfile, ' wa+ ')
    wfile2 = open(outfile2, ' wa+ ')
    wfile3 = open(outfile3, ' wa+ ')
     for i in range(len(self.tupleList)):
       if len(self.tupleList[i][len(self.tupleList[i])-1]) >= 2:
        wfile2.write(self.tupleList[i][len(self.tupleList[i])-1][1] + ' \n ')
        wfile3.write(self.tupleList[i][len(self.tupleList[i])-1][0]+ ' ' + self.tupleList[i][len(self.tupleList[i])-1][1] + ' \n ')
       for j in range(len(self.tupleList[i])):
         print self.tupleList[i][j]
         if len(self.tupleList[i][j])>=2:
          wfile.write(self.tupleList[i][j][0] + ' ' + self.tupleList[i][j][1] + ' \n ')
    wfile.close()
    wfile2.close()

if __name__ == ' __main__ ':
  t = ExtractTreeunder()
  t.load( ' chinatree.txt ', ' chinatreenode.txt ')
  t.extract_tree()
  t.output( ' all_chinanode.txt ', ' treeunder.txt ', ' treeundertuple.txt ')

栈操作实例－－整理SUMO分类数据

你可能感兴趣的:(栈操作实例－－整理SUMO分类数据)