Markdown 文档解析器

import pandas as pd
import numpy as np
import os


class markdownParser():
    """Extract a pipe-delimited Markdown table from a file as a DataFrame.

    The file is read once, as UTF-8, when the parser is constructed. Its
    lines are kept in ``self.lines`` and parsed on demand by
    ``getDataFrame``.
    """

    def __init__(self, path: str) -> None:
        """Read the Markdown file at *path* into memory.

        Args:
            path: Location of the Markdown file.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        self.mdPath = path
        # Kept only so code that reads this attribute keeps working. No file
        # is created at (or deleted from) this location: a Markdown file is
        # plain text and can be read directly, so the byte-for-byte ".txt"
        # copy is unnecessary and risked clobbering an unrelated file.
        self.txtPath = path[:-3] + '.txt'

        # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, which
        # would otherwise hide the "|" of a table starting on the first line.
        # The context manager closes the handle deterministically.
        with open(self.mdPath, 'r', encoding='utf-8-sig') as md_file:
            self.lines = md_file.readlines()

    @staticmethod
    def _is_table_row(line: str) -> bool:
        """Return True if *line* starts (after indentation) with a pipe."""
        return line.lstrip().startswith('|')

    @staticmethod
    def _split_row(line: str) -> list:
        """Split one table row into its whitespace-stripped cell strings."""
        # Strip whitespace first so trailing blanks after the closing pipe
        # do not produce a spurious extra cell.
        return [cell.strip() for cell in line.strip().strip('|').split('|')]

    @classmethod
    def _is_separator_row(cls, line: str) -> bool:
        """Return True if every cell of *line* is dashes with optional colons."""
        if not cls._is_table_row(line):
            return False
        for cell in cls._split_row(line):
            dashes = cell[1:] if cell.startswith(':') else cell
            dashes = dashes[:-1] if dashes.endswith(':') else dashes
            # A separator cell needs at least one dash and nothing else.
            if not dashes or dashes.strip('-'):
                return False
        return True

    def getDataFrame(self) -> pd.DataFrame:
        """Return the first Markdown table in the file as a DataFrame.

        A table is a header row, immediately followed by a separator row
        (cells such as ``---``, ``:---`` or ``:---:``), followed by zero or
        more data rows. The table ends at the first line that does not start
        with a pipe. Escaped pipes (``\\|``) inside cells are not
        interpreted and are treated as cell boundaries.

        Returns:
            A DataFrame whose columns are the header cells and whose values
            are the cell texts as strings, one row per data row.

        Raises:
            ValueError: If the file contains no table, or if a data row does
                not have the same number of cells as the header.
        """
        lines = self.lines

        header_index = None
        # Start at 1: the separator row must have a header row above it.
        for index in range(1, len(lines)):
            if (self._is_separator_row(lines[index])
                    and self._is_table_row(lines[index - 1])):
                header_index = index - 1
                break

        if header_index is None:
            raise ValueError("No Table")

        columns = self._split_row(lines[header_index])

        rows = []
        for line in lines[header_index + 2:]:
            # Stop at the first non-table line so text that follows the
            # table is never mistaken for data.
            if not self._is_table_row(line):
                break
            cells = self._split_row(line)
            if len(cells) != len(columns):
                raise ValueError(
                    "table row has %d cells, expected %d: %r"
                    % (len(cells), len(columns), line.rstrip('\n'))
                )
            rows.append(cells)

        # Build the frame in one step instead of growing it row by row.
        return pd.DataFrame(rows, columns=columns)

你可能感兴趣的:(处理脚本,python,numpy)