请参阅:java : pdfbox 读取 PDF文件内书签
请注意:书的目录.txt 编码:UTF-8,推荐用 Notepad++ 转换编码。
npm install elementtree ;
npm install xml2js ;
npm install ejs ;
node.js 用 readline 逐行读取 目录.txt 文件,用 elementtree 构建 XML 树,用 xml2js 转换为 json 数据,再用 ejs 渲染模板,生成 jstree 所需的 htm 文件。
编写 txt_etree_json.js 如下
// 读目录.txt文件,用xml2js 转换为json数据,生成jstree所需的文件
let fs = require('fs');
let process = require('process');
let path = require('path');
let readline = require('readline');
let et = require('elementtree');
let XML = et.XML;
let ElementTree = et.ElementTree;
// element = et.Element;
let subElement = et.SubElement;
let xml2js = require('xml2js');
let ejs = require('ejs');
if (process.argv.length <3){
console.error("usage: node txt_etree_json.js file1.txt");
return 1;
}
let file1 = process.argv[2];
if (! fs.statSync(file1).isFile()) {
console.error("it is not File.");
return 2;
}
let ext = path.extname(file1);
if (ext.toLowerCase() != '.txt'){
console.error(ext +" is not .txt");
return 3;
}
let file2 = file1.replace('.txt', '.htm');
console.log(file2);
let fRead = fs.createReadStream(file1);
let fWrite = fs.createWriteStream(file2);
// Create the readline interface that splits the input file into lines.
// No output stream is attached: lines are only consumed, never echoed.
let rline = readline.createInterface({
  input: fRead,
  // Treat "\r\n" as a single line break regardless of chunk timing.
  crlfDelay: Infinity,
  // The input is a file, not a TTY; in terminal mode readline would interpret
  // control bytes in the file as key presses instead of data.
  terminal: false
});
// Hierarchy is expressed by indentation: two spaces per level, at most five
// levels (level 1 has no indent). The widths are spelled with repeat() so
// they cannot be silently collapsed by whitespace-mangling tools; they match
// the column probes (index 2, 4, 6, 8) used by the 'line' handler.
const indent1 = ' '.repeat(2);
const indent2 = ' '.repeat(4);
const indent3 = ' '.repeat(6);
const indent4 = ' '.repeat(8);
// State shared between the 'line' and 'close' handlers:
//   txt           - text of the line currently being processed
//   title         - book title (first line of the file)
//   root          - root element of the tree being built
//   p_node        - most recently created node of level 1-4
//   node1..node5  - latest node created at each level
//   edge          - not used by the visible code; kept for compatibility
let txt, title, root, edge, p_node, node1, node2, node3, node4, node5;
// 1-based number of the line being processed; also used as the node id.
let ln = 1;

/**
 * Append a <children> element to `parent` and fill in its attributes.
 * @param {object} parent - elementtree element that receives the child
 * @param {number} id - value stored (as a string) in the 'id' attribute
 * @param {string} text - value stored in the 'text' attribute
 * @returns {object} the newly created element
 */
function addChild(parent, id, text) {
  const child = subElement(parent, "children");
  child.set('id', String(id));
  child.set('text', text);
  return child;
}

// 'line' event: the first line becomes the root node (book title); every
// later non-empty line becomes a node whose depth is given by its indent.
rline.on('line', function (line) {
  txt = line.trim();
  if (ln === 1) {
    // First line: book title -> root node.
    title = txt;
    root = et.Element("node");
    root.set('id', '1');
    root.set('text', txt);
    // Node state: expanded and disabled.
    // `const` keeps this local; it used to leak as an implicit global.
    const state = subElement(root, "state");
    state.set("opened", 'true');
    state.set("disabled", 'true');
    ln++;
    return;
  }
  // Drop the page number at the end of the line.
  // NOTE(review): this assumes the page number always occupies exactly the
  // last three characters - confirm against the tool that writes the file.
  txt = txt.slice(0, -3);
  if (txt.length === 0) {
    // Blank (or page-number-only) line: nothing to add.
  }
  else if (line.slice(0, 1) !== ' ') {
    // Level 1: child of the root.
    node1 = addChild(root, ln, txt);
    // A new branch starts here: forget deeper nodes of the previous branch so
    // that later entries can never be attached to a stale parent.
    node2 = node3 = node4 = node5 = undefined;
    p_node = node1; // remember the latest node
  }
  else if (line.startsWith(indent1) && line.slice(2, 3) !== ' ') {
    // Level 2: child of the nearest existing ancestor.
    node2 = addChild(node1 || root, ln, txt);
    node3 = node4 = node5 = undefined;
    p_node = node2;
  }
  else if (line.startsWith(indent2) && line.slice(4, 5) !== ' ') {
    // Level 3.
    node3 = addChild(node2 || node1 || root, ln, txt);
    node4 = node5 = undefined;
    p_node = node3;
  }
  else if (line.startsWith(indent3) && line.slice(6, 7) !== ' ') {
    // Level 4.
    node4 = addChild(node3 || node2 || node1 || root, ln, txt);
    node5 = undefined;
    p_node = node4;
  }
  else if (line.startsWith(indent4) && line.slice(8, 9) !== ' ') {
    // Level 5 (deepest supported level).
    node5 = addChild(node4 || node3 || node2 || node1 || root, ln, txt);
  }
  else {
    // Indentation does not match any supported level: report the line.
    console.log(txt);
  }
  ln++;
});
// Parser options are described in the xml2js README (www.npmjs.com/package/xml2js).
// The Parser converts the XML tree into a plain object:
//   explicitArray:false - a lone child element is NOT wrapped in an array
//   mergeAttrs:true     - attributes become ordinary properties of the node
let parser = new xml2js.Parser({explicitArray: false, mergeAttrs: true});
var etree, xml, json_str;
let filename = "jstree_template.htm";

/**
 * Make every `children` property an array, recursively.
 * With explicitArray:false a node that has exactly one child gets an object
 * instead of a one-element array.
 * NOTE(review): jsTree's JSON format expects `children` to be an array, so
 * such a child would presumably be dropped - confirm against the template.
 * @param {*} node - parsed node (mutated in place)
 * @returns {*} the same node
 */
function normalizeChildren(node) {
  if (node === null || typeof node !== 'object' || node.children === undefined) {
    return node;
  }
  if (!Array.isArray(node.children)) {
    node.children = [node.children];
  }
  node.children.forEach(normalizeChildren);
  return node;
}

// 'close' event: the whole file has been read; serialize the tree to XML,
// convert it to JSON and render the EJS template into the output file.
rline.on('close', function () {
  // `ln` is one past the last line processed, so report ln - 1.
  const lineCount = ln - 1;
  if (!root) {
    // Empty input: no title line was seen, so there is no tree to render.
    console.error("input file is empty.");
    process.exitCode = 4;
    fWrite.end();
    console.log("line number: " + lineCount);
    return;
  }
  etree = new ElementTree(root);
  xml = etree.write({'xml_declaration': false});
  parser.parseString(xml, function (err, res) {
    if (err) {
      // Report instead of throwing: an exception raised inside the parser
      // callback is hard to attribute and would leave the stream open.
      console.error(err);
      process.exitCode = 5;
      fWrite.end();
      return;
    }
    json_str = '[' + JSON.stringify(normalizeChildren(res['node']), null, 2) + ']';
    // Render only once the JSON is available. In the template, <%- %> emits
    // unescaped data (see the EJS docs).
    ejs.renderFile(filename, {title: title, mydir: json_str}, {}, function (renderErr, str) {
      if (renderErr) {
        console.error(renderErr);
        process.exitCode = 6;
        fWrite.end();
        return;
      }
      // str is the rendered HTML; end() writes it and closes the file.
      fWrite.end(str);
    });
  });
  console.log("line number: " + lineCount);
});
https://gitee.com/ 搜索 jstree 下载
编写 jstree_template.htm 如下
<%= title%>
运行 node txt_etree_json.js your_pdf_dir.txt
生成 your_pdf_dir.htm