请移步到我的Blog,获得更好的阅读体验!本文的链接请点这里
起因
我的博客系统的文章是直接使用gitbook保存的markdown文档,后端使用marked.js来解析markdown文档。
因为gitbook比较优秀,markdown进行了语法扩展,拿gitbook写文章也就更爽了。但是,这样问题就出现了,marked.js无法解析gitbook扩展的语法,所以我们就需要手动去解析这些扩展的markdown语法。
marked.js如何扩展语法?
想要扩展语法,也就需要深入了解marked.js的工作机制。
marked.js的工作机制:
创建Lexer(词法解析器)实例:
const lexer = new marked.Lexer()
lexer将markdown字符串解析成tokens(tokens是官方文档的叫法,按照我的理解应该是markdown节点(node)):
const tokens = lexer.lex(md)
接下来创建Parser(解析器)实例:
const parser = new marked.Parser()
调用parser.parse()来解析tokens,生成html字符串:
const html = parser.parse(tokens)
到这里,marked.js的工作机制就完了。
那么,我们需要修改的部分也就很清晰了,我们需要修改lexer的生成tokens函数、parser的解析函数以及渲染函数。
如何修改Lexer生成token的函数?
我看了marked.js的源码发现,lexer生成token的函数是一个名为token的函数,这个函数的代码是这样的:
marked.Lexer.prototype.token = function (src, top) {
src = src.replace(/^ +$/gm, '');
var next,
// ...;
while (src) {
// newline
if (cap = this.rules.newline.exec(src)) {
src = src.substring(cap[0].length);
if (cap[0].length > 1) {
this.tokens.push({
type: 'space'
});
}
}
// ...
}
//...
}
也就是,lexer用while循环来使用正则表达式进行判断是否符合语法,如果符合就push到tokens,然后切割字符串进行下次循环。我们就如法炮制一条正则表达式和循环中的代码块:
const customRules = {
hint: /^{% hint style="([a-z]+)" %}\n(.*)?(\n{% endhint %})/
}
// 将自定义的rule添加到词法解析器
marked.Lexer.rules = Object.assign(marked.Lexer.rules,customRules)
const lexer = new marked.Lexer();
// 源代码是merge了新的block rules,所以需要在实例上再添加一次
/*
block.normal = merge({}, block);
function Lexer(options) {
this.tokens = [];
this.tokens.links = Object.create(null);
this.options = options || marked.defaults;
this.rules = block.normal;
}
*/
Object.assign(lexer.rules,customRules)
// 重写词法解析器
marked.Lexer.prototype.token = function (src, top) {
src = src.replace(/^ +$/gm, '');
var next,
//...;
while (src) {
// newline
if (cap = this.rules.newline.exec(src)) {
src = src.substring(cap[0].length);
if (cap[0].length > 1) {
this.tokens.push({
type: 'space'
});
}
}
// 我们扩展的hint语法
// hint
if (cap = this.rules.hint.exec(src)) {
var lastToken = this.tokens[this.tokens.length - 1];
src = src.substring(cap[0].length);
this.tokens.push({
type: 'hint',
state:cap[1],
text: cap[2]
});
continue;
}
}
//...
这样,词法解析器就修改完成了,接下来我们需要修改Parser的解析函数用来生成对应hint语法的html
如何修改Parser解析?
依然是翻源码,找到parser的解析函数:tok
marked.Parser.prototype.tok = function() {
switch (this.token.type) {
case 'space': {
return '';
}
case 'hr': {
return this.renderer.hr();
}
//...
}
//...
}
我们需要在switch中增加一个case:
marked.Parser.prototype.tok = function() {
switch (this.token.type) {
case 'space': {
return '';
}
case 'hint': {
return this.renderer.hint(this.token.state, this.token.text);
}
case 'hr': {
return this.renderer.hr();
}
//...
}
//...
}
这里的`this.renderer.hint`调用的是渲染器的函数,也就是让marked.js知道怎么渲染hint语法:
// 新建渲染器
const renderer = new marked.Renderer()
const hintState={
info:``,
warning: ``,
danger: ``,
success: ``
}
// 自定义语法的渲染函数
renderer.hint = (state, text) => {
return `
${text.replace(/\n/g,'
')}
`
}
最后,将renderer传递给marked.js:
marked.setOptions({ renderer })
这样,我们就新扩展了markdown的语法啦
例:
const md = `{% hint style="info" %}
1
{% endhint %}
{% hint style="warning" %}
2
{% endhint %}
{% hint style="danger" %}
3
{% endhint %}
{% hint style="success" %}
4
{% endhint %}`
marked(md)
展示出来就是这个样子:
{% hint style="warning" %}
上面的代码是不完整的,下面贴上完整的代码
{% endhint %}
import marked from 'marked'
import hljs from 'highlight.js'
const customRules = {
hint: /^{% hint style="([a-z]+)" %}\n(.*)?(\n{% endhint %})/
}
// 将自定义的rule添加到词法解析器
marked.Lexer.rules = Object.assign(marked.Lexer.rules,customRules)
const lexer = new marked.Lexer();
// 源代码是merge了新的block rules,所以需要在实例上再添加一次
/*
block.normal = merge({}, block);
function Lexer(options) {
this.tokens = [];
this.tokens.links = Object.create(null);
this.options = options || marked.defaults;
this.rules = block.normal;
}
*/
Object.assign(lexer.rules,customRules)
// 重写词法解析器
marked.Lexer.prototype.token = function (src, top) {
src = src.replace(/^ +$/gm, '');
var next,
loose,
cap,
bull,
b,
item,
listStart,
listItems,
t,
space,
i,
tag,
l,
isordered,
istask,
ischecked;
while (src) {
// newline
if (cap = this.rules.newline.exec(src)) {
src = src.substring(cap[0].length);
if (cap[0].length > 1) {
this.tokens.push({
type: 'space'
});
}
}
// hint
if (cap = this.rules.hint.exec(src)) {
var lastToken = this.tokens[this.tokens.length - 1];
src = src.substring(cap[0].length);
this.tokens.push({
type: 'hint',
state:cap[1],
text: cap[2]
});
continue;
}
// code
if (cap = this.rules.code.exec(src)) {
var lastToken = this.tokens[this.tokens.length - 1];
src = src.substring(cap[0].length);
// An indented code block cannot interrupt a paragraph.
if (lastToken && lastToken.type === 'paragraph') {
lastToken.text += '\n' + cap[0].trimRight();
} else {
cap = cap[0].replace(/^ {4}/gm, '');
this.tokens.push({
type: 'code',
codeBlockStyle: 'indented',
text: !this.options.pedantic
? rtrim(cap, '\n')
: cap
});
}
continue;
}
// fences
if (cap = this.rules.fences.exec(src)) {
src = src.substring(cap[0].length);
this.tokens.push({
type: 'code',
lang: cap[2] ? cap[2].trim() : cap[2],
text: cap[3] || ''
});
continue;
}
// heading
if (cap = this.rules.heading.exec(src)) {
src = src.substring(cap[0].length);
this.tokens.push({
type: 'heading',
depth: cap[1].length,
text: cap[2]
});
continue;
}
// table no leading pipe (gfm)
if (cap = this.rules.nptable.exec(src)) {
item = {
type: 'table',
header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []
};
if (item.header.length === item.align.length) {
src = src.substring(cap[0].length);
for (i = 0; i < item.align.length; i++) {
if (/^ *-+: *$/.test(item.align[i])) {
item.align[i] = 'right';
} else if (/^ *:-+: *$/.test(item.align[i])) {
item.align[i] = 'center';
} else if (/^ *:-+ *$/.test(item.align[i])) {
item.align[i] = 'left';
} else {
item.align[i] = null;
}
}
for (i = 0; i < item.cells.length; i++) {
item.cells[i] = splitCells(item.cells[i], item.header.length);
}
this.tokens.push(item);
continue;
}
}
// hr
if (cap = this.rules.hr.exec(src)) {
src = src.substring(cap[0].length);
this.tokens.push({
type: 'hr'
});
continue;
}
// blockquote
if (cap = this.rules.blockquote.exec(src)) {
src = src.substring(cap[0].length);
this.tokens.push({
type: 'blockquote_start'
});
cap = cap[0].replace(/^ *> ?/gm, '');
// Pass `top` to keep the current
// "toplevel" state. This is exactly
// how markdown.pl works.
this.token(cap, top);
this.tokens.push({
type: 'blockquote_end'
});
continue;
}
// list
if (cap = this.rules.list.exec(src)) {
src = src.substring(cap[0].length);
bull = cap[2];
isordered = bull.length > 1;
listStart = {
type: 'list_start',
ordered: isordered,
start: isordered ? +bull : '',
loose: false
};
this.tokens.push(listStart);
// Get each top-level item.
cap = cap[0].match(this.rules.item);
listItems = [];
next = false;
l = cap.length;
i = 0;
for (; i < l; i++) {
item = cap[i];
// Remove the list item's bullet
// so it is seen as the next token.
space = item.length;
item = item.replace(/^ *([*+-]|\d+\.) */, '');
// Outdent whatever the
// list item contains. Hacky.
if (~item.indexOf('\n ')) {
space -= item.length;
item = !this.options.pedantic
? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')
: item.replace(/^ {1,4}/gm, '');
}
// Determine whether the next list item belongs here.
// Backpedal if it does not belong in this list.
if (i !== l - 1) {
const bullet = /(?:[*+-]|\d{1,9}\.)/
b = bullet.exec(cap[i + 1])[0];
if (bull.length > 1 ? b.length === 1
: (b.length > 1 || (this.options.smartLists && b !== bull))) {
src = cap.slice(i + 1).join('\n') + src;
i = l - 1;
}
}
// Determine whether item is loose or not.
// Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/
// for discount behavior.
loose = next || /\n\n(?!\s*$)/.test(item);
if (i !== l - 1) {
next = item.charAt(item.length - 1) === '\n';
if (!loose) loose = next;
}
if (loose) {
listStart.loose = true;
}
// Check for task list items
istask = /^\[[ xX]\] /.test(item);
ischecked = undefined;
if (istask) {
ischecked = item[1] !== ' ';
item = item.replace(/^\[[ xX]\] +/, '');
}
t = {
type: 'list_item_start',
task: istask,
checked: ischecked,
loose: loose
};
listItems.push(t);
this.tokens.push(t);
// Recurse.
this.token(item, false);
this.tokens.push({
type: 'list_item_end'
});
}
if (listStart.loose) {
l = listItems.length;
i = 0;
for (; i < l; i++) {
listItems[i].loose = true;
}
}
this.tokens.push({
type: 'list_end'
});
continue;
}
// html
if (cap = this.rules.html.exec(src)) {
src = src.substring(cap[0].length);
this.tokens.push({
type: this.options.sanitize
? 'paragraph'
: 'html',
pre: !this.options.sanitizer
&& (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]
});
continue;
}
// def
if (top && (cap = this.rules.def.exec(src))) {
src = src.substring(cap[0].length);
if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
if (!this.tokens.links[tag]) {
this.tokens.links[tag] = {
href: cap[2],
title: cap[3]
};
}
continue;
}
// table (gfm)
if (cap = this.rules.table.exec(src)) {
item = {
type: 'table',
header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []
};
if (item.header.length === item.align.length) {
src = src.substring(cap[0].length);
for (i = 0; i < item.align.length; i++) {
if (/^ *-+: *$/.test(item.align[i])) {
item.align[i] = 'right';
} else if (/^ *:-+: *$/.test(item.align[i])) {
item.align[i] = 'center';
} else if (/^ *:-+ *$/.test(item.align[i])) {
item.align[i] = 'left';
} else {
item.align[i] = null;
}
}
for (i = 0; i < item.cells.length; i++) {
item.cells[i] = splitCells(
item.cells[i].replace(/^ *\| *| *\| *$/g, ''),
item.header.length);
}
this.tokens.push(item);
continue;
}
}
// lheading
if (cap = this.rules.lheading.exec(src)) {
src = src.substring(cap[0].length);
this.tokens.push({
type: 'heading',
depth: cap[2].charAt(0) === '=' ? 1 : 2,
text: cap[1]
});
continue;
}
// top-level paragraph
if (top && (cap = this.rules.paragraph.exec(src))) {
src = src.substring(cap[0].length);
this.tokens.push({
type: 'paragraph',
text: cap[1].charAt(cap[1].length - 1) === '\n'
? cap[1].slice(0, -1)
: cap[1]
});
continue;
}
// text
if (cap = this.rules.text.exec(src)) {
// Top-level should never reach here.
src = src.substring(cap[0].length);
this.tokens.push({
type: 'text',
text: cap[0]
});
continue;
}
if (src) {
throw new Error('Infinite loop on byte: ' + src.charCodeAt(0));
}
}
return this.tokens;
}
// 重写解析器
marked.Parser.prototype.tok = function() {
switch (this.token.type) {
case 'space': {
return '';
}
case 'hint': {
return this.renderer.hint(this.token.state, this.token.text);
}
case 'hr': {
return this.renderer.hr();
}
case 'heading': {
return this.renderer.heading(
this.inline.output(this.token.text),
this.token.depth,
unescape(this.inlineText.output(this.token.text)),
this.slugger);
}
case 'code': {
return this.renderer.code(this.token.text,
this.token.lang,
this.token.escaped);
}
case 'table': {
var header = '',
body = '',
i,
row,
cell,
j;
// header
cell = '';
for (i = 0; i < this.token.header.length; i++) {
cell += this.renderer.tablecell(
this.inline.output(this.token.header[i]),
{ header: true, align: this.token.align[i] }
);
}
header += this.renderer.tablerow(cell);
for (i = 0; i < this.token.cells.length; i++) {
row = this.token.cells[i];
cell = '';
for (j = 0; j < row.length; j++) {
cell += this.renderer.tablecell(
this.inline.output(row[j]),
{ header: false, align: this.token.align[j] }
);
}
body += this.renderer.tablerow(cell);
}
return this.renderer.table(header, body);
}
case 'blockquote_start': {
body = '';
while (this.next().type !== 'blockquote_end') {
body += this.tok();
}
return this.renderer.blockquote(body);
}
case 'list_start': {
body = '';
var ordered = this.token.ordered,
start = this.token.start;
while (this.next().type !== 'list_end') {
body += this.tok();
}
return this.renderer.list(body, ordered, start);
}
case 'list_item_start': {
body = '';
var loose = this.token.loose;
var checked = this.token.checked;
var task = this.token.task;
if (this.token.task) {
body += this.renderer.checkbox(checked);
}
while (this.next().type !== 'list_item_end') {
body += !loose && this.token.type === 'text'
? this.parseText()
: this.tok();
}
return this.renderer.listitem(body, task, checked);
}
case 'html': {
// TODO parse inline content if parameter markdown=1
return this.renderer.html(this.token.text);
}
case 'paragraph': {
return this.renderer.paragraph(this.inline.output(this.token.text));
}
case 'text': {
return this.renderer.paragraph(this.parseText());
}
default: {
var errMsg = 'Token with "' + this.token.type + '" type was not found.';
if (this.options.silent) {
console.log(errMsg);
} else {
throw new Error(errMsg);
}
}
}
};
// 新建渲染器
const renderer = new marked.Renderer()
const hintState={
info:``,
warning: ``,
danger: ``,
success: ``
}
// 自定义语法的渲染函数
renderer.hint = (state, text) => {
return `
${text.replace(/\n/g,'
')}
`
}
// code高亮
renderer.code = (code, language, isEscaped) => {
const isMarkup = language === 'markup'
let hled
if (isMarkup) {
hled = hljs.highlightAuto(code).value;
} else {
hled = hljs.highlight(language, code).value
}
return `
${hled}
`
}
renderer.listitem = (body, task, checked) => {
let className = ''
if(task){
className = 'task-item'
}
return `${body} \n`;
}
marked.setOptions({ renderer })
export default marked