博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
修改了一个HTML2Markdown 函数
阅读量:6827 次
发布时间:2019-06-26

本文共 7706 字,大约阅读时间需要 25 分钟。

最初的版本是从这里找来的:

我给它加了一个过滤掉 span、font 标签的功能,并且能把表格合理地转换成 Markdown 的表格语法。

/**
 * Convert an HTML fragment to Markdown by rewriting tags with regular
 * expressions.
 *
 * Rules are applied in this order: p, br, h1-h6, hr, a, b/strong, i/em, code,
 * table, font/span (unwrapped), pre, img; then ul/ol lists and blockquotes.
 * Tags without a rule are left in the output unchanged.
 *
 * NOTE(review): the published listing of this function lost every span that
 * ran from a '<' to a following '>' (loop headers, the hr/a/b/strong/i/em
 * rules, the heads of several regular expressions, the '</li>' split token).
 * Those pieces were restored from the surrounding residue and presumably
 * match the open-source to-markdown script this code was adapted from --
 * confirm against the author's original before relying on exact output.
 *
 * @param {string} string - HTML source to convert.
 * @returns {string} Markdown text with leading/trailing tabs and line breaks
 *   removed and runs of blank lines collapsed.
 */
var toMarkdown = function (string) {
  // Each rule names one tag (or several) and how to rewrite it. Rules flagged
  // type:'void' match a lone tag; all others match an open/close pair.
  var ELEMENTS = [
    {
      patterns: 'p',
      replacement: function (str, attrs, innerHTML) {
        return innerHTML ? '\n\n' + innerHTML + '\n' : '';
      }
    },
    { patterns: 'br', type: 'void', replacement: '\n' },
    {
      patterns: 'h([1-6])',
      replacement: function (str, hLevel, attrs, innerHTML) {
        var hPrefix = '';
        for (var i = 0; i < hLevel; i++) {
          hPrefix += '#';
        }
        return '\n\n' + hPrefix + ' ' + innerHTML + '\n';
      }
    },
    { patterns: 'hr', type: 'void', replacement: '\n\n* * *\n' },
    {
      patterns: 'a',
      replacement: function (str, attrs, innerHTML) {
        var href = attrs.match(attrRegExp('href'));
        var title = attrs.match(attrRegExp('title'));
        // Anchors without an href are left as they were.
        return href
          ? '[' + innerHTML + '](' + href[1] + (title && title[1] ? ' "' + title[1] + '"' : '') + ')'
          : str;
      }
    },
    {
      patterns: ['b', 'strong'],
      replacement: function (str, attrs, innerHTML) {
        return innerHTML ? '**' + innerHTML + '**' : '';
      }
    },
    {
      patterns: ['i', 'em'],
      replacement: function (str, attrs, innerHTML) {
        return innerHTML ? '_' + innerHTML + '_' : '';
      }
    },
    {
      patterns: 'code',
      replacement: function (str, attrs, innerHTML) {
        // Markup inside inline code (e.g. highlighting spans) is dropped.
        return innerHTML ? '`' + innerHTML.replace(/<.*?>/ig, '') + '`' : '';
      }
    },
    // --- Start of the blog author's own additions. ---
    {
      patterns: 'table',
      replacement: function (str, attrs, innerHTML) {
        // Every row becomes "|cell|cell|" on its own line. The \b guards keep
        // the th/td and tr patterns from also consuming <thead>.
        innerHTML = innerHTML
          .replace(/<\/tr>/ig, '\n')
          .replace(/<tr\b[^>]*>/ig, '|')
          .replace(/<\/t[hd]>/ig, '|')
          .replace(/<t[hd]\b[^>]*>/ig, '')
          .replace(/<\/?tbody[^>]*?>/ig, '')
          // A <thead> row is followed by a "|--|--|" separator line. This only
          // matches when </thead> directly follows the row's line break, i.e.
          // when the source table has no whitespace between its tags.
          .replace(/<thead\b[^>]*>([\s\S]*?)\n<\/thead>/ig, function (all, headerRow) {
            return headerRow + '\n' + headerRow.replace(/[^|]+/g, '--') + '\n';
          });
        return '\n' + innerHTML + '\n';
      }
    },
    {
      // Presentational wrappers are replaced by their content. One pass per
      // tag name: directly nested same-name wrappers leave the inner pair.
      patterns: ['font', 'span'],
      replacement: function (str, attrs, innerHTML) {
        return innerHTML;
      }
    },
    // --- End of the blog author's own additions. ---
    {
      patterns: 'pre',
      replacement: function (str, attrs, innerHTML) {
        if (!innerHTML) {
          return '';
        }
        // <pre><code>..</code></pre> arrives here already wrapped in backticks
        // by the code rule; unwrap so the fence holds the bare text.
        return '```\n' + innerHTML.replace(/^`([^`]*)`$/, '$1') + '\n```\n';
      }
    },
    {
      patterns: 'img',
      type: 'void',
      replacement: function (str, attrs) {
        var src = attrs.match(attrRegExp('src'));
        var alt = attrs.match(attrRegExp('alt'));
        var title = attrs.match(attrRegExp('title'));
        // An image without a src has nothing to point at; emit nothing
        // instead of throwing on src[1].
        if (!src) {
          return '';
        }
        return '![' + (alt && alt[1] ? alt[1] : '') + '](' + src[1] +
          (title && title[1] ? ' "' + title[1] + '"' : '') + ')';
      }
    }
  ];

  for (var i = 0, len = ELEMENTS.length; i < len; i++) {
    // A rule may list one tag pattern or an array of them.
    var tags = [].concat(ELEMENTS[i].patterns);
    for (var j = 0; j < tags.length; j++) {
      string = replaceEls(string, {
        tag: tags[j],
        replacement: ELEMENTS[i].replacement,
        type: ELEMENTS[i].type
      });
    }
  }

  // Apply one rule to every occurrence of its tag in `html`.
  function replaceEls(html, elProperties) {
    var pattern = elProperties.type === 'void'
      ? '<' + elProperties.tag + '\\b([^>]*)\\/?>'
      : '<' + elProperties.tag + '\\b([^>]*)>([\\s\\S]*?)<\\/' + elProperties.tag + '>';
    var regex = new RegExp(pattern, 'gi');
    if (typeof elProperties.replacement === 'string') {
      return html.replace(regex, elProperties.replacement);
    }
    return html.replace(regex, function (str, p1, p2, p3) {
      return elProperties.replacement.call(this, str, p1, p2, p3);
    });
  }

  // Build a case-insensitive regex capturing the value of attribute `attr`.
  function attrRegExp(attr) {
    return new RegExp(attr + '\\s*=\\s*["\']?([^"\']*)["\']?', 'i');
  }

  // The listing also had a second <pre> pass that turned back-ticked <pre>
  // blocks into indented code. The pre rule above already consumes every
  // closed <pre>, so that pass could never match and is not reproduced.

  // Escape a leading "1. " so plain text is not read as an ordered list.
  // Without the m flag this only applies at the very start of the input.
  string = string.replace(/^(\s{0,3}\d+)\. /g, '$1\\. ');

  // Convert lists that contain no child list first, then work outwards.
  var noChildrenRegex = /<(ul|ol)\b[^>]*>(?:(?!<ul|<ol)[\s\S])*?<\/\1>/gi;
  while (string.match(noChildrenRegex)) {
    string = string.replace(noChildrenRegex, function (str) {
      return replaceLists(str);
    });
  }

  // Rewrite one innermost <ul>/<ol> as Markdown list items.
  function replaceLists(html) {
    html = html.replace(/<(ul|ol)\b[^>]*>([\s\S]*?)<\/\1>/gi, function (str, listType, innerHTML) {
      var lis = innerHTML.split(/<\/li>/i);
      lis.pop(); // whatever follows the last </li> is not an item
      for (var k = 0; k < lis.length; k++) {
        if (!lis[k]) {
          continue;
        }
        var prefix = listType.toLowerCase() === 'ol' ? (k + 1) + '.  ' : '*   ';
        lis[k] = lis[k].replace(/\s*<li[^>]*>([\s\S]*)/i, function (item, itemHTML) {
          itemHTML = itemHTML.replace(/^\s+/, '');
          itemHTML = itemHTML.replace(/\n\n/g, '\n\n    ');
          // Indent nested lists one level. "( *)" captures the same text as
          // the listing's "([ ]*)+" without its exponential backtracking on
          // long runs of spaces.
          itemHTML = itemHTML.replace(/\n( *)(\*|\d+\.) /g, '\n$1    $2 ');
          return prefix + itemHTML;
        });
      }
      return lis.join('\n');
    });
    return '\n\n' + html.replace(/[ \t]+\n|\s+$/g, '');
  }

  // Convert the innermost blockquotes first so nesting becomes "> > ".
  var deepest = /<blockquote\b[^>]*>((?:(?!<blockquote)[\s\S])*?)<\/blockquote>/gi;
  while (string.match(deepest)) {
    string = string.replace(deepest, function (str) {
      return replaceBlockquotes(str);
    });
  }

  // Prefix every line of a blockquote's content with "> ".
  function replaceBlockquotes(html) {
    return html.replace(/<blockquote\b[^>]*>([\s\S]*?)<\/blockquote>/gi, function (str, inner) {
      inner = inner.replace(/^\s+|\s+$/g, '');
      inner = cleanUp(inner);
      inner = inner.replace(/^/gm, '> ');
      inner = inner.replace(/^(>([ \t]{2,}>)+)/gm, '> >');
      return inner;
    });
  }

  // Trim leading/trailing tabs and line breaks; allow at most one blank line.
  function cleanUp(text) {
    text = text.replace(/^[\t\r\n]+|[\t\r\n]+$/g, '');
    text = text.replace(/\n\s+\n/g, '\n\n');
    text = text.replace(/\n{3,}/g, '\n\n');
    return text;
  }

  return cleanUp(string);
};

// Expose the converter when loaded as a CommonJS module.
if (typeof exports === 'object') {
  exports.toMarkdown = toMarkdown;
}

后来又稍微做了点修改,让没有表头的表格也能正常地转换成 Markdown。

/**
 * Convert an HTML fragment to Markdown by rewriting tags with regular
 * expressions. Tables are converted whether or not they have a <thead>: the
 * first row is always treated as the header row.
 *
 * Rules are applied in this order: p, br, h1-h6, hr, a, b/strong, i/em, code,
 * table, pre, img; then wrapper tags are stripped, then ul/ol lists and
 * blockquotes are converted. Tags without a rule are left in the output.
 *
 * NOTE(review): the published listing of this function lost every span that
 * ran from a '<' to a following '>' (loop headers, the hr/a/b/strong/i/em
 * rules, the heads of several regular expressions, the '</li>' split token).
 * Those pieces were restored from the surrounding residue and presumably
 * match the open-source to-markdown script this code was adapted from --
 * confirm against the author's original before relying on exact output.
 *
 * @param {string} string - HTML source to convert.
 * @returns {string} Markdown text with leading/trailing tabs and line breaks
 *   removed and runs of blank lines collapsed.
 */
var toMarkdown = function (string) {
  // Each rule names one tag (or several) and how to rewrite it. Rules flagged
  // isvoid match a lone tag; all others match an open/close pair.
  var ELEMENTS = [
    {
      patterns: 'p',
      replacement: function (str, attrs, innerHTML) {
        return innerHTML ? '\n\n' + innerHTML + '\n' : '';
      }
    },
    { patterns: 'br', isvoid: true, replacement: '\n' },
    {
      patterns: 'h([1-6])',
      replacement: function (str, hLevel, attrs, innerHTML) {
        var hPrefix = '';
        for (var i = 0; i < hLevel; i++) {
          hPrefix += '#';
        }
        return '\n\n' + hPrefix + ' ' + innerHTML + '\n';
      }
    },
    { patterns: 'hr', isvoid: true, replacement: '\n\n* * *\n' },
    {
      patterns: 'a',
      replacement: function (str, attrs, innerHTML) {
        var href = attrs.match(attrRegExp('href'));
        var title = attrs.match(attrRegExp('title'));
        // Anchors without an href are left as they were.
        return href
          ? '[' + innerHTML + '](' + href[1] + (title && title[1] ? ' "' + title[1] + '"' : '') + ')'
          : str;
      }
    },
    {
      patterns: ['b', 'strong'],
      replacement: function (str, attrs, innerHTML) {
        return innerHTML ? '**' + innerHTML + '**' : '';
      }
    },
    {
      patterns: ['i', 'em'],
      replacement: function (str, attrs, innerHTML) {
        return innerHTML ? '_' + innerHTML + '_' : '';
      }
    },
    {
      patterns: 'code',
      replacement: function (str, attrs, innerHTML) {
        // Markup inside inline code (e.g. highlighting spans) is dropped.
        return innerHTML ? '`' + innerHTML.replace(/<.*?>/ig, '') + '`' : '';
      }
    },
    {
      patterns: 'table',
      replacement: function (str, attrs, innerHTML) {
        innerHTML = innerHTML
          // Drop source line breaks first so each row ends up on one line.
          .replace(/\r?\n/g, '')
          .replace(/<\/tr>/ig, '\n')
          // The \b guards keep the tr and th/td patterns from also consuming
          // <thead>, which is removed separately below.
          .replace(/<tr\b[^>]*>/ig, '|')
          .replace(/<\/t[hd]>/ig, '|')
          .replace(/<t[hd]\b[^>]*>/ig, '')
          .replace(/<\/?tbody[^>]*?>/ig, '')
          .replace(/<\/?thead[^>]*?>/ig, '')
          // Whatever the first row is, follow it with a "|--|--|" separator.
          .replace(/^(.*?)\n/, function (all, firstRow) {
            return firstRow + '\n' + firstRow.replace(/[^|]+/g, '--') + '\n';
          });
        return '\n' + innerHTML + '\n';
      }
    },
    {
      patterns: 'pre',
      replacement: function (str, attrs, innerHTML) {
        if (!innerHTML) {
          return '';
        }
        // <pre><code>..</code></pre> arrives here already wrapped in backticks
        // by the code rule; unwrap so the fence holds the bare text.
        return '```\n' + innerHTML.replace(/^`([^`]*)`$/, '$1') + '\n```\n';
      }
    },
    {
      patterns: 'img',
      isvoid: true,
      replacement: function (str, attrs) {
        var src = attrs.match(attrRegExp('src'));
        var alt = attrs.match(attrRegExp('alt'));
        var title = attrs.match(attrRegExp('title'));
        // An image without a src has nothing to point at; emit nothing
        // instead of throwing on src[1].
        if (!src) {
          return '';
        }
        return '![' + (alt && alt[1] ? alt[1] : '') + '](' + src[1] +
          (title && title[1] ? ' "' + title[1] + '"' : '') + ')';
      }
    }
  ];

  for (var i = 0, len = ELEMENTS.length; i < len; i++) {
    // A rule may list one tag pattern or an array of them.
    var tags = [].concat(ELEMENTS[i].patterns);
    for (var j = 0; j < tags.length; j++) {
      string = replaceEls(string, {
        tag: tags[j],
        replacement: ELEMENTS[i].replacement,
        isvoid: ELEMENTS[i].isvoid
      });
    }
  }

  // Strip purely structural wrapper tags but keep their content. Removing the
  // tags one by one (rather than as open/close pairs) copes with any nesting.
  // NOTE(review): only the tail "/ig,'')" of this statement survived in the
  // listing; the tag list is taken from a commented-out rule that listed
  // font, span, div, section, article and aside -- confirm.
  string = string.replace(/<\/?(?:font|span|div|section|article|aside)\b[^>]*>/ig, '');

  // Apply one rule to every occurrence of its tag in `html`.
  function replaceEls(html, elProperties) {
    var pattern = elProperties.isvoid
      ? '<' + elProperties.tag + '\\b([^>]*)\\/?>'
      : '<' + elProperties.tag + '\\b([^>]*)>([\\s\\S]*?)<\\/' + elProperties.tag + '>';
    var regex = new RegExp(pattern, 'gi');
    if (typeof elProperties.replacement === 'string') {
      return html.replace(regex, elProperties.replacement);
    }
    return html.replace(regex, function (str, p1, p2, p3) {
      return elProperties.replacement.call(this, str, p1, p2, p3);
    });
  }

  // Build a case-insensitive regex capturing the value of attribute `attr`.
  function attrRegExp(attr) {
    return new RegExp(attr + '\\s*=\\s*["\']?([^"\']*)["\']?', 'i');
  }

  // The listing also had a second <pre> pass that turned back-ticked <pre>
  // blocks into indented code. The pre rule above already consumes every
  // closed <pre>, so that pass could never match and is not reproduced.

  // Escape a leading "1. " so plain text is not read as an ordered list.
  // Without the m flag this only applies at the very start of the input.
  string = string.replace(/^(\s{0,3}\d+)\. /g, '$1\\. ');

  // Convert lists that contain no child list first, then work outwards.
  var noChildrenRegex = /<(ul|ol)\b[^>]*>(?:(?!<ul|<ol)[\s\S])*?<\/\1>/gi;
  while (string.match(noChildrenRegex)) {
    string = string.replace(noChildrenRegex, function (str) {
      return replaceLists(str);
    });
  }

  // Rewrite one innermost <ul>/<ol> as Markdown list items.
  function replaceLists(html) {
    html = html.replace(/<(ul|ol)\b[^>]*>([\s\S]*?)<\/\1>/gi, function (str, listType, innerHTML) {
      var lis = innerHTML.split(/<\/li>/i);
      lis.pop(); // whatever follows the last </li> is not an item
      for (var k = 0; k < lis.length; k++) {
        if (!lis[k]) {
          continue;
        }
        var prefix = listType.toLowerCase() === 'ol' ? (k + 1) + '.  ' : '*   ';
        lis[k] = lis[k].replace(/\s*<li[^>]*>([\s\S]*)/i, function (item, itemHTML) {
          itemHTML = itemHTML.replace(/^\s+/, '');
          itemHTML = itemHTML.replace(/\n\n/g, '\n\n    ');
          // Indent nested lists one level. "( *)" captures the same text as
          // the listing's "([ ]*)+" without its exponential backtracking on
          // long runs of spaces.
          itemHTML = itemHTML.replace(/\n( *)(\*|\d+\.) /g, '\n$1    $2 ');
          return prefix + itemHTML;
        });
      }
      return lis.join('\n');
    });
    return '\n\n' + html.replace(/[ \t]+\n|\s+$/g, '');
  }

  // Convert the innermost blockquotes first so nesting becomes "> > ".
  var deepest = /<blockquote\b[^>]*>((?:(?!<blockquote)[\s\S])*?)<\/blockquote>/gi;
  while (string.match(deepest)) {
    string = string.replace(deepest, function (str) {
      return replaceBlockquotes(str);
    });
  }

  // Prefix every line of a blockquote's content with "> ".
  function replaceBlockquotes(html) {
    return html.replace(/<blockquote\b[^>]*>([\s\S]*?)<\/blockquote>/gi, function (str, inner) {
      inner = inner.replace(/^\s+|\s+$/g, '');
      inner = cleanUp(inner);
      inner = inner.replace(/^/gm, '> ');
      inner = inner.replace(/^(>([ \t]{2,}>)+)/gm, '> >');
      return inner;
    });
  }

  // Trim leading/trailing tabs and line breaks; allow at most one blank line.
  function cleanUp(text) {
    text = text.replace(/^[\t\r\n]+|[\t\r\n]+$/g, '');
    text = text.replace(/\n\s+\n/g, '\n\n');
    text = text.replace(/\n{3,}/g, '\n\n');
    return text;
  }

  return cleanUp(string);
};

// Expose the converter when loaded as a CommonJS module.
if (typeof exports === 'object') {
  exports.toMarkdown = toMarkdown;
}

转载地址:http://dhykl.baihongyu.com/

你可能感兴趣的文章
MySQL修改root密码的多种方法
查看>>
硬件的一些性能指标
查看>>
day2-cacti
查看>>
MFC绘图
查看>>
Codevs 1744 格子染色==BZOJ 1296 粉刷匠
查看>>
最小生成树-Prim算法和Kruskal算法
查看>>
(6)dd命令安装Linux
查看>>
机器学习第3课:线性代数回顾(Linear Algebra Review)
查看>>
MathType使用中的四个小技巧
查看>>
ajax
查看>>
【转】淘测试---新时代的测试工程师
查看>>
Leetcode3---Longest Substring Without Repeating Characters
查看>>
upc组队赛17 Bits Reverse【暴力枚举】
查看>>
JavaScript设计模式 策略模式
查看>>
转:java反射详解
查看>>
NFS服务
查看>>
超详细!使用 LVS 实现负载均衡原理及安装配置详解---转
查看>>
一些新面试题的解答
查看>>
软件工程中的图
查看>>
F# ≥ C# (Pattern matching)
查看>>