wordsoap-regex：用于清理 Microsoft Word 输出的脏 HTML 的正则表达式。module.exports = { tmsoTags: /<[/]?(font|span|xml|del|ins|[ovwxp]:w+)[^>]?>/, tmsoAttributes: /<( [^>])(?:class|lang|style|size|face|[ovwxp]:w+)= (?:'[^']'|\"\"[^\"\"]\"\"|[^s>]+)([^>]*)/