index.ts 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  /**
   * @description: HTML parser that converts an HTML string into a WeChat mini-program JSON structure
   * @author: ML 1940694428@qq.com
   * @date: 2025/03/31
   */
  /**
   * One node of the JSON tree returned by the parser.
   *
   * All fields except `type` are optional; which ones are populated depends on
   * how the producing code fills the node in.
   */
  interface ViewNode {
    /** Kind of node: a container, a run of text, or an image. */
    type: "view" | "text" | "image";
    /** Text content (used by nodes carrying text). */
    text?: string;
    /** Image source (used by image nodes). */
    src?: string;
    /** Style map of property name to value. */
    css?: Record<string, string>;
    /** Child nodes (used by container nodes). */
    views?: ViewNode[];
  }
  /** Element node of the intermediate (virtual) DOM tree. */
  interface VirtualElement {
    /** Tag name of the element. */
    tagName: string;
    /** Attribute name to attribute value. */
    attributes: Record<string, string>;
    /** Child elements and text nodes, in document order. */
    children: (VirtualElement | VirtualText)[];
  }

  /** Text node of the intermediate (virtual) DOM tree. */
  interface VirtualText {
    /** The text carried by this node. */
    text: string;
  }
  /**
   * Converts an HTML string into a JSON tree of `view` / `text` / `image`
   * nodes.
   *
   * This is a small regex-based tokenizer, not a spec-compliant HTML parser:
   * it understands opening, closing and self-closing tags, quoted attributes,
   * inline `style` declarations, comments and a handful of character entities.
   * Text is trimmed per text node, so whitespace-only text is dropped.
   */
  export default class HtmlParser {
    /**
     * Elements that never have content or a closing tag. They are treated as
     * self-closing even when written without a trailing "/>".
     */
    private static readonly VOID_TAGS: ReadonlySet<string> = new Set([
      "area", "base", "br", "col", "embed", "hr", "img",
      "input", "link", "meta", "source", "track", "wbr",
    ]);

    /** Character entities decoded in text nodes and attribute values. */
    private static readonly ENTITIES: Readonly<Record<string, string>> = {
      lt: "<",
      gt: ">",
      amp: "&",
      quot: '"',
      apos: "'",
      "#39": "'",
    };

    /** Optional width in pixels used as the `maxWidth` of paragraphs. */
    private readonly screenWidth?: number;

    /**
     * @param screenWidth Optional width in pixels; when truthy, `<p>` nodes get
     * `maxWidth: "<screenWidth>px"`, otherwise `"100%"`.
     */
    constructor(screenWidth?: number) {
      this.screenWidth = screenWidth;
    }

    /**
     * Applies a set of global replacements to an HTML string.
     *
     * Each key of `filters` is compiled with `new RegExp(key, "g")`, so it is
     * interpreted as a regular-expression source: callers must escape regex
     * metacharacters if they want a literal match, and an invalid pattern
     * throws a `SyntaxError`. Replacement values follow the rules of
     * `String.prototype.replace` (e.g. `$&` is special).
     *
     * @param html Original HTML string.
     * @param filters Map of pattern to replacement, e.g. `{ '&nbsp;': '' }`.
     * @returns The string after all replacements were applied in key order.
     */
    public filterHtmlString(html: string, filters: Record<string, string>): string {
      let filteredHtml = html;
      for (const [pattern, replacement] of Object.entries(filters)) {
        filteredHtml = filteredHtml.replace(new RegExp(pattern, "g"), replacement);
      }
      return filteredHtml;
    }

    /**
     * Parses HTML into the JSON node structure.
     *
     * @param html Original HTML string.
     * @returns The converted top-level nodes.
     */
    public parseHtmlToJson(html: string): ViewNode[] {
      // Non-breaking spaces become plain spaces before tokenizing.
      const tempHtml = this.filterHtmlString(html, {
        '&nbsp;': ' '
      });
      const tempDiv = this.createElement(tempHtml);
      return this.parseElement(tempDiv);
    }

    /**
     * Tokenizes the HTML and builds a virtual DOM tree under a synthetic root
     * `div`.
     *
     * Malformed input never throws: stray closing tags and unclosed elements
     * are reported through `console.error` and parsing continues.
     *
     * @param html HTML string.
     * @returns The synthetic root element holding the parsed children.
     */
    private createElement(html: string): VirtualElement {
      const root: VirtualElement = { tagName: "div", attributes: {}, children: [] };
      const stack: VirtualElement[] = [root];
      // Alternatives, in order: comment (skipped), doctype-like declaration
      // (skipped), opening/self-closing tag (groups 1 = name, 2 = attributes),
      // closing tag (group 3), text (group 4). Tag names may contain digits and
      // hyphens so that e.g. <h1> is not split into "h" plus attribute "1".
      const tokenRe =
        /<!--[\s\S]*?-->|<![a-zA-Z][^>]*>|<([a-zA-Z][a-zA-Z0-9-]*)([^>]*?)\/?>|<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>|([^<]+)/g;

      let match: RegExpExecArray | null;
      while ((match = tokenRe.exec(html)) !== null) {
        const openTag: string | undefined = match[1];
        const closeTag: string | undefined = match[3];
        const rawText: string | undefined = match[4];
        const parent = stack[stack.length - 1] || root;

        if (openTag) {
          // Opening or self-closing tag.
          const tagName = openTag.toLowerCase();
          const element: VirtualElement = {
            tagName,
            attributes: this.parseAttributes(match[2] || ""),
            children: [],
          };
          parent.children.push(element);
          const selfClosing = match[0].endsWith("/>") || HtmlParser.VOID_TAGS.has(tagName);
          if (!selfClosing) {
            stack.push(element);
          }
        } else if (closeTag) {
          // Closing tag.
          const tagName = closeTag.toLowerCase();
          if (HtmlParser.VOID_TAGS.has(tagName)) {
            // e.g. "</img>": the element was never opened on the stack.
            continue;
          }
          // Close the nearest open element with the same name; elements opened
          // after it are closed implicitly. Index 0 is the synthetic root and
          // is never closed.
          let openIndex = stack.length - 1;
          while (openIndex > 0) {
            const candidate = stack[openIndex];
            if (candidate && candidate.tagName === tagName) break;
            openIndex--;
          }
          if (openIndex === 0) {
            console.error(`Unexpected closing tag: ${closeTag}`);
          } else {
            stack.length = openIndex;
          }
        } else if (rawText) {
          // Text node; whitespace-only text is dropped.
          const text = this.decodeEntities(rawText.trim());
          if (text) {
            const textNode: VirtualText = { text };
            parent.children.push(textNode);
          }
        }
      }

      if (stack.length > 1) {
        console.error("Unclosed tags detected");
      }
      return root;
    }

    /**
     * Parses the attribute portion of an opening tag.
     *
     * Accepts double- and single-quoted values. Attribute names are lowercased
     * and values are entity-decoded. Unquoted and valueless attributes are
     * ignored.
     *
     * @param attributeString Raw text between the tag name and the closing ">".
     * @returns Map of attribute name to value.
     */
    private parseAttributes(attributeString: string): Record<string, string> {
      const attributes: Record<string, string> = {};
      const attributeRe = /([a-zA-Z_:][\w:.-]*)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
      let match: RegExpExecArray | null;
      while ((match = attributeRe.exec(attributeString)) !== null) {
        const name = match[1];
        if (!name) continue;
        const doubleQuoted: string | undefined = match[2];
        const rawValue = doubleQuoted !== undefined ? doubleQuoted : (match[3] || "");
        attributes[name.toLowerCase()] = this.decodeEntities(rawValue);
      }
      return attributes;
    }

    /**
     * Recursively converts the children of a virtual element into output nodes.
     *
     * Tag handling:
     * - `p`: block view with word-wrapping defaults, overridable by inline style.
     * - `code`: block view with code-box defaults, overridable by inline style.
     * - `span` / `strong` / `em`: a single text node holding all nested text.
     * - `img`: image node with the `src` attribute.
     * - `font`: transparent; its children are spliced into the parent.
     * - `div` and every other tag: plain view.
     *
     * @param element Virtual DOM node whose children are converted.
     * @returns The converted nodes, in document order.
     */
    private parseElement(element: VirtualElement): ViewNode[] {
      const result: ViewNode[] = [];
      for (const node of element.children) {
        if ("text" in node) {
          result.push({ type: "text", text: node.text });
          continue;
        }

        const styles = this.parseInlineStyle(node.attributes.style || "");
        switch (node.tagName) {
          case "font":
            result.push(...this.parseElement(node));
            break;
          case "span":
          case "strong":
          case "em":
            result.push({ type: "text", css: styles, text: this.collectText(node) });
            break;
          case "img":
            result.push({ type: "image", css: styles, src: node.attributes.src || "" });
            break;
          case "p":
            result.push({
              type: "view",
              css: {
                display: "block",
                wordWrap: "break-word",
                wordBreak: "break-word",
                whiteSpace: "normal",
                maxWidth: this.screenWidth ? `${this.screenWidth}px` : "100%",
                ...styles,
              },
              views: this.parseElement(node),
            });
            break;
          case "code":
            result.push({
              type: "view",
              css: {
                display: "block",
                whiteSpace: "pre-wrap",
                wordWrap: "break-word",
                wordBreak: "break-word",
                overflow: "auto",
                color: "#333",
                border: "1px solid #f0f0f0",
                backgroundColor: "#f8f8f8",
                padding: "10px",
                borderRadius: "4px",
                ...styles,
              },
              views: this.parseElement(node),
            });
            break;
          default:
            // "div" and any unsupported tag are rendered as a plain view.
            result.push({ type: "view", css: styles, views: this.parseElement(node) });
            break;
        }
      }
      return result;
    }

    /**
     * Concatenates all text found under an element, including text inside
     * nested elements, in document order.
     */
    private collectText(element: VirtualElement): string {
      return element.children
        .map((child) => ("text" in child ? child.text : this.collectText(child)))
        .join("");
    }

    /**
     * Replaces the entities listed in `ENTITIES` with their characters in a
     * single pass, so "&amp;lt;" becomes "&lt;" rather than "<".
     */
    private decodeEntities(text: string): string {
      return text.replace(
        /&(lt|gt|amp|quot|apos|#39);/g,
        (entity: string, name: string) => HtmlParser.ENTITIES[name] || entity,
      );
    }

    /**
     * Parses an inline `style` attribute value.
     *
     * Each declaration is split at its FIRST colon only, so values that contain
     * colons (e.g. `url(https://...)`) are kept intact. Declarations without a
     * property name or value are skipped.
     *
     * @param styleString CSS declarations separated by ";".
     * @returns Map of camelCased property name to value.
     */
    private parseInlineStyle(styleString: string): Record<string, string> {
      const styles: Record<string, string> = {};
      for (const declaration of styleString.split(";")) {
        const colonIndex = declaration.indexOf(":");
        if (colonIndex === -1) continue;
        const key = declaration.slice(0, colonIndex).trim();
        const value = declaration.slice(colonIndex + 1).trim();
        if (key && value) {
          styles[this.camelCase(key)] = value;
        }
      }
      return styles;
    }

    /**
     * Converts a hyphenated CSS property name to camelCase
     * (e.g. "font-size" becomes "fontSize").
     */
    private camelCase(input: string): string {
      return input.replace(/-([a-z])/g, (_, letter: string) => letter.toUpperCase());
    }
  }