/**
 * @description: HTML parser that converts an HTML string into a WeChat mini program JSON structure
 * @author: ML 1940694428@qq.com
 * @date: 2025/03/31
 */
/** One node of the JSON tree produced by {@link HtmlParser.parseHtmlToJson}. */
export interface ViewNode {
  /** Kind of node: a container (`view`), a text run (`text`) or a picture (`image`). */
  type: "view" | "text" | "image";
  /** Text content; set on `text` nodes. */
  text?: string;
  /** Image source; set on `image` nodes. */
  src?: string;
  /** Style declarations with camelCased property names. */
  css?: Record<string, string>;
  /** Child nodes; set on `view` nodes. */
  views?: ViewNode[];
}

/** Element of the intermediate tree built by the tokenizer. */
interface VirtualElement {
  /** Lower-cased tag name. */
  tagName: string;
  /** Attribute values keyed by lower-cased attribute name. */
  attributes: Record<string, string>;
  children: (VirtualElement | VirtualText)[];
}

/** Text leaf of the intermediate tree (already trimmed, never empty). */
interface VirtualText {
  text: string;
}

/** HTML void elements: they never have content or a closing tag, with or without "/>". */
const VOID_TAGS: ReadonlySet<string> = new Set([
  "area",
  "base",
  "br",
  "col",
  "embed",
  "hr",
  "img",
  "input",
  "link",
  "meta",
  "source",
  "track",
  "wbr",
]);

/** Default styles applied to `<p>`; inline styles override them. */
const PARAGRAPH_STYLE: Readonly<Record<string, string>> = {
  display: "block",
  wordWrap: "break-word",
  wordBreak: "break-word",
  whiteSpace: "normal",
};

/** Default styles applied to `<code>`; inline styles override them. */
const CODE_STYLE: Readonly<Record<string, string>> = {
  display: "block",
  whiteSpace: "pre-wrap",
  wordWrap: "break-word",
  wordBreak: "break-word",
  overflow: "auto",
  color: "#333",
  border: "1px solid #f0f0f0",
  backgroundColor: "#f8f8f8",
  padding: "10px",
  borderRadius: "4px",
};

/**
 * Converts an HTML string into a tree of {@link ViewNode} objects.
 *
 * The parser is a small regex tokenizer, not a spec-compliant HTML parser:
 * it handles tags, attributes, text, comments and void elements, and is
 * tolerant of unclosed or mismatched tags.
 */
export default class HtmlParser {
  /**
   * @param screenWidth Optional width in px used as `maxWidth` of `<p>` nodes;
   *   when omitted (or 0) paragraphs use `"100%"`.
   */
  constructor(private readonly screenWidth?: number) {}

  /**
   * Applies a set of global replacements to an HTML string.
   *
   * Each key is compiled with `new RegExp(key, "g")`, so keys are regular
   * expression sources, not literal strings: escape metacharacters such as
   * `.` or `(` if a literal match is intended.
   *
   * @param html Original HTML string.
   * @param filters Map of pattern source to replacement text.
   * @returns A new string with every replacement applied, in insertion order.
   */
  public filterHtmlString(html: string, filters: Record<string, string>): string {
    return Object.entries(filters).reduce(
      (current, [pattern, replacement]) => current.replace(new RegExp(pattern, "g"), replacement),
      html,
    );
  }

  /**
   * Parses an HTML string into the JSON structure.
   *
   * @param html Original HTML string.
   * @returns The top-level nodes of the converted tree.
   */
  public parseHtmlToJson(html: string): ViewNode[] {
    // Non-breaking spaces, written as an entity or as the raw character,
    // are normalised to ordinary spaces before tokenizing.
    const normalized = this.filterHtmlString(html, {
      "&nbsp;": " ",
      "\u00a0": " ",
    });
    return this.parseElement(this.createElement(normalized));
  }

  /**
   * Tokenizes the HTML and builds the intermediate element tree.
   *
   * @param html HTML string.
   * @returns A synthetic root `div` whose children are the parsed nodes.
   */
  private createElement(html: string): VirtualElement {
    const root: VirtualElement = { tagName: "div", attributes: {}, children: [] };
    // Open elements, innermost last; the root is never popped.
    const stack: VirtualElement[] = [root];
    // Alternatives, in order: comment | <!DOCTYPE ...> | opening/self-closing tag
    // (group 1 = name, group 2 = raw attributes) | closing tag (group 3) | text (group 4).
    // Tag names may contain digits and hyphens so that h1..h6 and custom elements match.
    const tokenPattern =
      /<!--[\s\S]*?-->|<![^>]*>|<([a-zA-Z][a-zA-Z0-9-]*)([^>]*?)\/?>|<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>|([^<]+)/g;

    let match: RegExpExecArray | null;
    while ((match = tokenPattern.exec(html)) !== null) {
      const parent = stack[stack.length - 1] ?? root;
      const openName = match[1];
      const closeName = match[3];
      const rawText = match[4];

      if (openName) {
        const tagName = openName.toLowerCase();
        const element: VirtualElement = {
          tagName,
          attributes: this.parseAttributes(match[2] ?? ""),
          children: [],
        };
        parent.children.push(element);
        // Void elements such as <img> or <br> are complete even without "/>";
        // pushing them would make every following sibling their child.
        if (!match[0].endsWith("/>") && !VOID_TAGS.has(tagName)) {
          stack.push(element);
        }
      } else if (closeName) {
        const tagName = closeName.toLowerCase();
        if (VOID_TAGS.has(tagName)) {
          continue; // e.g. "</img>": nothing was opened for it
        }
        // Unwind to the nearest open element with this name, implicitly
        // closing anything left open inside it.
        let depth = stack.length - 1;
        while (depth > 0 && stack[depth]?.tagName !== tagName) {
          depth--;
        }
        if (depth === 0) {
          console.error(`Unexpected closing tag: ${closeName}`);
        } else {
          stack.length = depth;
        }
      } else if (rawText) {
        const text = rawText.trim();
        if (text) {
          parent.children.push({ text });
        }
      }
      // Comments and doctype declarations match without a group and are dropped.
    }

    if (stack.length > 1) {
      console.error("Unclosed tags detected");
    }
    return root;
  }

  /**
   * Parses the raw attribute text of an opening tag.
   *
   * Supports `name="v"`, `name='v'`, `name=v` and valueless `name`
   * (stored as an empty string). Names are lower-cased; if a name repeats,
   * the last occurrence wins.
   *
   * @param attributeString Text between the tag name and the closing `>`.
   * @returns Attribute values keyed by attribute name.
   */
  private parseAttributes(attributeString: string): Record<string, string> {
    const attributes: Record<string, string> = {};
    // Group 1 = name; groups 2/3/4 = double-quoted, single-quoted, unquoted value.
    const attributePattern = /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;

    let match: RegExpExecArray | null;
    while ((match = attributePattern.exec(attributeString)) !== null) {
      const name = match[1];
      if (!name) {
        continue;
      }
      attributes[name.toLowerCase()] = match[2] ?? match[3] ?? match[4] ?? "";
    }
    return attributes;
  }

  /**
   * Recursively converts the children of an element into view nodes.
   *
   * Mapping: text becomes a `text` node; `p` and `code` become views with
   * default styles; `span`/`strong`/`em` become a single `text` node;
   * `img` becomes an `image` node; every other tag (including `div`)
   * becomes a plain view.
   *
   * @param element Element whose children are converted.
   * @returns Converted nodes, in document order.
   */
  private parseElement(element: VirtualElement): ViewNode[] {
    const result: ViewNode[] = [];

    for (const node of element.children) {
      if ("text" in node) {
        result.push({ type: "text", text: node.text });
        continue;
      }

      const styles = this.parseInlineStyle(node.attributes.style ?? "");

      switch (node.tagName) {
        case "p":
          result.push({
            type: "view",
            css: {
              ...PARAGRAPH_STYLE,
              maxWidth: this.screenWidth ? `${this.screenWidth}px` : "100%",
              ...styles,
            },
            views: this.parseElement(node),
          });
          break;
        case "span":
        case "strong":
        case "em":
          // Inline elements collapse to one text node; text of nested
          // elements is included so nested inline markup is not lost.
          result.push({ type: "text", css: styles, text: this.collectText(node) });
          break;
        case "code":
          result.push({
            type: "view",
            css: { ...CODE_STYLE, ...styles },
            views: this.parseElement(node),
          });
          break;
        case "img":
          result.push({ type: "image", css: styles, src: node.attributes.src ?? "" });
          break;
        default:
          // "div" and every unsupported tag are rendered as a plain view.
          result.push({ type: "view", css: styles, views: this.parseElement(node) });
          break;
      }
    }

    return result;
  }

  /** Concatenates, in document order, all text found beneath an element. */
  private collectText(element: VirtualElement): string {
    return element.children
      .map((child) => ("text" in child ? child.text : this.collectText(child)))
      .join("");
  }

  /**
   * Parses an inline `style` attribute value.
   *
   * @param styleString CSS declarations such as `color: red; font-size: 12px`.
   * @returns Declarations keyed by camelCased property name; declarations
   *   without both a property and a value are skipped.
   */
  private parseInlineStyle(styleString: string): Record<string, string> {
    const styles: Record<string, string> = {};
    for (const declaration of this.splitDeclarations(styleString)) {
      // Split on the first colon only: values may contain ":" (e.g. URLs).
      const separator = declaration.indexOf(":");
      if (separator === -1) {
        continue;
      }
      const key = declaration.slice(0, separator).trim();
      const value = declaration.slice(separator + 1).trim();
      if (key && value) {
        styles[this.camelCase(key)] = value;
      }
    }
    return styles;
  }

  /**
   * Splits a style string on ";" while ignoring semicolons inside
   * parentheses or quotes (e.g. `url(data:image/png;base64,...)`).
   */
  private splitDeclarations(styleString: string): string[] {
    const declarations: string[] = [];
    let parenDepth = 0;
    let openQuote = "";
    let start = 0;

    for (let i = 0; i < styleString.length; i++) {
      const ch = styleString[i];
      if (openQuote) {
        if (ch === openQuote) {
          openQuote = "";
        }
      } else if (ch === '"' || ch === "'") {
        openQuote = ch;
      } else if (ch === "(") {
        parenDepth++;
      } else if (ch === ")") {
        if (parenDepth > 0) {
          parenDepth--;
        }
      } else if (ch === ";" && parenDepth === 0) {
        declarations.push(styleString.slice(start, i));
        start = i + 1;
      }
    }
    declarations.push(styleString.slice(start));
    return declarations;
  }

  /** Converts a hyphenated CSS property name to camelCase (`font-size` to `fontSize`). */
  private camelCase(input: string): string {
    return input.replace(/-([a-z])/g, (_match: string, letter: string) => letter.toUpperCase());
  }
}
|