/**
* @description: HTML 解析器,将 HTML 字符串转换为微信小程序 JSON 结构
* @author: ML 1940694428@qq.com
* @date: 2025/03/31
*/
/**
 * One node of the JSON tree produced by the parser for the mini-program renderer.
 *
 * Which optional fields are populated depends on `type`:
 * `text` nodes carry `text`, `image` nodes carry `src`, and `view` nodes carry
 * their children in `views`.
 */
interface ViewNode {
  /** Kind of node to render. */
  type: "view" | "text" | "image";
  /** Text content; set on `text` nodes. */
  text?: string;
  /** Image source taken from the `src` attribute; set on `image` nodes. */
  src?: string;
  /** Style declarations keyed by camelCased CSS property name. */
  css?: Record<string, string>;
  /** Child nodes; set on `view` nodes. */
  views?: ViewNode[];
}
/**
 * Element node of the intermediate tree built while tokenizing the HTML string.
 */
interface VirtualElement {
  /** Tag name of the element (the tokenizer stores it lower-cased). */
  tagName: string;
  /** Attribute values keyed by attribute name. */
  attributes: Record<string, string>;
  /** Child elements and text runs, in document order. */
  children: (VirtualElement | VirtualText)[];
}
/**
 * Text leaf of the intermediate tree built while tokenizing the HTML string.
 * The parser tells it apart from an element by the presence of the `text` key.
 */
interface VirtualText {
/** Text content of the run (the tokenizer stores it trimmed and never empty). */
text: string;
}
/**
 * Converts an HTML string into the JSON node structure used by the WeChat
 * mini-program renderer.
 *
 * The conversion runs in two stages: a regex-based tokenizer builds a small
 * virtual DOM, which is then walked recursively and mapped onto
 * `view` / `text` / `image` nodes.
 */
export default class HtmlParser {
  /**
   * HTML void elements. They never have content or a closing tag, so they must
   * not be pushed on the open-element stack even when written without `/>`.
   */
  private static readonly VOID_TAGS: ReadonlySet<string> = new Set([
    "area", "base", "br", "col", "embed", "hr", "img",
    "input", "link", "meta", "source", "track", "wbr",
  ]);

  /** Tags with dedicated handling; every other tag is rendered like a `div`. */
  private static readonly SUPPORTED_TAGS: ReadonlySet<string> = new Set([
    "p", "div", "span", "strong", "em", "code", "img",
  ]);

  /** Screen width in pixels; when set it caps the width of paragraphs. */
  private readonly screenWidth?: number;

  /**
   * @param screenWidth Optional screen width in pixels used as `maxWidth` for
   * `<p>` elements. When omitted (or 0) paragraphs use `100%`.
   */
  constructor(screenWidth?: number) {
    this.screenWidth = screenWidth;
  }

  /**
   * Applies a set of global replacements to an HTML string.
   *
   * Each key is compiled with `new RegExp(key, "g")`, so keys are regular
   * expression patterns (escape metacharacters to match them literally), and
   * each value is used as the replacement string of `String.prototype.replace`.
   *
   * @param html Original HTML string.
   * @param filters Map of pattern to replacement, e.g. `{ "&nbsp;": " " }`.
   * @returns A new string with every replacement applied, in key order.
   */
  public filterHtmlString(html: string, filters: Record<string, string>): string {
    let filteredHtml = html;
    for (const [pattern, replacement] of Object.entries(filters)) {
      filteredHtml = filteredHtml.replace(new RegExp(pattern, "g"), replacement);
    }
    return filteredHtml;
  }

  /**
   * Parses an HTML string into the mini-program JSON structure.
   *
   * Non-breaking spaces are normalised to ordinary spaces first, whether they
   * are written as the `&nbsp;` entity or as a raw U+00A0 character.
   *
   * @param html Original HTML string.
   * @returns The converted top-level nodes.
   */
  public parseHtmlToJson(html: string): ViewNode[] {
    const normalizedHtml = this.filterHtmlString(html, {
      "&nbsp;": " ",
      "\\u00A0": " ",
    });
    return this.parseElement(this.createElement(normalizedHtml));
  }

  /**
   * Tokenizes an HTML string into a virtual DOM tree.
   *
   * Malformed input is tolerated: a closing tag with nothing open and tags left
   * open at the end are reported through `console.error` and parsing continues.
   *
   * @param html HTML string.
   * @returns A synthetic root `div` whose children are the parsed nodes.
   */
  private createElement(html: string): VirtualElement {
    const root: VirtualElement = { tagName: "div", attributes: {}, children: [] };
    const stack: VirtualElement[] = [root];
    // Alternatives, in order: comment | doctype-like declaration |
    // opening or self-closing tag (1 = name, 2 = attributes) |
    // closing tag (3 = name) | text run (4).
    // Tag names may contain digits and hyphens so that e.g. <h1> and </h1> match.
    const tokenRe =
      /<!--[\s\S]*?-->|<![^>]*>|<([a-zA-Z][a-zA-Z0-9-]*)([^>]*?)\/?>|<\/([a-zA-Z][a-zA-Z0-9-]*)\s*>|([^<]+)/g;

    let match: RegExpExecArray | null;
    while ((match = tokenRe.exec(html)) !== null) {
      const [token = "", openName, rawAttributes = "", closeName, rawText] = match;
      const parent = stack[stack.length - 1] ?? root;

      if (openName) {
        const tagName = openName.toLowerCase();
        const element: VirtualElement = {
          tagName,
          attributes: this.parseAttributes(rawAttributes),
          children: [],
        };
        parent.children.push(element);
        // Only elements that can have content stay open for following tokens.
        const selfClosing = token.endsWith("/>") || HtmlParser.VOID_TAGS.has(tagName);
        if (!selfClosing) {
          stack.push(element);
        }
      } else if (closeName) {
        if (HtmlParser.VOID_TAGS.has(closeName.toLowerCase())) {
          // A void element was never pushed, so e.g. </img> has nothing to close.
          continue;
        }
        if (stack.length === 1) {
          console.error(`Unexpected closing tag: ${closeName}`);
        } else {
          stack.pop();
        }
      } else if (rawText) {
        const text = rawText.trim();
        if (text) {
          const textNode: VirtualText = { text };
          parent.children.push(textNode);
        }
      }
      // Comments and declarations match without any capture group and are dropped.
    }

    if (stack.length > 1) {
      console.error("Unclosed tags detected");
    }
    return root;
  }

  /**
   * Parses the attribute portion of an opening tag.
   *
   * Accepts double- and single-quoted values. Attribute names are lower-cased
   * because HTML attribute names are case-insensitive.
   *
   * @param attributeString Raw text between the tag name and the closing `>`.
   * @returns Attribute values keyed by lower-cased attribute name.
   */
  private parseAttributes(attributeString: string): Record<string, string> {
    const attributes: Record<string, string> = {};
    const attributeRe = /([a-zA-Z_:][a-zA-Z0-9_:.-]*)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
    let match: RegExpExecArray | null;
    while ((match = attributeRe.exec(attributeString)) !== null) {
      const [, name = "", doubleQuoted, singleQuoted] = match;
      attributes[name.toLowerCase()] = doubleQuoted ?? singleQuoted ?? "";
    }
    return attributes;
  }

  /**
   * Recursively converts the children of a virtual DOM element.
   *
   * @param element Virtual DOM node whose children are converted.
   * @returns The JSON nodes for those children, in document order.
   */
  private parseElement(element: VirtualElement): ViewNode[] {
    const result: ViewNode[] = [];
    for (const node of element.children) {
      if ("text" in node) {
        result.push({ type: "text", text: node.text });
      } else {
        result.push(this.convertElement(node));
      }
    }
    return result;
  }

  /**
   * Maps a single virtual element onto its JSON node.
   * Inline `style` declarations always override the per-tag default styles.
   */
  private convertElement(node: VirtualElement): ViewNode {
    const styles = this.parseInlineStyle(node.attributes.style ?? "");
    // Tags without dedicated handling are rendered as a plain container.
    const tagName = HtmlParser.SUPPORTED_TAGS.has(node.tagName) ? node.tagName : "div";

    switch (tagName) {
      case "p":
        return {
          type: "view",
          css: {
            display: "block",
            wordWrap: "break-word",
            wordBreak: "break-word",
            whiteSpace: "normal",
            maxWidth: this.screenWidth ? `${this.screenWidth}px` : "100%",
            ...styles,
          },
          views: this.parseElement(node),
        };
      case "span":
      case "strong":
      case "em":
        // Inline elements collapse to one text node; nested inline markup
        // contributes its text instead of being silently dropped.
        return { type: "text", css: styles, text: this.collectText(node) };
      case "code":
        return {
          type: "view",
          css: {
            display: "block",
            whiteSpace: "pre-wrap",
            wordWrap: "break-word",
            wordBreak: "break-word",
            overflow: "auto",
            color: "#333",
            border: "1px solid #f0f0f0",
            backgroundColor: "#f8f8f8",
            padding: "10px",
            borderRadius: "4px",
            ...styles,
          },
          views: this.parseElement(node),
        };
      case "img":
        return { type: "image", css: styles, src: node.attributes.src ?? "" };
      default:
        return { type: "view", css: styles, views: this.parseElement(node) };
    }
  }

  /** Concatenates all text found beneath an element, depth-first, without separators. */
  private collectText(element: VirtualElement): string {
    return element.children
      .map((child) => ("text" in child ? child.text : this.collectText(child)))
      .join("");
  }

  /**
   * Parses an inline `style` attribute value.
   *
   * Each declaration is split at its first colon only, so values that contain
   * colons themselves (for example `url(http://...)`) are kept intact.
   * Declarations with an empty property name or value are skipped.
   *
   * @param styleString CSS declaration list, e.g. `"color: red; font-size: 12px"`.
   * @returns Style values keyed by camelCased property name.
   */
  private parseInlineStyle(styleString: string): Record<string, string> {
    const styles: Record<string, string> = {};
    for (const declaration of styleString.split(";")) {
      const colonIndex = declaration.indexOf(":");
      if (colonIndex === -1) {
        continue;
      }
      const property = declaration.slice(0, colonIndex).trim();
      const value = declaration.slice(colonIndex + 1).trim();
      if (property && value) {
        styles[this.camelCase(property)] = value;
      }
    }
    return styles;
  }

  /**
   * Converts a hyphenated CSS property name to camelCase
   * (`background-color` becomes `backgroundColor`).
   */
  private camelCase(input: string): string {
    return input.replace(/-([a-z])/g, (_match: string, letter: string) => letter.toUpperCase());
  }
}