216 lines
5.8 KiB
TypeScript
216 lines
5.8 KiB
TypeScript
// DOM extraction logic for Playwright-based HTML dumps.
// Runs in the browser context via page.evaluate.

/**
 * Compact record describing one interactive element found by `extractDom`.
 *
 * Property names are single letters to keep the serialized dump small; every
 * optional property is simply omitted when the element has no value for it.
 */
export type ExportedElement = {
  /** Abbreviated tag name (e.g. "bu" for BUTTON, "in" for INPUT). */
  el: string;

  /** ">"-joined ancestor path leading to the element, outermost first. */
  x: string;

  /** Visible text (`innerText`), whitespace-collapsed and truncated. */
  t?: string;

  /** `aria-label`, recorded only when it differs from `t`. */
  l?: string;

  /** `placeholder`, recorded only when it differs from both `t` and `l`. */
  p?: string;

  /** Value of the `name` attribute. */
  n?: string;

  /** Element id, recorded only when it does not look auto-generated. */
  i?: string;

  /** Value of the `data-testid` attribute. */
  d?: string;

  /** Value of the `role` attribute. */
  r?: string;

  /**
   * Three-hex-digit disambiguation hash, set on the second and later entries
   * whose other fields are all identical to an earlier entry.
   */
  h?: string;
};
|
|
|
|
/**
 * Collects a compact description of every `button`, `a`, `input`, `select`
 * and `textarea` in the current document.
 *
 * Each element is reported with a short tag code, an ancestor path, and
 * whichever identifying fields it has (text, aria-label, placeholder, name,
 * stable id, data-testid, role). Elements with none of those fields are
 * skipped, and only the first element for each tag + structural path is kept.
 *
 * All helpers are declared inside the function body on purpose: the file
 * header states this runs in the browser via `page.evaluate`, so the function
 * presumably has to be self-contained once serialized. Do not hoist helpers
 * to module scope without confirming how it is invoked.
 *
 * @returns The extracted elements, in document order.
 */
export const extractDom = (): ExportedElement[] => {
  /** Maximum length of any cleaned text field. */
  const MAX_TEXT = 60;
  /** Maximum number of class names used in a path segment. */
  const MAX_CLASSES = 2;
  /** data-* attributes that are always trusted and also feed the dedup key. */
  const KEY_DATA_ATTRS = ["data-testid", "data-modal-component", "data-modal-id"];

  /** Collapses whitespace runs, trims, and truncates; null/empty input gives null. */
  const clean = (t: string | null): string | null =>
    t ? t.replace(/\s+/g, " ").trim().slice(0, MAX_TEXT) : null;

  /** True for ids that look auto-generated (contain ":-" or a ":token:" run). */
  const isDynamicId = (id: string | null): boolean =>
    !!id && (id.includes(":-") || /:[a-z0-9]+:/i.test(id));

  /** Maps common tag names to short codes; other tags are just lower-cased. */
  const shortTag = (t: string): string =>
    ((
      {
        BUTTON: "bu",
        A: "a",
        INPUT: "in",
        SELECT: "s",
        TEXTAREA: "ta",
        DIV: "d",
        SPAN: "sp",
      } as Record<string, string>
    )[t] || t.toLowerCase());

  /**
   * True for class names that carry no stable meaning: empty or very short
   * names, `css-*` names, long purely alphanumeric tokens, and
   * "short prefix - long suffix" names such as `sc-abcdef`.
   */
  const isNoiseClass = (c: string): boolean =>
    !c ||
    c.length < 3 ||
    /^css-/.test(c) ||
    /^[a-z0-9]{6,}$/i.test(c) ||
    // Single backslash: `\\w` in a regex literal would match a literal
    // backslash followed by "w" and therefore never fire on a class name.
    /^\w{1,3}-\w{4,}$/.test(c);

  /**
   * True when a data-* attribute is safe to embed in a path: either one of
   * the always-trusted names, or a short value made only of
   * `[a-z0-9_.:-]` characters (which already rules out whitespace).
   */
  const isStableDataAttr = (name: string, value: string | null): boolean =>
    KEY_DATA_ATTRS.includes(name) ||
    (value != null && value.length <= 40 && /^[a-z0-9_.:-]+$/i.test(value));

  /** Class-based path suffix: up to MAX_CLASSES non-noise classes, or ".c0". */
  const classChunk = (element: Element): string => {
    const kept = Array.from(element.classList)
      .filter((c) => !isNoiseClass(c))
      .slice(0, MAX_CLASSES);
    return "." + (kept.length > 0 ? kept : ["c0"]).join(".");
  };

  /**
   * Builds two ancestor paths for `el`, walking up until `document.body`
   * (exclusive) or a non-element node.
   *
   * - `full` uses id, every stable data-* attribute and role.
   * - `key` uses id, only the always-trusted data-* attributes and role; it
   *   is the coarser path used for structural de-duplication.
   *
   * A segment with no attribute chunks falls back to its class chunk.
   *
   * NOTE(review): ids are embedded even when `isDynamicId` would reject them;
   * confirm whether dynamic ids should be excluded from paths as well.
   */
  const getPaths = (el: Element): { full: string; key: string } => {
    const parts: string[] = [];
    const keyParts: string[] = [];
    let node: Node | null = el;

    while (node && node.nodeType === 1 && node !== document.body) {
      const element = node as Element;
      const tag = shortTag(element.tagName);
      const attrs: string[] = [];
      const keyAttrs: string[] = [];

      if (element.id) {
        const idChunk = "#" + element.id;
        attrs.push(idChunk);
        keyAttrs.push(idChunk);
      }

      // getAttributeNames is guarded because it may be missing in older engines.
      const attrNames = element.getAttributeNames
        ? element.getAttributeNames()
        : [];
      for (const a of attrNames) {
        if (!a.startsWith("data-")) continue;
        const v = element.getAttribute(a);
        if (!isStableDataAttr(a, v)) continue;
        const attrChunk = "[" + a + "=" + v + "]";
        attrs.push(attrChunk);
        if (KEY_DATA_ATTRS.includes(a)) keyAttrs.push(attrChunk);
      }

      const role = element.getAttribute("role");
      if (role) {
        const roleChunk = "[r=" + role + "]";
        attrs.push(roleChunk);
        keyAttrs.push(roleChunk);
      }

      parts.unshift(
        tag + (attrs.length > 0 ? attrs.join("") : classChunk(element))
      );
      keyParts.unshift(
        tag + (keyAttrs.length > 0 ? keyAttrs.join("") : classChunk(element))
      );

      node = element.parentNode;
    }

    return { full: parts.join(">"), key: keyParts.join(">") };
  };

  const items: ExportedElement[] = [];
  /** Tag + key path combinations already emitted. */
  const seenStructure = new Set<string>();
  /** Occurrence count per fully-identical entry (all fields except `h`). */
  const seen = new Map<string, number>();

  /**
   * Appends `item` unless an element with the same tag and `keyPath` was
   * already added. If an earlier entry has exactly the same fields, a
   * three-hex-digit hash of "fields#occurrence" is stored in `item.h` so the
   * two can be told apart.
   */
  const addItem = (item: ExportedElement, keyPath: string): void => {
    const structureKey = item.el + "|" + keyPath;
    if (seenStructure.has(structureKey)) return;
    seenStructure.add(structureKey);

    const keyParts = [item.el, item.x];
    if (item.t) keyParts.push("t=" + item.t);
    if (item.l) keyParts.push("l=" + item.l);
    if (item.p) keyParts.push("p=" + item.p);
    if (item.n) keyParts.push("n=" + item.n);
    if (item.i) keyParts.push("i=" + item.i);
    if (item.d) keyParts.push("d=" + item.d);
    if (item.r) keyParts.push("r=" + item.r);
    const key = keyParts.join("|");
    const prev = seen.get(key) ?? 0;

    if (prev > 0) {
      // 31-multiplier rolling hash kept in unsigned 32-bit range; only the
      // low 12 bits are exposed.
      let hVal = 0;
      const str = key + "#" + prev;
      for (let i = 0; i < str.length; i++) {
        hVal = (hVal * 31 + str.charCodeAt(i)) >>> 0;
      }
      item.h = (hVal & 0xfff).toString(16).padStart(3, "0");
    }

    seen.set(key, prev + 1);
    items.push(item);
  };

  const elements = Array.from(
    document.querySelectorAll<HTMLElement>("button,a,input,select,textarea")
  );

  for (const e of elements) {
    const t = clean(e.innerText);
    const l = clean(e.getAttribute("aria-label"));
    const p = clean(e.getAttribute("placeholder"));
    const n = e.getAttribute("name");
    const r = e.getAttribute("role");
    const d = e.getAttribute("data-testid");

    // Only an id that exists AND is not auto-generated counts as identifying.
    const rawId = e.id || null;
    const stableId = rawId && !isDynamicId(rawId) ? rawId : null;

    // A menu item with no readable label is not useful.
    if (r === "menuitem" && !t && !l && !p) continue;
    // Skip elements that offer nothing to identify them by.
    if (!(t || l || p || n || stableId || d || r)) continue;

    const { full, key } = getPaths(e);
    const item: ExportedElement = { el: shortTag(e.tagName), x: full };

    if (t) item.t = t;
    if (l && l !== t) item.l = l;
    if (p && p !== t && p !== l) item.p = p;
    if (n) item.n = n;
    if (stableId) item.i = stableId;
    if (d) item.d = d;
    if (r) item.r = r;

    addItem(item, key);
  }

  return items;
};