240 lines
6.3 KiB
TypeScript
240 lines
6.3 KiB
TypeScript
// DOM extraction logic for Playwright-based HTML dumps.
// Runs in the browser context via page.evaluate.
|
|
|
|
export type ExportedElement = {
|
|
el: string;
|
|
x: string;
|
|
t?: string;
|
|
l?: string;
|
|
p?: string;
|
|
n?: string;
|
|
i?: string;
|
|
d?: string;
|
|
r?: string;
|
|
h?: string;
|
|
c?: number;
|
|
samples?: {
|
|
t?: string[];
|
|
l?: string[];
|
|
p?: string[];
|
|
};
|
|
};
|
|
|
|
export const extractDom = (): ExportedElement[] => {
|
|
const MAX_TEXT = 60;
|
|
|
|
const clean = (t: string | null): string | null =>
|
|
t ? t.replace(/\s+/g, " ").trim().slice(0, MAX_TEXT) : null;
|
|
|
|
const isDynamicId = (id: string | null): boolean =>
|
|
!!id && (id.includes(":-") || /:[a-z0-9]+:/i.test(id));
|
|
|
|
const shortTag = (t: string): string =>
|
|
({
|
|
BUTTON: "bu",
|
|
A: "a",
|
|
INPUT: "in",
|
|
SELECT: "s",
|
|
TEXTAREA: "ta",
|
|
DIV: "d",
|
|
SPAN: "sp",
|
|
} as Record<string, string>)[t] || t.toLowerCase();
|
|
|
|
const isNoiseClass = (c: string): boolean =>
|
|
!c ||
|
|
c.length < 3 ||
|
|
/^css-/.test(c) ||
|
|
/^[a-z0-9]{6,}$/i.test(c) ||
|
|
/^\\w{1,3}-\\w{4,}$/.test(c);
|
|
|
|
const isStableDataAttr = (name: string, value: string | null): boolean =>
|
|
name === "data-testid" ||
|
|
name === "data-modal-component" ||
|
|
name === "data-modal-id" ||
|
|
(value != null &&
|
|
value.length <= 40 &&
|
|
!/\\s/.test(value) &&
|
|
/^[a-z0-9_.:-]+$/i.test(value));
|
|
|
|
const siblingIndex = (node: Element) => {
|
|
const parent = node.parentElement;
|
|
if (!parent) return { idx: 0, count: 1 };
|
|
|
|
const siblings = Array.from(parent.children).filter(
|
|
(n) => (n as Element).tagName === node.tagName
|
|
);
|
|
const idx = siblings.indexOf(node);
|
|
return { idx, count: siblings.length };
|
|
};
|
|
|
|
const getPaths = (el: Element): { full: string; key: string } => {
|
|
const parts: string[] = [];
|
|
const keyParts: string[] = [];
|
|
let node: Node | null = el;
|
|
|
|
while (node && node.nodeType === 1 && node !== document.body) {
|
|
const element = node as Element;
|
|
const tag = shortTag(element.tagName);
|
|
|
|
const id = element.id || null;
|
|
const attrNames = element.getAttributeNames
|
|
? element.getAttributeNames()
|
|
: [];
|
|
|
|
const attrs: string[] = [];
|
|
const keyAttrs: string[] = [];
|
|
|
|
if (id) {
|
|
const idChunk = "#" + id;
|
|
attrs.push(idChunk);
|
|
keyAttrs.push(idChunk);
|
|
}
|
|
|
|
for (const a of attrNames) {
|
|
if (!a.startsWith("data-")) continue;
|
|
const v = element.getAttribute(a);
|
|
if (!isStableDataAttr(a, v)) continue;
|
|
const attrChunk = "[" + a + "=" + v + "]";
|
|
attrs.push(attrChunk);
|
|
if (
|
|
a === "data-testid" ||
|
|
a === "data-modal-component" ||
|
|
a === "data-modal-id"
|
|
) {
|
|
keyAttrs.push(attrChunk);
|
|
}
|
|
}
|
|
|
|
const role = element.getAttribute ? element.getAttribute("role") : null;
|
|
if (role) {
|
|
const roleChunk = "[r=" + role + "]";
|
|
attrs.push(roleChunk);
|
|
keyAttrs.push(roleChunk);
|
|
}
|
|
|
|
let chunk = tag;
|
|
let keyChunk = tag;
|
|
|
|
if (attrs.length > 0) {
|
|
chunk += attrs.join("");
|
|
} else {
|
|
let cls = Array.from(element.classList).filter(
|
|
(c) => !isNoiseClass(c)
|
|
);
|
|
if (cls.length > 2) cls = cls.slice(0, 2);
|
|
if (!cls.length) cls = ["c0"];
|
|
const clsChunk = "." + cls.join(".");
|
|
chunk += clsChunk;
|
|
}
|
|
|
|
if (keyAttrs.length > 0) {
|
|
keyChunk += keyAttrs.join("");
|
|
} else {
|
|
let cls = Array.from(element.classList).filter(
|
|
(c) => !isNoiseClass(c)
|
|
);
|
|
if (cls.length > 2) cls = cls.slice(0, 2);
|
|
if (!cls.length) cls = ["c0"];
|
|
keyChunk += "." + cls.join(".");
|
|
}
|
|
|
|
parts.unshift(chunk);
|
|
keyParts.unshift(keyChunk);
|
|
|
|
node = element.parentNode;
|
|
}
|
|
|
|
return { full: parts.join(">"), key: keyParts.join(">") };
|
|
};
|
|
|
|
const items: ExportedElement[] = [];
|
|
|
|
type Group = {
|
|
element: ExportedElement & { keyPath: string };
|
|
count: number;
|
|
t: Set<string>;
|
|
l: Set<string>;
|
|
p: Set<string>;
|
|
};
|
|
|
|
const groups = new Map<string, Group>();
|
|
|
|
const elements = Array.from(
|
|
document.querySelectorAll<HTMLElement>("button,a,input,select,textarea")
|
|
);
|
|
|
|
for (const e of elements) {
|
|
const t = clean(e.innerText);
|
|
const l = clean(e.getAttribute("aria-label"));
|
|
const p = clean(e.getAttribute("placeholder"));
|
|
const n = e.getAttribute("name");
|
|
const r = e.getAttribute("role");
|
|
const id = e.id || null;
|
|
const stableId = isDynamicId(id);
|
|
const d = e.getAttribute("data-testid");
|
|
|
|
if (r === "menuitem" && !t && !l && !p) continue;
|
|
if (!(t || l || p || n || !stableId || d || r)) continue;
|
|
|
|
const { full, key } = getPaths(e);
|
|
const base: ExportedElement & { keyPath: string } = {
|
|
el: shortTag(e.tagName),
|
|
x: full,
|
|
keyPath: key,
|
|
};
|
|
|
|
if (t) base.t = t;
|
|
if (l && l !== t) base.l = l;
|
|
if (p && p !== t && p !== l) base.p = p;
|
|
if (n) base.n = n;
|
|
if (!stableId && id) base.i = id;
|
|
if (d) base.d = d;
|
|
if (r) base.r = r;
|
|
|
|
const structureKey = base.el + "|" + base.keyPath;
|
|
let group = groups.get(structureKey);
|
|
if (!group) {
|
|
group = {
|
|
element: base,
|
|
count: 0,
|
|
t: new Set<string>(),
|
|
l: new Set<string>(),
|
|
p: new Set<string>(),
|
|
};
|
|
groups.set(structureKey, group);
|
|
}
|
|
|
|
group.count += 1;
|
|
if (base.t) group.t.add(base.t);
|
|
if (base.l) group.l.add(base.l);
|
|
if (base.p) group.p.add(base.p);
|
|
}
|
|
|
|
const MAX_SAMPLES = 5;
|
|
|
|
for (const group of groups.values()) {
|
|
const { keyPath, ...rest } = group.element;
|
|
const out: ExportedElement = { ...rest };
|
|
|
|
if (group.count > 1) {
|
|
out.c = group.count;
|
|
}
|
|
|
|
const samples: { t?: string[]; l?: string[]; p?: string[] } = {};
|
|
const tSamples = Array.from(group.t).slice(0, MAX_SAMPLES);
|
|
const lSamples = Array.from(group.l).slice(0, MAX_SAMPLES);
|
|
const pSamples = Array.from(group.p).slice(0, MAX_SAMPLES);
|
|
|
|
if (group.count > 1 && tSamples.length > 1) samples.t = tSamples;
|
|
if (group.count > 1 && lSamples.length > 1) samples.l = lSamples;
|
|
if (group.count > 1 && pSamples.length > 1) samples.p = pSamples;
|
|
|
|
if (Object.keys(samples).length > 0) {
|
|
out.samples = samples;
|
|
}
|
|
|
|
items.push(out);
|
|
}
|
|
|
|
return items;
|
|
}; |