Files
gridpilot.gg/scripts/dom-export/domExtractor.ts
2025-11-30 16:24:02 +01:00

240 lines
6.3 KiB
TypeScript

// DOM extraction logic for Playwright-based HTML dumps.
// Runs in the browser context via page.evaluate.
/**
 * Compact description of one interactive element (or of a group of
 * structurally identical elements) produced by `extractDom`.
 *
 * Field names are deliberately short to keep the serialized dump small.
 */
export type ExportedElement = {
  /** Abbreviated tag name, e.g. `bu` for BUTTON, `in` for INPUT. */
  el: string;
  /** Ancestor chain from just below `<body>` down to the element, joined with `>`. */
  x: string;
  /** Whitespace-normalized, truncated visible text. */
  t?: string;
  /** `aria-label`, recorded only when it differs from `t`. */
  l?: string;
  /** `placeholder`, recorded only when it differs from both `t` and `l`. */
  p?: string;
  /** `name` attribute. */
  n?: string;
  /** Element id, recorded only when it does not look auto-generated. */
  i?: string;
  /** `data-testid` attribute. */
  d?: string;
  /** `role` attribute. */
  r?: string;
  /**
   * NOTE(review): never assigned by the extraction code visible in this file;
   * presumably an href - confirm against other producers/consumers.
   */
  h?: string;
  /** Number of elements collapsed into this entry; present only when greater than 1. */
  c?: number;
  /**
   * Distinct values seen across a collapsed group, per text-like field.
   * A field appears only when the group yielded more than one distinct value.
   */
  samples?: Partial<Record<"t" | "l" | "p", string[]>>;
};
/**
 * Collects a compact inventory of the interactive elements
 * (`button`, `a`, `input`, `select`, `textarea`) in the current document.
 *
 * Elements that share the same abbreviated tag and the same structural key
 * path are collapsed into a single entry carrying a count (`c`) and, when the
 * collapsed elements differ in text/label/placeholder, a few sample values.
 *
 * This function is intended to be executed in the browser through
 * `page.evaluate`, so every helper it needs is declared inside its body and it
 * must not reference module-level bindings.
 *
 * @returns One entry per structural group, in first-seen document order.
 */
export const extractDom = (): ExportedElement[] => {
  const MAX_TEXT = 60;
  const MAX_SAMPLES = 5;
  // Data attributes that always count as stable and that take part in the grouping key.
  const KEY_DATA_ATTRS = ["data-testid", "data-modal-component", "data-modal-id"];
  const SHORT_TAGS: Record<string, string> = {
    BUTTON: "bu",
    A: "a",
    INPUT: "in",
    SELECT: "s",
    TEXTAREA: "ta",
    DIV: "d",
    SPAN: "sp",
  };
  const SAMPLE_FIELDS: Array<"t" | "l" | "p"> = ["t", "l", "p"];

  /** Collapses whitespace runs, trims and truncates; empty/missing input yields null. */
  const clean = (t: string | null | undefined): string | null =>
    t ? t.replace(/\s+/g, " ").trim().slice(0, MAX_TEXT) : null;

  /** True for ids that look framework-generated (e.g. `:r1:` or containing `:-`). */
  const isDynamicId = (id: string | null): boolean =>
    !!id && (id.includes(":-") || /:[a-z0-9]+:/i.test(id));

  /** Abbreviates common tag names; anything else is simply lower-cased. */
  const shortTag = (t: string): string => SHORT_TAGS[t] || t.toLowerCase();

  /**
   * True for class names that carry no structural meaning: very short names,
   * `css-*`, long purely alphanumeric tokens, and `xx-hash` style names.
   */
  const isNoiseClass = (c: string): boolean =>
    !c ||
    c.length < 3 ||
    /^css-/.test(c) ||
    /^[a-z0-9]{6,}$/i.test(c) ||
    // Single-escaped on purpose: `\\w` in a regex literal would match a literal backslash.
    /^\w{1,3}-\w{4,}$/.test(c);

  /** True when a data attribute is worth including in a path chunk. */
  const isStableDataAttr = (name: string, value: string | null): boolean =>
    KEY_DATA_ATTRS.includes(name) ||
    (value != null &&
      value.length <= 40 &&
      !/\s/.test(value) &&
      /^[a-z0-9_.:-]+$/i.test(value));

  /** `.a.b` from the first two non-noise classes, or `.c0` when none remain. */
  const classChunk = (element: Element): string => {
    const stable = Array.from(element.classList)
      .filter((c) => !isNoiseClass(c))
      .slice(0, 2);
    return "." + (stable.length > 0 ? stable : ["c0"]).join(".");
  };

  /**
   * Builds two `>`-joined ancestor paths for an element, stopping below `<body>`:
   * `full` uses every stable attribute, `key` only the ones used for grouping
   * (stable id, the key data attributes, role).
   */
  const getPaths = (el: Element): { full: string; key: string } => {
    const parts: string[] = [];
    const keyParts: string[] = [];
    let node: Node | null = el;
    while (node && node.nodeType === 1 && node !== document.body) {
      const element = node as Element;
      const tag = shortTag(element.tagName);
      const attrs: string[] = [];
      const keyAttrs: string[] = [];

      // Auto-generated ids change between renders, so they would make every
      // path unique and defeat grouping; only stable ids are recorded.
      const id = element.id || null;
      if (id && !isDynamicId(id)) {
        const idChunk = "#" + id;
        attrs.push(idChunk);
        keyAttrs.push(idChunk);
      }

      const attrNames = element.getAttributeNames
        ? element.getAttributeNames()
        : [];
      for (const a of attrNames) {
        if (!a.startsWith("data-")) continue;
        const v = element.getAttribute(a);
        if (!isStableDataAttr(a, v)) continue;
        const attrChunk = "[" + a + "=" + v + "]";
        attrs.push(attrChunk);
        if (KEY_DATA_ATTRS.includes(a)) keyAttrs.push(attrChunk);
      }

      const role = element.getAttribute ? element.getAttribute("role") : null;
      if (role) {
        const roleChunk = "[r=" + role + "]";
        attrs.push(roleChunk);
        keyAttrs.push(roleChunk);
      }

      // keyAttrs is a subset of attrs, so whenever attrs is empty the class
      // fallback has been computed here as well.
      const fallback = keyAttrs.length === 0 ? classChunk(element) : "";
      parts.push(tag + (attrs.length > 0 ? attrs.join("") : fallback));
      keyParts.push(tag + (keyAttrs.length > 0 ? keyAttrs.join("") : fallback));
      node = element.parentNode;
    }
    // Chunks were gathered leaf-first; paths read root-first.
    return { full: parts.reverse().join(">"), key: keyParts.reverse().join(">") };
  };

  type Group = {
    element: ExportedElement;
    count: number;
    t: Set<string>;
    l: Set<string>;
    p: Set<string>;
  };
  // Map preserves insertion order, so output follows first-seen document order.
  const groups = new Map<string, Group>();

  const elements = Array.from(
    document.querySelectorAll<HTMLElement>("button,a,input,select,textarea")
  );
  for (const e of elements) {
    const t = clean(e.innerText);
    const l = clean(e.getAttribute("aria-label"));
    const p = clean(e.getAttribute("placeholder"));
    const n = e.getAttribute("name");
    const r = e.getAttribute("role");
    const d = e.getAttribute("data-testid");
    const rawId = e.id || null;
    const stableId = rawId && !isDynamicId(rawId) ? rawId : null;

    // Unlabelled menu items carry no useful information.
    if (r === "menuitem" && !t && !l && !p) continue;
    // Skip elements that offer nothing to identify them by. A missing id must
    // not count as identifying information.
    if (!(t || l || p || n || stableId || d || r)) continue;

    const { full, key } = getPaths(e);
    const base: ExportedElement = { el: shortTag(e.tagName), x: full };
    if (t) base.t = t;
    if (l && l !== t) base.l = l;
    if (p && p !== t && p !== l) base.p = p;
    if (n) base.n = n;
    if (stableId) base.i = stableId;
    if (d) base.d = d;
    if (r) base.r = r;

    const structureKey = base.el + "|" + key;
    let group = groups.get(structureKey);
    if (!group) {
      // The first element seen becomes the representative of its group.
      group = {
        element: base,
        count: 0,
        t: new Set<string>(),
        l: new Set<string>(),
        p: new Set<string>(),
      };
      groups.set(structureKey, group);
    }
    group.count += 1;
    if (base.t) group.t.add(base.t);
    if (base.l) group.l.add(base.l);
    if (base.p) group.p.add(base.p);
  }

  const items: ExportedElement[] = [];
  for (const group of groups.values()) {
    const out: ExportedElement = { ...group.element };
    if (group.count > 1) {
      out.c = group.count;
      // Samples are only informative when the collapsed elements actually differ.
      const samples: { t?: string[]; l?: string[]; p?: string[] } = {};
      for (const field of SAMPLE_FIELDS) {
        const values = Array.from(group[field]).slice(0, MAX_SAMPLES);
        if (values.length > 1) samples[field] = values;
      }
      if (Object.keys(samples).length > 0) out.samples = samples;
    }
    items.push(out);
  }
  return items;
};