// DOM extraction logic for Playwright-based HTML dumps.
// Runs in the browser context via page.evaluate.

/**
 * Compact description of one interactive element, or of a group of elements
 * that share the same tag and structural key path.
 */
export type ExportedElement = {
  /** Short tag code (e.g. "bu" for BUTTON); unknown tags are lower-cased. */
  el: string;
  /** Ancestor path from just below `document.body` to the element, joined with ">". */
  x: string;
  /** Whitespace-collapsed, truncated `innerText`. */
  t?: string;
  /** `aria-label`, only when it differs from `t`. */
  l?: string;
  /** `placeholder`, only when it differs from `t` and `l`. */
  p?: string;
  /** `name` attribute. */
  n?: string;
  /** Element id, omitted when the id looks auto-generated. */
  i?: string;
  /** `data-testid` attribute. */
  d?: string;
  /** `role` attribute. */
  r?: string;
  /** Not populated by `extractDom`. */
  h?: string;
  /** Number of elements merged into this entry (only set when > 1). */
  c?: number;
  /** Distinct text/label/placeholder values seen across a merged group. */
  samples?: {
    t?: string[];
    l?: string[];
    p?: string[];
  };
};

/**
 * Collects buttons, links and form controls from the current document and
 * returns one compact record per structural group.
 *
 * Elements are grouped by short tag + key path; the first element of a group
 * supplies the record, `c` carries the group size, and `samples` lists up to
 * five distinct values per text field when the group has more than one.
 *
 * Every helper is declared inside the function body so that the function does
 * not depend on module-level bindings when it is evaluated in the page.
 *
 * @returns The extracted records, in first-seen order of their groups.
 */
export const extractDom = (): ExportedElement[] => {
  const MAX_TEXT = 60;
  const MAX_SAMPLES = 5;

  // Data attributes that are always trusted and that take part in the grouping key.
  const KEY_DATA_ATTRS = ["data-testid", "data-modal-component", "data-modal-id"];

  const TAG_CODES: Record<string, string> = {
    BUTTON: "bu",
    A: "a",
    INPUT: "in",
    SELECT: "s",
    TEXTAREA: "ta",
    DIV: "d",
    SPAN: "sp",
  };

  // Collapses whitespace runs, trims and truncates; empty/absent input yields null.
  const clean = (t: string | null | undefined): string | null =>
    t ? t.replace(/\s+/g, " ").trim().slice(0, MAX_TEXT) : null;

  // Ids containing ":-" or a ":token:" segment are treated as auto-generated.
  const isDynamicId = (id: string | null): boolean =>
    !!id && (id.includes(":-") || /:[a-z0-9]+:/i.test(id));

  const shortTag = (t: string): string => TAG_CODES[t] ?? t.toLowerCase();

  // Very short names, "css-*", long purely alphanumeric names and
  // "<1-3 chars>-<4+ chars>" names are considered generated noise.
  const isNoiseClass = (c: string): boolean =>
    !c ||
    c.length < 3 ||
    /^css-/.test(c) ||
    /^[a-z0-9]{6,}$/i.test(c) ||
    /^\w{1,3}-\w{4,}$/.test(c);

  const isStableDataAttr = (name: string, value: string | null): boolean =>
    KEY_DATA_ATTRS.includes(name) ||
    (value != null &&
      value.length <= 40 &&
      !/\s/.test(value) &&
      /^[a-z0-9_.:-]+$/i.test(value));

  // Up to two non-noise classes as ".a.b"; ".c0" when none survive the filter.
  const classChunk = (element: Element): string => {
    const kept = Array.from(element.classList)
      .filter((c) => !isNoiseClass(c))
      .slice(0, 2);
    return "." + (kept.length > 0 ? kept.join(".") : "c0");
  };

  // Builds two ancestor paths for `el`:
  //  - full: every stable id / data attribute / role on each ancestor
  //  - key:  only id, the KEY_DATA_ATTRS and role, used to group look-alikes
  // An ancestor without any such attribute is described by its classes instead.
  const getPaths = (el: Element): { full: string; key: string } => {
    const parts: string[] = [];
    const keyParts: string[] = [];
    let node: Node | null = el;

    while (node && node.nodeType === 1 && node !== document.body) {
      const element = node as Element;
      const tag = shortTag(element.tagName);
      const attrs: string[] = [];
      const keyAttrs: string[] = [];

      // Auto-generated ids differ per instance, so they would make every
      // path unique and defeat grouping; only stable ids are recorded.
      const id = element.id || null;
      if (id && !isDynamicId(id)) {
        const idChunk = "#" + id;
        attrs.push(idChunk);
        keyAttrs.push(idChunk);
      }

      const attrNames = element.getAttributeNames
        ? element.getAttributeNames()
        : [];
      for (const a of attrNames) {
        if (!a.startsWith("data-")) continue;
        const v = element.getAttribute(a);
        if (!isStableDataAttr(a, v)) continue;
        const attrChunk = "[" + a + "=" + v + "]";
        attrs.push(attrChunk);
        if (KEY_DATA_ATTRS.includes(a)) keyAttrs.push(attrChunk);
      }

      const role = element.getAttribute ? element.getAttribute("role") : null;
      if (role) {
        const roleChunk = "[r=" + role + "]";
        attrs.push(roleChunk);
        keyAttrs.push(roleChunk);
      }

      // keyAttrs is a subset of attrs, so the class fallback is needed
      // exactly when keyAttrs is empty.
      const fallback = keyAttrs.length > 0 ? "" : classChunk(element);
      parts.unshift(tag + (attrs.length > 0 ? attrs.join("") : fallback));
      keyParts.unshift(tag + (keyAttrs.length > 0 ? keyAttrs.join("") : fallback));

      node = element.parentNode;
    }

    return { full: parts.join(">"), key: keyParts.join(">") };
  };

  type Group = {
    element: ExportedElement;
    count: number;
    t: Set<string>;
    l: Set<string>;
    p: Set<string>;
  };

  const groups = new Map<string, Group>();
  const elements = Array.from(
    document.querySelectorAll<HTMLElement>("button,a,input,select,textarea")
  );

  for (const e of elements) {
    const t = clean(e.innerText);
    const l = clean(e.getAttribute("aria-label"));
    const p = clean(e.getAttribute("placeholder"));
    const n = e.getAttribute("name");
    const r = e.getAttribute("role");
    const d = e.getAttribute("data-testid");
    const id = e.id || null;
    const stableId = id !== null && !isDynamicId(id) ? id : null;

    // Unlabelled menu items carry no usable information.
    if (r === "menuitem" && !t && !l && !p) continue;
    // Keep only elements with at least one identifying feature.
    if (!(t || l || p || n || stableId || d || r)) continue;

    const { full, key } = getPaths(e);
    const base: ExportedElement = { el: shortTag(e.tagName), x: full };
    if (t) base.t = t;
    if (l && l !== t) base.l = l;
    if (p && p !== t && p !== l) base.p = p;
    if (n) base.n = n;
    if (stableId) base.i = stableId;
    if (d) base.d = d;
    if (r) base.r = r;

    const structureKey = base.el + "|" + key;
    let group = groups.get(structureKey);
    if (!group) {
      group = {
        element: base,
        count: 0,
        t: new Set<string>(),
        l: new Set<string>(),
        p: new Set<string>(),
      };
      groups.set(structureKey, group);
    }
    group.count += 1;
    if (base.t) group.t.add(base.t);
    if (base.l) group.l.add(base.l);
    if (base.p) group.p.add(base.p);
  }

  const items: ExportedElement[] = [];
  for (const group of groups.values()) {
    const out = group.element;
    if (group.count > 1) {
      out.c = group.count;
      // Samples are only useful when the merged elements actually differ.
      const samples: NonNullable<ExportedElement["samples"]> = {};
      for (const field of ["t", "l", "p"] as const) {
        const values = Array.from(group[field]).slice(0, MAX_SAMPLES);
        if (values.length > 1) samples[field] = values;
      }
      if (Object.keys(samples).length > 0) out.samples = samples;
    }
    items.push(out);
  }
  return items;
};