// DOM extraction logic for Playwright-based HTML dumps.
// Runs in the browser context via page.evaluate.

/**
 * Compact record describing one interactive element found on the page.
 *
 * - `el`: abbreviated tag name (see `shortTag` inside `extractDom`).
 * - `x`:  ancestor path from just below `<body>` down to the element.
 * - `t`:  whitespace-collapsed, truncated `innerText`.
 * - `l`:  `aria-label` (omitted when equal to `t`).
 * - `p`:  `placeholder` (omitted when equal to `t` or `l`).
 * - `n`:  `name` attribute.
 * - `i`:  element id, only when it does not look auto-generated.
 * - `d`:  `data-testid` attribute.
 * - `r`:  `role` attribute.
 * - `h`:  3-hex-digit disambiguator, set only on repeated identical records.
 */
export type ExportedElement = {
  el: string;
  x: string;
  t?: string;
  l?: string;
  p?: string;
  n?: string;
  i?: string;
  d?: string;
  r?: string;
  h?: string;
};

/**
 * Collects every `button`, `a`, `input`, `select` and `textarea` in the
 * current document and returns a de-duplicated list of compact descriptors.
 *
 * All helpers are declared inside this function and only the global
 * `document` is read, so the function body has no references to module scope.
 * NOTE(review): presumably required because the function is shipped to the
 * page via page.evaluate — keep it self-contained.
 *
 * @returns One record per structurally distinct interactive element that
 *          carries at least one identifying signal (text, label, placeholder,
 *          name, stable id, test id or role).
 */
export const extractDom = (): ExportedElement[] => {
  /** Maximum number of characters kept from any text-like value. */
  const MAX_TEXT = 60;

  /** Collapses whitespace runs, trims and truncates; empty/null input yields a falsy value. */
  const clean = (t: string | null): string | null =>
    t ? t.replace(/\s+/g, " ").trim().slice(0, MAX_TEXT) : null;

  /** True for ids that contain ":-" or a ":token:" segment (e.g. ":r1:"). */
  const isDynamicId = (id: string | null): boolean =>
    !!id && (id.includes(":-") || /:[a-z0-9]+:/i.test(id));

  const SHORT_TAGS: Record<string, string> = {
    BUTTON: "bu",
    A: "a",
    INPUT: "in",
    SELECT: "s",
    TEXTAREA: "ta",
    DIV: "d",
    SPAN: "sp",
  };

  /** Abbreviates well-known tag names; everything else is just lower-cased. */
  const shortTag = (t: string): string => SHORT_TAGS[t] || t.toLowerCase();

  /**
   * True for class names that should not appear in a path: empty or very
   * short names, `css-*`, purely alphanumeric names of 6+ characters, and
   * `<1-3 word chars>-<4+ word chars>` names such as `sc-bdVaJa`.
   */
  const isNoiseClass = (c: string): boolean =>
    !c ||
    c.length < 3 ||
    /^css-/.test(c) ||
    /^[a-z0-9]{6,}$/i.test(c) ||
    // Fixed: the literal previously read /^\\w{1,3}-\\w{4,}$/, which matches
    // a literal backslash and therefore never fired on real class names.
    /^\w{1,3}-\w{4,}$/.test(c);

  /**
   * Decides whether a `data-*` attribute is worth including in a path:
   * a fixed allow-list of names, or any short, whitespace-free, identifier-like value.
   */
  const isStableDataAttr = (name: string, value: string | null): boolean =>
    name === "data-testid" ||
    name === "data-modal-component" ||
    name === "data-modal-id" ||
    (value != null &&
      value.length <= 40 &&
      // Fixed: was /\\s/ (literal backslash + "s").
      !/\s/.test(value) &&
      /^[a-z0-9_.:-]+$/i.test(value));

  /** Builds the ".a.b" class suffix: up to two non-noise classes, or ".c0" when none remain. */
  const classChunk = (element: Element): string => {
    let cls = Array.from(element.classList).filter((c) => !isNoiseClass(c));
    if (cls.length > 2) cls = cls.slice(0, 2);
    if (!cls.length) cls = ["c0"];
    return "." + cls.join(".");
  };

  /**
   * Walks from `el` up to (but excluding) `document.body` and builds two
   * ">"-joined paths:
   * - `full`: each node as tag + id + stable data attributes + role,
   *   falling back to classes when the node has none of those;
   * - `key`:  same, but only id, the three allow-listed data attributes and
   *   role count; used for structural de-duplication.
   *
   * NOTE(review): ids are added to both paths even when `isDynamicId` would
   * flag them — confirm whether that is intended.
   */
  const getPaths = (el: Element): { full: string; key: string } => {
    const parts: string[] = [];
    const keyParts: string[] = [];
    let node: Node | null = el;

    while (node && node.nodeType === 1 && node !== document.body) {
      const element = node as Element;
      const tag = shortTag(element.tagName);
      const id = element.id || null;
      const attrNames = element.getAttributeNames
        ? element.getAttributeNames()
        : [];
      const attrs: string[] = [];
      const keyAttrs: string[] = [];

      if (id) {
        const idChunk = "#" + id;
        attrs.push(idChunk);
        keyAttrs.push(idChunk);
      }

      for (const a of attrNames) {
        if (!a.startsWith("data-")) continue;
        const v = element.getAttribute(a);
        if (!isStableDataAttr(a, v)) continue;
        const attrChunk = "[" + a + "=" + v + "]";
        attrs.push(attrChunk);
        if (
          a === "data-testid" ||
          a === "data-modal-component" ||
          a === "data-modal-id"
        ) {
          keyAttrs.push(attrChunk);
        }
      }

      const role = element.getAttribute ? element.getAttribute("role") : null;
      if (role) {
        const roleChunk = "[r=" + role + "]";
        attrs.push(roleChunk);
        keyAttrs.push(roleChunk);
      }

      // keyAttrs is a subset of attrs, so the class fallback is needed
      // exactly when keyAttrs is empty; compute it once for both paths.
      const fallback = keyAttrs.length === 0 ? classChunk(element) : "";
      const chunk = tag + (attrs.length > 0 ? attrs.join("") : fallback);
      const keyChunk = tag + (keyAttrs.length > 0 ? keyAttrs.join("") : fallback);

      parts.unshift(chunk);
      keyParts.unshift(keyChunk);
      node = element.parentNode;
    }

    return { full: parts.join(">"), key: keyParts.join(">") };
  };

  const items: ExportedElement[] = [];
  /** `el|keyPath` combinations already exported. */
  const seenStructure = new Set<string>();
  /** Number of times each full descriptor key has been exported so far. */
  const seen = new Map<string, number>();

  /**
   * Appends a record unless an element with the same tag and key path was
   * already exported. A record whose visible fields exactly repeat an earlier
   * one receives an `h` value derived from the key and its repeat count.
   */
  const addItem = (o: ExportedElement & { keyPath: string }): void => {
    const structureKey = o.el + "|" + o.keyPath;
    if (seenStructure.has(structureKey)) return;
    seenStructure.add(structureKey);

    const keyParts = [o.el, o.x];
    if (o.t) keyParts.push("t=" + o.t);
    if (o.l) keyParts.push("l=" + o.l);
    if (o.p) keyParts.push("p=" + o.p);
    if (o.n) keyParts.push("n=" + o.n);
    if (o.i) keyParts.push("i=" + o.i);
    if (o.d) keyParts.push("d=" + o.d);
    if (o.r) keyParts.push("r=" + o.r);
    const key = keyParts.join("|");

    const prev = seen.get(key) || 0;
    if (prev > 0) {
      // 31-multiplier rolling hash kept in unsigned 32-bit range; the low
      // 12 bits are emitted as three hex digits.
      let hVal = 0;
      const str = key + "#" + prev;
      for (let i = 0; i < str.length; i++) {
        hVal = (hVal * 31 + str.charCodeAt(i)) >>> 0;
      }
      o.h = (hVal & 0xfff).toString(16).padStart(3, "0");
    }
    seen.set(key, prev + 1);

    // keyPath is internal bookkeeping and is stripped from the exported record.
    const { keyPath, ...rest } = o;
    items.push(rest);
  };

  const elements = Array.from(
    document.querySelectorAll<HTMLElement>("button,a,input,select,textarea")
  );

  for (const e of elements) {
    const t = clean(e.innerText);
    const l = clean(e.getAttribute("aria-label"));
    const p = clean(e.getAttribute("placeholder"));
    const n = e.getAttribute("name");
    const r = e.getAttribute("role");
    const id = e.id || null;
    // Fixed: this previously held isDynamicId(id), so the filter below treated
    // "no id at all" as an identifying signal and never skipped anonymous elements.
    const stableId = id && !isDynamicId(id) ? id : null;
    const d = e.getAttribute("data-testid");

    // Menu items without any visible or accessible text carry no information.
    if (r === "menuitem" && !t && !l && !p) continue;
    // Skip elements with nothing to identify them by.
    if (!(t || l || p || n || stableId || d || r)) continue;

    const { full, key } = getPaths(e);
    const o: ExportedElement & { keyPath: string } = {
      el: shortTag(e.tagName),
      x: full,
      keyPath: key,
    };
    if (t) o.t = t;
    if (l && l !== t) o.l = l;
    if (p && p !== t && p !== l) o.p = p;
    if (n) o.n = n;
    if (stableId) o.i = stableId;
    if (d) o.d = d;
    if (r) o.r = r;
    addItem(o);
  }

  return items;
};