// Developer-only scripts moved out of infrastructure: DOM exporter for local HTML dumps.
// NOT for production automation; intended as a developer utility to generate compact DOM exports
// for manual inspection and to aid writing Playwright automations.
const { chromium } = require("playwright");
const fs = require("fs").promises;
const path = require("path");
const { pathToFileURL } = require("url");

const INPUT_DIR = path.join(process.cwd(), "html-dumps");
const OUTPUT_DIR = path.join(process.cwd(), "html-dumps-optimized");

/**
 * Source text of an arrow function that runs inside the page and returns a
 * compact description of its interactive elements.
 *
 * It scans `button, a, input, select, textarea` and emits one object per
 * element with these keys (each only when present):
 *   - `el`: short tag name (`bu`, `a`, `in`, `s`, `ta`, ...)
 *   - `x`:  ancestor path below `<body>`, chunks joined by `>`. Each chunk is
 *           the short tag plus `#id` / `[data-*=value]` / `[r=role]`, or, when
 *           none of those exist, up to two non-noise class names (`c0` if
 *           there are none). The target gets `:<index>` among same-tag
 *           siblings; ancestors get `@<index>` only when they have same-tag
 *           siblings.
 *   - `t`:  inner text, whitespace-collapsed and cut to 60 characters
 *   - `l`:  aria-label (omitted when equal to `t`)
 *   - `p`:  placeholder (omitted when equal to `t` or `l`)
 *   - `n`:  name attribute
 *   - `i`:  id, unless it matches the dynamic-id patterns
 *   - `d`:  data-testid
 *   - `r`:  role
 *   - `h`:  3-hex-digit hash, added from the second occurrence onward when
 *           several elements produce an identical key
 *
 * Elements with `role="menuitem"` and no text/label/placeholder are skipped,
 * as are elements that have none of the identifying fields above.
 *
 * Kept as a string (not a real function) so it can be embedded into other
 * scripts; regex backslashes are doubled because this is a template literal.
 */
const domExtractor = `() => {
  const MAX_TEXT = 60;
  const clean = t => t ? t.replace(/\\s+/g, " ").trim().slice(0, MAX_TEXT) : null;
  const isDynamicId = id => id && (id.includes(":-") || /:[a-z0-9]+:/i.test(id));
  const shortTag = t => ({
    BUTTON: "bu",
    A: "a",
    INPUT: "in",
    SELECT: "s",
    TEXTAREA: "ta",
    DIV: "d",
    SPAN: "sp"
  }[t] || t.toLowerCase());
  const isNoiseClass = c =>
    !c ||
    c.length < 3 ||
    /^css-/.test(c) ||
    /^[a-z0-9]{6,}$/i.test(c) ||
    /^\\w{1,3}-\\w{4,}$/.test(c);
  const siblingIndex = node => {
    const sib = [...node.parentNode.children].filter(n => n.tagName === node.tagName);
    return { idx: sib.indexOf(node), count: sib.length };
  };
  const getSemSiblingPath = el => {
    const parts = [];
    let node = el;
    while (node && node.nodeType === 1 && node !== document.body) {
      const tag = shortTag(node.tagName);
      const { idx, count } = siblingIndex(node);
      const isTarget = node === el;
      const targetSuffix = isTarget && idx >= 0 ? ":" + idx : "";
      const parentSuffix = !isTarget && count > 1 && idx >= 0 ? "@" + idx : "";
      const sibSuffix = targetSuffix || parentSuffix;
      const id = node.id;
      const attrNames = node.getAttributeNames ? node.getAttributeNames() : [];
      const attrs = [];
      if (id) attrs.push("#" + id);
      for (const a of attrNames) {
        if (a.startsWith("data-")) {
          attrs.push("[" + a + "=" + node.getAttribute(a) + "]");
        }
      }
      const role = node.getAttribute ? node.getAttribute("role") : null;
      if (role) attrs.push("[r=" + role + "]");
      let chunk = tag;
      if (attrs.length > 0) {
        chunk += attrs.join("");
      } else {
        let cls = [...node.classList].filter(c => !isNoiseClass(c));
        if (cls.length > 2) cls = cls.slice(0, 2);
        if (!cls.length) cls = ["c0"];
        chunk += "." + cls.join(".");
      }
      chunk += sibSuffix;
      parts.unshift(chunk);
      node = node.parentNode;
    }
    return parts.join(">");
  };
  const items = [];
  const seen = new Map();
  const addItem = o => {
    const keyParts = [o.el, o.x];
    if (o.t) keyParts.push("t=" + o.t);
    if (o.l) keyParts.push("l=" + o.l);
    if (o.p) keyParts.push("p=" + o.p);
    if (o.n) keyParts.push("n=" + o.n);
    if (o.i) keyParts.push("i=" + o.i);
    if (o.d) keyParts.push("d=" + o.d);
    if (o.r) keyParts.push("r=" + o.r);
    const key = keyParts.join("|");
    const prev = seen.get(key) || 0;
    if (prev > 0) {
      let h = 0;
      const str = key + "#" + prev;
      for (let i = 0; i < str.length; i++) {
        h = (h * 31 + str.charCodeAt(i)) >>> 0;
      }
      const hex = (h & 0xfff).toString(16).padStart(3, "0");
      o.h = hex;
    }
    seen.set(key, prev + 1);
    items.push(o);
  };
  const elements = [...document.querySelectorAll("button,a,input,select,textarea")];
  for (const e of elements) {
    const t = clean(e.innerText);
    const l = clean(e.getAttribute("aria-label"));
    const p = clean(e.getAttribute("placeholder"));
    const n = e.getAttribute("name");
    const r = e.getAttribute("role");
    const id = e.id;
    const stableId = isDynamicId(id) ? null : id;
    const d = e.getAttribute("data-testid");
    if (r === "menuitem" && !t && !l && !p) continue;
    if (!(t || l || p || n || stableId || d || r)) continue;
    const o = { el: shortTag(e.tagName), x: getSemSiblingPath(e) };
    if (t) o.t = t;
    if (l && l !== t) o.l = l;
    if (p && p !== t && p !== l) o.p = p;
    if (n) o.n = n;
    if (stableId) o.i = stableId;
    if (d) o.d = d;
    if (r) o.r = r;
    addItem(o);
  }
  return items;
}`;

/**
 * Self-invoking variant of the extractor for pasting into a browser console
 * (or evaluating as a plain expression). It runs the extractor, logs the size
 * of the pretty-printed JSON, the element count and the items themselves, and
 * evaluates to the items array.
 *
 * Built from `domExtractor` so the extraction logic exists in one place only.
 */
const domExportScript = `(() => {
  const items = (${domExtractor})();
  const json = JSON.stringify(items, null, 2);
  console.log("chars:", json.length);
  console.log("elements:", items.length);
  console.log(items);
  return items;
})();`;

module.exports = { domExportScript };

/**
 * Creates `dir` (and any missing parents) on a best-effort basis.
 *
 * With `recursive: true` an already-existing directory is not an error, so
 * anything caught here is a genuine failure; it is reported as a warning
 * instead of being swallowed, and the caller continues.
 *
 * @param dir Directory path to create.
 */
async function ensureDir(dir: string): Promise<void> {
  try {
    await fs.mkdir(dir, { recursive: true });
  } catch (err) {
    console.warn("Could not create directory:", dir, err);
  }
}

/**
 * Converts every `.html` file under `INPUT_DIR` (searched recursively) into a
 * JSON file at the same relative location under `OUTPUT_DIR`, containing the
 * items produced by `domExtractor` for that page.
 *
 * Exits the process with code 1 when the input directory cannot be read.
 * A failure on a single file is logged and does not stop the remaining files.
 */
async function exportAll(): Promise<void> {
  await ensureDir(OUTPUT_DIR);

  /** Returns the paths, relative to `INPUT_DIR`, of all `.html` files below `dir`. */
  async function collectHtmlFiles(dir: string): Promise<string[]> {
    const entries = await fs.readdir(dir, { withFileTypes: true });
    const results: string[] = [];
    for (const ent of entries) {
      const p = path.join(dir, ent.name);
      if (ent.isDirectory()) {
        results.push(...(await collectHtmlFiles(p)));
      } else if (ent.isFile() && ent.name.endsWith(".html")) {
        results.push(path.relative(INPUT_DIR, p));
      }
    }
    return results;
  }

  let htmlFiles: string[] = [];
  try {
    htmlFiles = await collectHtmlFiles(INPUT_DIR);
  } catch (err) {
    console.error(
      "Could not read input directory recursively:",
      INPUT_DIR,
      err
    );
    process.exit(1);
    return;
  }

  if (htmlFiles.length === 0) {
    console.log("No .html files found in", INPUT_DIR);
    return;
  }

  const browser = await chromium.launch({ headless: true });
  try {
    for (const file of htmlFiles) {
      const abs = path.join(INPUT_DIR, file);
      // pathToFileURL percent-encodes spaces, '#', '?', '%' and handles Windows
      // drive letters; plain "file://" + path concatenation does neither.
      const url = pathToFileURL(abs).href;
      const page = await browser.newPage();
      try {
        await page.goto(url, { waitUntil: "domcontentloaded", timeout: 10000 });
        // The extractor is source text, so hand it to the page as an
        // expression string that invokes it immediately.
        const items = await page.evaluate("(" + domExtractor + ")()");
        const outPath = path.join(OUTPUT_DIR, file.replace(/\.html$/, ".json"));
        await fs.mkdir(path.dirname(outPath), { recursive: true });
        await fs.writeFile(outPath, JSON.stringify(items, null, 2), "utf8");
        console.log(
          "exported " +
            file +
            " -> " +
            path.relative(process.cwd(), outPath) +
            " (elements: " +
            (Array.isArray(items) ? items.length : 0) +
            ")"
        );
      } catch (e) {
        console.error("Failed processing", file, e);
      } finally {
        // Always release the page, even when navigation or extraction failed.
        await page.close();
      }
    }
  } finally {
    await browser.close();
  }
}

// Run the export only when executed directly, not when required for domExportScript.
if (require.main === module) {
  exportAll().catch((err) => {
    console.error(err);
    process.exit(1);
  });
}