// Developer-only scripts moved out of infrastructure: DOM exporter for local HTML dumps.
// NOT for production automation; intended as a developer utility to generate compact DOM exports
// for manual inspection and to aid writing Playwright automations.
//
// Usage (from repo root):
//   npm install -D playwright ts-node typescript @types/node
//   npx playwright install
//   npx ts-node scripts/dom-export/exportHtmlDumps.ts
//
// Output: ./html-dumps-optimized/*.json
//
// This file intentionally contains both the in-page extractor string (exported) and the
// Playwright runner that iterates ./html-dumps/*.html and writes .json files into
// ./html-dumps-optimized. These artifacts are developer helpers and must not be imported
// into production automation code.

const { chromium } = require("playwright");
const fs = require("fs").promises;
const path = require("path");
const { pathToFileURL } = require("url");

const INPUT_DIR = path.join(process.cwd(), "html-dumps");
const OUTPUT_DIR = path.join(process.cwd(), "html-dumps-optimized");

// Shared in-page extraction logic, kept as source text because it runs inside the browser page
// (or a DevTools console), not in Node. After this body has run, a local `items` array holds one
// compact record per interactive element (button, a, input, select, textarea):
//   el = short tag name          x = semantic short DOM path (at most 5 levels, stops early at
//                                    an element with a stable id, a data-* attribute or a role)
//   t  = inner text              l = aria-label            p = placeholder
//   n  = name attribute          i = id (omitted when it looks auto-generated)
//   d  = data-testid             r = role
//   h  = 3-hex-digit hash, added only to the 2nd and later records that would otherwise be identical
// Text-like fields are whitespace-collapsed and truncated to MAX_TEXT characters.
// NOTE: regex backslashes are doubled because this is a template literal.
const domExtractorBody = `
  const MAX_TEXT = 60;
  const clean = t => t ? t.replace(/\\s+/g, " ").trim().slice(0, MAX_TEXT) : null;
  const isDynamicId = id => id && (id.includes(":-") || /:[a-z0-9]+:/i.test(id));
  const shortTag = t => ({
    BUTTON: "bu", A: "a", INPUT: "in", SELECT: "s", TEXTAREA: "ta", DIV: "d", SPAN: "sp"
  }[t] || t.toLowerCase());
  const isNoiseClass = c =>
    !c ||
    c.length < 3 ||
    /^css-/.test(c) ||
    /^[a-z0-9]{6,}$/i.test(c) ||
    /^\\w{1,3}-\\w{4,}$/.test(c);
  const siblingIndex = node => {
    const sib = [...node.parentNode.children].filter(n => n.tagName === node.tagName);
    return { idx: sib.indexOf(node), count: sib.length };
  };
  const getSemSiblingPath = el => {
    const parts = [];
    let node = el;
    let depth = 0;
    while (node && node.nodeType === 1 && node !== document.body && depth < 5) {
      const { idx, count } = siblingIndex(node);
      const isTarget = node === el;
      const tag = shortTag(node.tagName);
      const targetSuffix = isTarget && idx >= 0 ? ":" + idx : "";
      const parentSuffix = !isTarget && count > 1 && idx >= 0 ? "@" + idx : "";
      const sibSuffix = targetSuffix || parentSuffix;
      let cls = [...node.classList].filter(c => !isNoiseClass(c));
      if (cls.length > 2) cls = cls.slice(0, 2);
      if (!cls.length) cls = ["c0"];
      const attrs = [];
      const id = node.id;
      if (id && !isDynamicId(id)) attrs.push("#" + id);
      const attrNames = node.getAttributeNames ? node.getAttributeNames() : [];
      let hasDataAttr = false;
      for (const a of attrNames) {
        if (a.startsWith("data-")) {
          attrs.push("[" + a + "=" + node.getAttribute(a) + "]");
          hasDataAttr = true;
        }
      }
      const role = node.getAttribute ? node.getAttribute("role") : null;
      if (role) attrs.push("[r=" + role + "]");
      const chunk = tag + "." + cls.join(".") + attrs.join("") + (sibSuffix ? sibSuffix : "");
      parts.unshift(chunk);
      depth += 1;
      const hasStrongAnchor = (id && !isDynamicId(id)) || hasDataAttr || !!role;
      if (depth >= 5 || hasStrongAnchor) break;
      node = node.parentNode;
    }
    return parts.join(">");
  };
  const items = [];
  const seen = new Map();
  const addItem = o => {
    const keyParts = [o.el, o.x];
    if (o.t) keyParts.push("t=" + o.t);
    if (o.l) keyParts.push("l=" + o.l);
    if (o.p) keyParts.push("p=" + o.p);
    if (o.n) keyParts.push("n=" + o.n);
    if (o.i) keyParts.push("i=" + o.i);
    if (o.d) keyParts.push("d=" + o.d);
    if (o.r) keyParts.push("r=" + o.r);
    const key = keyParts.join("|");
    const prev = seen.get(key) || 0;
    if (prev > 0) {
      let h = 0;
      const str = key + "#" + prev;
      for (let i = 0; i < str.length; i++) {
        h = (h * 31 + str.charCodeAt(i)) >>> 0;
      }
      const hex = (h & 0xfff).toString(16).padStart(3, "0");
      o.h = hex;
    }
    seen.set(key, prev + 1);
    items.push(o);
  };
  const elements = [...document.querySelectorAll("button,a,input,select,textarea")];
  for (const e of elements) {
    const t = clean(e.innerText);
    const l = clean(e.getAttribute("aria-label"));
    const p = clean(e.getAttribute("placeholder"));
    const n = e.getAttribute("name");
    const r = e.getAttribute("role");
    const id = e.id;
    const stableId = isDynamicId(id) ? null : id;
    const d = e.getAttribute("data-testid");
    // skip menuitems with no meaningful text/label/placeholder
    if (r === "menuitem" && !t && !l && !p) continue;
    // keep only meaningful ones
    if (!(t || l || p || n || stableId || d || r)) continue;
    const o = { el: shortTag(e.tagName), x: getSemSiblingPath(e) };
    if (t) o.t = t;
    if (l && l !== t) o.l = l;
    if (p && p !== t && p !== l) o.p = p;
    if (n) o.n = n;
    if (stableId) o.i = stableId;
    if (d) o.d = d;
    if (r) o.r = r;
    addItem(o);
  }
`;

// Developer helper: in-page DOM extractor string (for console or page.evaluate).
// Kept as a plain const to avoid ES module import/export issues when running with ts-node in CJS mode.
// This is a self-invoking script: it runs the shared extractor, logs the pretty-printed JSON size,
// the element count and the items themselves, and evaluates to the items array.
const domExportScript =
  "(() => {" +
  domExtractorBody +
  `
  const json = JSON.stringify(items, null, 2);
  console.log("chars:", json.length);
  console.log("elements:", items.length);
  console.log(items);
  return items;
})();`;

// Source text of a zero-argument arrow function that runs the shared extractor and returns the
// items array without logging. The Playwright runner below invokes it inside each page.
const domExtractor = "() => {" + domExtractorBody + "  return items;\n}";

module.exports = { domExportScript };

/**
 * Best-effort creation of `dir`, including missing parent directories.
 *
 * With `recursive: true`, `mkdir` does not reject when the directory already exists, so any
 * error caught here is a genuine problem (e.g. permissions, or a regular file in the way).
 * The call stays non-fatal, but the failure is reported instead of being silently dropped.
 *
 * @param dir Directory path to create.
 */
async function ensureDir(dir: string): Promise<void> {
  try {
    await fs.mkdir(dir, { recursive: true });
  } catch (err) {
    console.warn("Could not create directory:", dir, err);
  }
}

/**
 * Renders every `.html` file found recursively under INPUT_DIR in headless Chromium, runs the
 * in-page extractor against it and writes the result as pretty-printed JSON to the mirrored
 * relative path under OUTPUT_DIR (with `.html` replaced by `.json`).
 *
 * A failure on one file is logged and does not stop the remaining files. If INPUT_DIR cannot be
 * read the process exits with code 1; if it contains no `.html` files the function just returns.
 */
async function exportAll(): Promise<void> {
  // Only the directory-entry members that are actually used below.
  type DirEntry = { name: string; isDirectory(): boolean; isFile(): boolean };

  await ensureDir(OUTPUT_DIR);

  // Depth-first walk returning the paths of all .html files, relative to INPUT_DIR.
  async function collectHtmlFiles(dir: string): Promise<string[]> {
    const entries: DirEntry[] = await fs.readdir(dir, { withFileTypes: true });
    const results: string[] = [];
    for (const ent of entries) {
      const p = path.join(dir, ent.name);
      if (ent.isDirectory()) {
        results.push(...(await collectHtmlFiles(p)));
      } else if (ent.isFile() && ent.name.endsWith(".html")) {
        results.push(path.relative(INPUT_DIR, p));
      }
    }
    return results;
  }

  let htmlFiles: string[] = [];
  try {
    htmlFiles = await collectHtmlFiles(INPUT_DIR);
  } catch (err) {
    console.error(
      "Could not read input directory recursively:",
      INPUT_DIR,
      err
    );
    process.exit(1);
    return;
  }

  if (htmlFiles.length === 0) {
    console.log("No .html files found in", INPUT_DIR);
    return;
  }

  // The extractor exists only as source text, so wrap it in a function whose serialized body
  // invokes it; this wrapper is what gets handed to page.evaluate.
  const runExtractor = new Function("return (" + domExtractor + ")()");

  const browser = await chromium.launch({ headless: true });
  try {
    for (const file of htmlFiles) {
      const abs = path.join(INPUT_DIR, file);
      // pathToFileURL percent-encodes spaces, '#', '?', '%' etc. and handles Windows drive
      // letters and backslashes, none of which survive plain "file://" + path concatenation.
      const url = pathToFileURL(abs).href;
      const page = await browser.newPage();
      try {
        await page.goto(url, { waitUntil: "domcontentloaded", timeout: 10000 });
        const items = await page.evaluate(runExtractor);
        const outPath = path.join(OUTPUT_DIR, file.replace(/\.html$/, ".json"));
        // Mirror nested input sub-directories in the output tree.
        await fs.mkdir(path.dirname(outPath), { recursive: true });
        await fs.writeFile(outPath, JSON.stringify(items, null, 2), "utf8");
        console.log(
          "exported " +
            file +
            " -> " +
            path.relative(process.cwd(), outPath) +
            " (elements: " +
            (Array.isArray(items) ? items.length : 0) +
            ")"
        );
      } catch (e) {
        console.error("Failed processing", file, e);
      } finally {
        await page.close();
      }
    }
  } finally {
    await browser.close();
  }
}

// Run the export only when this file is executed directly, not when it is require()d.
if (require.main === module) {
  exportAll().catch((err) => {
    console.error(err);
    process.exit(1);
  });
}