Files
gridpilot.gg/scripts/dom-export/exportHtmlDumps.ts
2025-11-27 13:26:17 +01:00

404 lines
12 KiB
TypeScript

// Developer-only scripts moved out of infrastructure: DOM exporter for local HTML dumps.
// NOT for production automation; intended as a developer utility to generate compact DOM exports
// for manual inspection and to aid writing Playwright automations.
//
// Usage (from repo root):
// npm install -D playwright ts-node typescript @types/node
// npx playwright install
// npx ts-node scripts/dom-export/exportHtmlDumps.ts
//
// Output: ./html-dumps-optimized/**/*.json (one .json per input .html, mirroring the input directory layout)
//
// This file intentionally contains both the in-page extractor string (exported) and the
// Playwright runner that recursively collects ./html-dumps/**/*.html and writes matching
// .json files into ./html-dumps-optimized. These artifacts are developer helpers and must
// not be imported into production automation code.
const { chromium } = require("playwright");
const fs = require("fs").promises;
const path = require("path");
const { pathToFileURL } = require("url");
// Root folder scanned (recursively) for input .html dumps; resolved against the current working directory.
const INPUT_DIR = path.join(process.cwd(), "html-dumps");
// Root folder that receives the generated .json exports; resolved against the current working directory.
const OUTPUT_DIR = path.join(process.cwd(), "html-dumps-optimized");
// Developer helper: in-page DOM extractor, kept as source text so it can be evaluated inside a
// browser page. Plain consts are used (no import/export) to avoid ES module issues when running
// with ts-node in CJS mode.
//
// `domExtractor` is the source of an arrow function that returns one compact record per
// button/a/input/select/textarea element that carries at least one identifying attribute:
//   el – short tag name ("bu", "a", "in", "s", "ta", otherwise the lower-cased tag)
//   x  – short semantic path: up to 5 ancestors joined by ">", cut early at the first node
//        that has a stable id, a data-* attribute or a role
//   t  – visible text, l – aria-label, p – placeholder (whitespace-collapsed, max 60 chars)
//   n  – name attribute, i – id (omitted when it looks auto-generated), d – data-testid, r – role
//   h  – 3-hex-digit disambiguator, only set on the 2nd+ record that shares the same key
// The string must not contain backticks or "${" because it is interpolated below.
const domExtractor = `() => {
const MAX_TEXT = 60;
const clean = t =>
t ? t.replace(/\\s+/g, " ").trim().slice(0, MAX_TEXT) : null;
const isDynamicId = id =>
id && (id.includes(":-") || /:[a-z0-9]+:/i.test(id));
const shortTag = t => ({
BUTTON: "bu",
A: "a",
INPUT: "in",
SELECT: "s",
TEXTAREA: "ta",
DIV: "d",
SPAN: "sp"
}[t] || t.toLowerCase());
const isNoiseClass = c =>
!c ||
c.length < 3 ||
/^css-/.test(c) ||
/^[a-z0-9]{6,}$/i.test(c) ||
/^\\w{1,3}-\\w{4,}$/.test(c);
const siblingIndex = node => {
const sib = [...node.parentNode.children]
.filter(n => n.tagName === node.tagName);
return { idx: sib.indexOf(node), count: sib.length };
};
const getSemSiblingPath = el => {
const parts = [];
let node = el;
let depth = 0;
while (node && node.nodeType === 1 && node !== document.body && depth < 5) {
const { idx, count } = siblingIndex(node);
const isTarget = node === el;
const tag = shortTag(node.tagName);
const targetSuffix = isTarget && idx >= 0 ? ":" + idx : "";
const parentSuffix = !isTarget && count > 1 && idx >= 0 ? "@" + idx : "";
const sibSuffix = targetSuffix || parentSuffix;
let cls = [...node.classList].filter(c => !isNoiseClass(c));
if (cls.length > 2) cls = cls.slice(0, 2);
if (!cls.length) cls = ["c0"];
const attrs = [];
const id = node.id;
if (id && !isDynamicId(id)) attrs.push("#" + id);
const attrNames = node.getAttributeNames ? node.getAttributeNames() : [];
let hasDataAttr = false;
for (const a of attrNames) {
if (a.startsWith("data-")) {
attrs.push("[" + a + "=" + node.getAttribute(a) + "]");
hasDataAttr = true;
}
}
const role = node.getAttribute ? node.getAttribute("role") : null;
if (role) attrs.push("[r=" + role + "]");
const chunk = tag + "." + cls.join(".") + attrs.join("") + (sibSuffix ? sibSuffix : "");
parts.unshift(chunk);
depth += 1;
const hasStrongAnchor =
(id && !isDynamicId(id)) || hasDataAttr || !!role;
if (depth >= 5 || hasStrongAnchor) break;
node = node.parentNode;
}
return parts.join(">");
};
const items = [];
const seen = new Map();
const addItem = o => {
const keyParts = [o.el, o.x];
if (o.t) keyParts.push("t=" + o.t);
if (o.l) keyParts.push("l=" + o.l);
if (o.p) keyParts.push("p=" + o.p);
if (o.n) keyParts.push("n=" + o.n);
if (o.i) keyParts.push("i=" + o.i);
if (o.d) keyParts.push("d=" + o.d);
if (o.r) keyParts.push("r=" + o.r);
const key = keyParts.join("|");
const prev = seen.get(key) || 0;
if (prev > 0) {
let h = 0;
const str = key + "#" + prev;
for (let i = 0; i < str.length; i++) {
h = (h * 31 + str.charCodeAt(i)) >>> 0;
}
const hex = (h & 0xfff).toString(16).padStart(3, "0");
o.h = hex;
}
seen.set(key, prev + 1);
items.push(o);
};
const elements = [...document.querySelectorAll("button,a,input,select,textarea")];
for (const e of elements) {
const t = clean(e.innerText);
const l = clean(e.getAttribute("aria-label"));
const p = clean(e.getAttribute("placeholder"));
const n = e.getAttribute("name");
const r = e.getAttribute("role");
const id = e.id;
const stableId = isDynamicId(id) ? null : id;
const d = e.getAttribute("data-testid");
// skip menuitems with no meaningful text/label/placeholder
if (r === "menuitem" && !t && !l && !p) continue;
if (!(t || l || p || n || stableId || d || r)) continue;
const o = { el: shortTag(e.tagName), x: getSemSiblingPath(e) };
if (t) o.t = t;
if (l && l !== t) o.l = l;
if (p && p !== t && p !== l) o.p = p;
if (n) o.n = n;
if (stableId) o.i = stableId;
if (d) o.d = d;
if (r) o.r = r;
addItem(o);
}
return items;
}`;
// Console-friendly variant: a self-invoking script that runs the very same extractor, logs the
// size of the pretty-printed JSON, the element count and the items themselves, then evaluates
// to the items array. It is derived from `domExtractor` instead of carrying a second copy of
// the logic, so the two can never drift apart. To paste it into a browser console, print the
// exported string first (e.g. console.log(domExportScript)).
const domExportScript = `(() => {
const items = (${domExtractor})();
const json = JSON.stringify(items, null, 2);
console.log("chars:", json.length);
console.log("elements:", items.length);
console.log(items);
return items;
})();`;
// Only the console-oriented script is exported; domExtractor stays module-private and is
// consumed by exportAll() in this file.
module.exports = { domExportScript };
/**
 * Best-effort creation of `dir`, including any missing parent directories.
 *
 * Never rejects: a failure is reported on stderr and the caller continues, so a
 * later write into the directory is what ultimately surfaces the problem.
 *
 * @param dir Directory path to create.
 */
async function ensureDir(dir: string): Promise<void> {
  try {
    await fs.mkdir(dir, { recursive: true });
  } catch (err) {
    // With `recursive: true` an already-existing directory is not an error, so anything
    // caught here is a genuine failure (permissions, a file in the way, ...). Keep going,
    // but do not hide it.
    console.warn("Could not create directory:", dir, err);
  }
}
/**
 * Runs the DOM extractor against every .html file found (recursively) under INPUT_DIR
 * and writes the result as pretty-printed JSON to the same relative path under
 * OUTPUT_DIR, with the `.html` suffix replaced by `.json`.
 *
 * Failure handling:
 * - If INPUT_DIR cannot be read, the error is logged and the process exits with code 1.
 * - If no .html file is found, a message is logged and the function returns.
 * - A failure on a single file is logged and the remaining files are still processed.
 *
 * One headless Chromium instance is shared across all files; each file gets its own
 * page, and both page and browser are closed in `finally` blocks.
 */
async function exportAll(): Promise<void> {
  await ensureDir(OUTPUT_DIR);

  // Minimal shape of the entries produced by readdir({ withFileTypes: true }).
  type DirEntry = { name: string; isDirectory(): boolean; isFile(): boolean };

  // Recursively gathers .html files below `dir`, as paths relative to INPUT_DIR.
  async function collectHtmlFiles(dir: string): Promise<string[]> {
    const entries: DirEntry[] = await fs.readdir(dir, { withFileTypes: true });
    const results: string[] = [];
    for (const ent of entries) {
      const p = path.join(dir, ent.name);
      if (ent.isDirectory()) {
        results.push(...(await collectHtmlFiles(p)));
      } else if (ent.isFile() && ent.name.endsWith(".html")) {
        results.push(path.relative(INPUT_DIR, p));
      }
    }
    return results;
  }

  let htmlFiles: string[] = [];
  try {
    htmlFiles = await collectHtmlFiles(INPUT_DIR);
  } catch (err) {
    console.error(
      "Could not read input directory recursively:",
      INPUT_DIR,
      err
    );
    process.exit(1);
    return;
  }

  if (htmlFiles.length === 0) {
    console.log("No .html files found in", INPUT_DIR);
    return;
  }

  const browser = await chromium.launch({ headless: true });
  try {
    for (const file of htmlFiles) {
      const abs = path.join(INPUT_DIR, file);
      // Build a proper file: URL. Plain "file://" + path breaks on spaces, "#", "?", "%"
      // and on Windows drive-letter paths.
      const url = pathToFileURL(abs).href;
      const page = await browser.newPage();
      try {
        await page.goto(url, { waitUntil: "domcontentloaded", timeout: 10000 });
        // domExtractor is the source text of an arrow function; wrap it in a call
        // expression and let the page evaluate that string directly.
        const items = await page.evaluate("(" + domExtractor + ")()");
        const outPath = path.join(OUTPUT_DIR, file.replace(/\.html$/, ".json"));
        // Input files may live in sub-directories; mirror them on the output side.
        await fs.mkdir(path.dirname(outPath), { recursive: true });
        await fs.writeFile(outPath, JSON.stringify(items, null, 2), "utf8");
        console.log(
          "exported " +
            file +
            " -> " +
            path.relative(process.cwd(), outPath) +
            " (elements: " +
            (Array.isArray(items) ? items.length : 0) +
            ")"
        );
      } catch (e) {
        // Keep going: one broken dump should not abort the whole export.
        console.error("Failed processing", file, e);
      } finally {
        await page.close();
      }
    }
  } finally {
    await browser.close();
  }
}
// Entry point: run the export only when this file is executed directly (not when it is
// required for its exports). Any rejection from exportAll() is printed and turned into
// a non-zero exit code.
if (require.main === module) {
  const failAndExit = (err: unknown): void => {
    console.error(err);
    process.exit(1);
  };
  exportAll().catch(failAndExit);
}