dumps optimized
This commit is contained in:
@@ -1,18 +1,6 @@
|
||||
// Developer-only scripts moved out of infrastructure: DOM exporter for local HTML dumps.
|
||||
// NOT for production automation; intended as a developer utility to generate compact DOM exports
|
||||
// for manual inspection and to aid writing Playwright automations.
|
||||
//
|
||||
// Usage (from repo root):
|
||||
// npm install -D playwright ts-node typescript @types/node
|
||||
// npx playwright install
|
||||
// npx ts-node scripts/dom-export/exportHtmlDumps.ts
|
||||
//
|
||||
// Output: ./html-dumps-optimized/*.json
|
||||
//
|
||||
// This file intentionally contains both the in-page extractor string (exported) and the
|
||||
// Playwright runner that iterates ./html-dumps/*.html and writes .json files into
|
||||
// ./html-dumps-optimized. These artifacts are developer helpers and must not be imported
|
||||
// into production automation code.
|
||||
|
||||
const { chromium } = require("playwright");
|
||||
const fs = require("fs").promises;
|
||||
@@ -21,9 +9,6 @@ const path = require("path");
|
||||
const INPUT_DIR = path.join(process.cwd(), "html-dumps");
|
||||
const OUTPUT_DIR = path.join(process.cwd(), "html-dumps-optimized");
|
||||
|
||||
// Developer helper: in-page DOM extractor string (for console or page.evaluate).
|
||||
// Kept as a plain const to avoid ES module import/export issues when running with ts-node in CJS mode.
|
||||
// This version compresses output aggressively using short tags/fields and a semantic short DOM path.
|
||||
const domExportScript = `(() => {
|
||||
const MAX_TEXT = 60;
|
||||
|
||||
@@ -59,28 +44,24 @@ const domExportScript = `(() => {
|
||||
const getSemSiblingPath = el => {
|
||||
const parts = [];
|
||||
let node = el;
|
||||
let depth = 0;
|
||||
|
||||
while (node && node.nodeType === 1 && node !== document.body && depth < 5) {
|
||||
const { idx, count } = siblingIndex(node);
|
||||
const isTarget = node === el;
|
||||
while (node && node.nodeType === 1 && node !== document.body) {
|
||||
const tag = shortTag(node.tagName);
|
||||
|
||||
const { idx, count } = siblingIndex(node);
|
||||
const isTarget = node === el;
|
||||
const targetSuffix = isTarget && idx >= 0 ? ":" + idx : "";
|
||||
const parentSuffix = !isTarget && count > 1 && idx >= 0 ? "@" + idx : "";
|
||||
const sibSuffix = targetSuffix || parentSuffix;
|
||||
|
||||
let cls = [...node.classList].filter(c => !isNoiseClass(c));
|
||||
if (cls.length > 2) cls = cls.slice(0, 2);
|
||||
if (!cls.length) cls = ["c0"];
|
||||
const id = node.id;
|
||||
const attrNames = node.getAttributeNames ? node.getAttributeNames() : [];
|
||||
|
||||
const attrs = [];
|
||||
|
||||
const id = node.id;
|
||||
if (id && !isDynamicId(id)) attrs.push("#" + id);
|
||||
|
||||
const attrNames = node.getAttributeNames ? node.getAttributeNames() : [];
|
||||
let hasDataAttr = false;
|
||||
|
||||
if (id) attrs.push("#" + id);
|
||||
|
||||
for (const a of attrNames) {
|
||||
if (a.startsWith("data-")) {
|
||||
attrs.push("[" + a + "=" + node.getAttribute(a) + "]");
|
||||
@@ -91,16 +72,20 @@ const domExportScript = `(() => {
|
||||
const role = node.getAttribute ? node.getAttribute("role") : null;
|
||||
if (role) attrs.push("[r=" + role + "]");
|
||||
|
||||
const chunk = tag + "." + cls.join(".") + attrs.join("") + (sibSuffix ? sibSuffix : "");
|
||||
let chunk = tag;
|
||||
|
||||
if (attrs.length > 0) {
|
||||
chunk += attrs.join("");
|
||||
} else {
|
||||
let cls = [...node.classList].filter(c => !isNoiseClass(c));
|
||||
if (cls.length > 2) cls = cls.slice(0, 2);
|
||||
if (!cls.length) cls = ["c0"];
|
||||
chunk += "." + cls.join(".");
|
||||
}
|
||||
|
||||
chunk += (sibSuffix || "");
|
||||
parts.unshift(chunk);
|
||||
|
||||
depth += 1;
|
||||
|
||||
const hasStrongAnchor =
|
||||
(id && !isDynamicId(id)) || hasDataAttr || !!role;
|
||||
|
||||
if (depth >= 5 || hasStrongAnchor) break;
|
||||
|
||||
node = node.parentNode;
|
||||
}
|
||||
|
||||
@@ -146,10 +131,7 @@ const domExportScript = `(() => {
|
||||
const stableId = isDynamicId(id) ? null : id;
|
||||
const d = e.getAttribute("data-testid");
|
||||
|
||||
// skip menuitems with no meaningful text/label/placeholder
|
||||
if (r === "menuitem" && !t && !l && !p) continue;
|
||||
|
||||
// keep only meaningful ones
|
||||
if (!(t || l || p || n || stableId || d || r)) continue;
|
||||
|
||||
const o = { el: shortTag(e.tagName), x: getSemSiblingPath(e) };
|
||||
@@ -208,28 +190,24 @@ const domExtractor = `() => {
|
||||
const getSemSiblingPath = el => {
|
||||
const parts = [];
|
||||
let node = el;
|
||||
let depth = 0;
|
||||
|
||||
while (node && node.nodeType === 1 && node !== document.body && depth < 5) {
|
||||
const { idx, count } = siblingIndex(node);
|
||||
const isTarget = node === el;
|
||||
while (node && node.nodeType === 1 && node !== document.body) {
|
||||
const tag = shortTag(node.tagName);
|
||||
|
||||
const { idx, count } = siblingIndex(node);
|
||||
const isTarget = node === el;
|
||||
const targetSuffix = isTarget && idx >= 0 ? ":" + idx : "";
|
||||
const parentSuffix = !isTarget && count > 1 && idx >= 0 ? "@" + idx : "";
|
||||
const sibSuffix = targetSuffix || parentSuffix;
|
||||
|
||||
let cls = [...node.classList].filter(c => !isNoiseClass(c));
|
||||
if (cls.length > 2) cls = cls.slice(0, 2);
|
||||
if (!cls.length) cls = ["c0"];
|
||||
const id = node.id;
|
||||
const attrNames = node.getAttributeNames ? node.getAttributeNames() : [];
|
||||
|
||||
const attrs = [];
|
||||
|
||||
const id = node.id;
|
||||
if (id && !isDynamicId(id)) attrs.push("#" + id);
|
||||
|
||||
const attrNames = node.getAttributeNames ? node.getAttributeNames() : [];
|
||||
let hasDataAttr = false;
|
||||
|
||||
if (id) attrs.push("#" + id);
|
||||
|
||||
for (const a of attrNames) {
|
||||
if (a.startsWith("data-")) {
|
||||
attrs.push("[" + a + "=" + node.getAttribute(a) + "]");
|
||||
@@ -240,16 +218,20 @@ const domExtractor = `() => {
|
||||
const role = node.getAttribute ? node.getAttribute("role") : null;
|
||||
if (role) attrs.push("[r=" + role + "]");
|
||||
|
||||
const chunk = tag + "." + cls.join(".") + attrs.join("") + (sibSuffix ? sibSuffix : "");
|
||||
let chunk = tag;
|
||||
|
||||
if (attrs.length > 0) {
|
||||
chunk += attrs.join("");
|
||||
} else {
|
||||
let cls = [...node.classList].filter(c => !isNoiseClass(c));
|
||||
if (cls.length > 2) cls = cls.slice(0, 2);
|
||||
if (!cls.length) cls = ["c0"];
|
||||
chunk += "." + cls.join(".");
|
||||
}
|
||||
|
||||
chunk += (sibSuffix || "");
|
||||
parts.unshift(chunk);
|
||||
|
||||
depth += 1;
|
||||
|
||||
const hasStrongAnchor =
|
||||
(id && !isDynamicId(id)) || hasDataAttr || !!role;
|
||||
|
||||
if (depth >= 5 || hasStrongAnchor) break;
|
||||
|
||||
node = node.parentNode;
|
||||
}
|
||||
|
||||
@@ -295,9 +277,7 @@ const domExtractor = `() => {
|
||||
const stableId = isDynamicId(id) ? null : id;
|
||||
const d = e.getAttribute("data-testid");
|
||||
|
||||
// skip menuitems with no meaningful text/label/placeholder
|
||||
if (r === "menuitem" && !t && !l && !p) continue;
|
||||
|
||||
if (!(t || l || p || n || stableId || d || r)) continue;
|
||||
|
||||
const o = { el: shortTag(e.tagName), x: getSemSiblingPath(e) };
|
||||
@@ -321,16 +301,14 @@ module.exports = { domExportScript };
|
||||
async function ensureDir(dir: string) {
|
||||
try {
|
||||
await fs.mkdir(dir, { recursive: true });
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
|
||||
async function exportAll() {
|
||||
await ensureDir(OUTPUT_DIR);
|
||||
|
||||
async function collectHtmlFiles(dir: string): Promise<string[]> {
|
||||
const entries = (await fs.readdir(dir, { withFileTypes: true })) as any[];
|
||||
const entries = await fs.readdir(dir, { withFileTypes: true });
|
||||
const results: string[] = [];
|
||||
for (const ent of entries) {
|
||||
const p = path.join(dir, ent.name);
|
||||
@@ -343,7 +321,7 @@ async function exportAll() {
|
||||
return results;
|
||||
}
|
||||
|
||||
let htmlFiles: string[] = [];
|
||||
let htmlFiles = [];
|
||||
try {
|
||||
htmlFiles = await collectHtmlFiles(INPUT_DIR);
|
||||
} catch (err) {
|
||||
@@ -370,7 +348,7 @@ async function exportAll() {
|
||||
try {
|
||||
await page.goto(url, { waitUntil: "domcontentloaded", timeout: 10000 });
|
||||
const items = await page.evaluate(
|
||||
new Function("return (" + domExtractor + ")()") as any
|
||||
new Function("return (" + domExtractor + ")()")
|
||||
);
|
||||
const outPath = path.join(OUTPUT_DIR, file.replace(/\.html$/, ".json"));
|
||||
await fs.mkdir(path.dirname(outPath), { recursive: true });
|
||||
|
||||
Reference in New Issue
Block a user