Files
klz-cables.com/lib/html-compat.ts
2025-12-28 23:28:31 +01:00

115 lines
2.9 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* HTML Compatibility Layer
* Handles HTML entities and formatting from WordPress exports
*/
/**
 * Typographic characters that are normalised to plain ASCII before sanitising.
 * Written as `\u` escapes so the table survives viewers/editors that hide or
 * strip look-alike Unicode characters.
 *
 * NOTE(review): the two single-quote entries are assumed to be U+2018/U+2019
 * (the table this replaces had blank keys mapped to "'") — confirm.
 */
const TYPOGRAPHIC_REPLACEMENTS: ReadonlyMap<string, string> = new Map([
  ['\u00A0', ' '], // non-breaking space
  ['\u2018', "'"], // left single quotation mark
  ['\u2019', "'"], // right single quotation mark
  ['\u201C', '"'], // left double quotation mark
  ['\u201D', '"'], // right double quotation mark
]);

/** Matches every key of TYPOGRAPHIC_REPLACEMENTS in a single pass. */
const TYPOGRAPHIC_PATTERN = /[\u00A0\u2018\u2019\u201C\u201D]/g;

/**
 * Cleans up an HTML fragment (e.g. from a WordPress export).
 *
 * Steps, in order:
 *  1. normalise non-breaking spaces and curly quotes to ASCII,
 *  2. drop `<script>` / `<style>` elements including their content,
 *  3. drop inline `on*` event-handler attributes and `javascript:` URLs in
 *     `href` / `src`,
 *  4. drop anything in square brackets (shortcode-like content),
 *  5. drop every tag that is not on the allow-list (the tag's text content
 *     is kept),
 *  6. drop empty `<p>` elements and collapse runs of whitespace.
 *
 * NOTE(review): this is regex-based, best-effort cleanup. It does not parse
 * HTML and leaves other attributes on allowed tags untouched, so it should
 * not be the only defence when the input is untrusted.
 *
 * @param html - Raw HTML; `null`, `undefined` and `''` yield `''`.
 * @returns The cleaned, whitespace-collapsed and trimmed HTML string.
 */
export function processHTML(html: string | null | undefined): string {
  if (!html) return '';

  // One pass over the input instead of compiling a RegExp per table entry.
  // Identity mappings are intentionally absent: they never changed the text.
  let processed = html.replace(
    TYPOGRAPHIC_PATTERN,
    (ch) => TYPOGRAPHIC_REPLACEMENTS.get(ch) ?? ch,
  );

  // Remove script elements together with their content.
  processed = processed.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '');

  // Remove style elements together with their content.
  processed = processed.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '');

  // Remove inline event handlers. Double-quoted, single-quoted and unquoted
  // values are matched separately so a value such as "a('b')" is removed
  // whole instead of being cut at the first inner quote.
  processed = processed.replace(/\s+on\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi, '');

  // Remove href/src attributes that carry a javascript: URL. The whole
  // attribute (including its value and closing quote) is dropped so no
  // fragment of the payload is left inside the tag.
  processed = processed.replace(
    /\s+(?:href|src)\s*=\s*(?:"\s*javascript:[^"]*"|'\s*javascript:[^']*'|javascript:[^\s>]*)/gi,
    '',
  );

  // Remove any remaining WordPress shortcode-like content (e.g. [vc_row...]).
  processed = processed.replace(/\[[^\]]*\]/g, '');

  // Strip every tag that is not allow-listed; the text between tags stays.
  // Allowed: p, br, h1-h6, strong, b, em, i, ul, ol, li, a, div, span, img, small.
  // The tag name may be followed by whitespace, '>' or '/>' so that compact
  // self-closing tags like <br/> are kept as well.
  processed = processed.replace(
    /<\/?(?!\/?(?:p|br|h[1-6]|strong|b|em|i|ul|ol|li|a|div|span|img|small)(?:\s|\/?>))[^>]*>/gi,
    '',
  );

  // Clean up empty paragraphs and collapse whitespace.
  processed = processed.replace(/<p>\s*<\/p>/g, '');
  return processed.replace(/\s+/g, ' ').trim();
}
/**
 * Removes everything that looks like a tag (`<` up to the next `>`) from the
 * given string and returns what is left. No entity decoding or whitespace
 * normalisation is performed.
 *
 * @param html - Markup to strip; `null`, `undefined` and `''` yield `''`.
 * @returns The input with all `<...>` sequences removed.
 */
export function stripHTML(html: string | null | undefined): string {
  const tagPattern = /<[^>]*>/g;
  return html ? html.replace(tagPattern, '') : '';
}
/**
 * Runs the given markup through `processHTML` and returns the result.
 *
 * NOTE(review): despite the name, the returned string is whatever
 * `processHTML` produces, not the output of `stripHTML` — confirm that
 * callers expect this.
 *
 * @param html - Markup to process; `null`, `undefined` and `''` yield `''`.
 * @returns The value returned by `processHTML` for non-empty input.
 */
export function extractTextFromHTML(html: string | null | undefined): string {
  return html ? processHTML(html) : '';
}
/**
 * Translation dictionary lookup (i18n compatibility shim).
 *
 * Placeholder implementation: the locale is currently ignored and a new,
 * empty dictionary is returned on every call. A full implementation would
 * load the translation entries for the requested locale.
 *
 * @param locale - Requested locale; not used yet.
 * @returns A fresh empty key/value dictionary.
 */
export function getDictionary(locale: string): Record<string, string> {
  const dictionary: Record<string, string> = {};
  return dictionary;
}