/**
* HTML Compatibility Layer
* Handles HTML entities and formatting from WordPress exports
*/
export function processHTML(html: string | null | undefined): string {
if (!html) return '';
// Replace common HTML entities
let processed = html;
const entities: Record = {
'\u00A0': ' ', // Non-breaking space
'&': '&',
'<': '<',
'>': '>',
'"': '"',
"'": "'",
'¢': '¢',
'£': '£',
'¥': '¥',
'€': '€',
'©': '©',
'®': '®',
'™': '™',
'°': '°',
'±': '±',
'×': '×',
'÷': '÷',
'µ': 'µ',
'¶': '¶',
'§': '§',
'á': 'á',
'é': 'é',
'í': 'í',
'ó': 'ó',
'ú': 'ú',
'Á': 'Á',
'É': 'É',
'Í': 'Í',
'Ó': 'Ó',
'Ú': 'Ú',
'ñ': 'ñ',
'Ñ': 'Ñ',
'ü': 'ü',
'Ü': 'Ü',
'ö': 'ö',
'Ö': 'Ö',
'ä': 'ä',
'Ä': 'Ä',
'ß': 'ß',
'—': '—',
'–': '–',
'…': '…',
'«': '«',
'»': '»',
'‘': "'",
'’': "'",
'“': '"',
'”': '"',
'•': '•',
'·': '·'
};
// Replace entities
for (const [entity, char] of Object.entries(entities)) {
processed = processed.replace(new RegExp(entity.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g'), char);
}
// Remove script tags
processed = processed.replace(/