// 115 lines, 2.9 KiB, TypeScript (appears to be file-viewer header text captured with the paste; not part of the module)
/**
 * HTML Compatibility Layer
 * Handles HTML entities and formatting from WordPress exports
 */
|
||
|
||
/**
 * Replacement text for each entity reference that `processHTML` decodes,
 * keyed by the reference body (the text between `&` and `;`).
 *
 * NOTE(review): in the reviewed copy every key of this table had been rendered
 * to the character it stands for (`'&': '&'`, `'©': '©'`, ...), which turned
 * the table into a no-op. The keys below are the standard HTML entity names
 * for those characters; confirm them against the original file.
 */
const ENTITY_REPLACEMENTS: ReadonlyMap<string, string> = new Map<string, string>([
  ['nbsp', ' '], // Non-breaking space
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['#39', "'"],
  ['apos', "'"],
  ['cent', '¢'],
  ['pound', '£'],
  ['yen', '¥'],
  ['euro', '€'],
  ['copy', '©'],
  ['reg', '®'],
  ['trade', '™'],
  ['deg', '°'],
  ['plusmn', '±'],
  ['times', '×'],
  ['divide', '÷'],
  ['micro', 'µ'],
  ['para', '¶'],
  ['sect', '§'],
  ['aacute', 'á'],
  ['eacute', 'é'],
  ['iacute', 'í'],
  ['oacute', 'ó'],
  ['uacute', 'ú'],
  ['Aacute', 'Á'],
  ['Eacute', 'É'],
  ['Iacute', 'Í'],
  ['Oacute', 'Ó'],
  ['Uacute', 'Ú'],
  ['ntilde', 'ñ'],
  ['Ntilde', 'Ñ'],
  ['uuml', 'ü'],
  ['Uuml', 'Ü'],
  ['ouml', 'ö'],
  ['Ouml', 'Ö'],
  ['auml', 'ä'],
  ['Auml', 'Ä'],
  ['szlig', 'ß'],
  ['mdash', '—'],
  ['ndash', '–'],
  ['hellip', '…'],
  ['laquo', '«'],
  ['raquo', '»'],
  // Typographic quotes are deliberately flattened to their ASCII forms.
  ['lsquo', "'"],
  ['rsquo', "'"],
  ['ldquo', '"'],
  ['rdquo', '"'],
  ['bull', '•'],
  ['middot', '·'],
]);

/**
 * Decodes the entity references listed in `ENTITY_REPLACEMENTS` and flattens
 * raw non-breaking spaces and curly quotes to their ASCII equivalents.
 */
function decodeKnownEntities(text: string): string {
  return text
    // One pass over the input, so the output of one replacement is never
    // re-read as another reference ("&amp;copy;" becomes "&copy;", not "©").
    // A Map lookup (rather than a plain object) keeps names such as
    // "constructor" from resolving to inherited properties.
    .replace(/&(#39|[A-Za-z]+);/g, (reference: string, name: string) => {
      const replacement = ENTITY_REPLACEMENTS.get(name);
      return replacement === undefined ? reference : replacement;
    })
    // Raw (already decoded) characters get the same treatment as their entities.
    .replace(/\u00A0/g, ' ')
    .replace(/[\u2018\u2019]/g, "'")
    .replace(/[\u201C\u201D]/g, '"');
}

/**
 * Cleans an HTML fragment for display.
 *
 * Steps, in order:
 *  1. decode known entity references and flatten typographic quotes / NBSP;
 *  2. drop `<script>` and `<style>` elements together with their content;
 *  3. drop inline `on*` event-handler attributes (quoted or unquoted);
 *  4. drop `href` / `src` attributes whose value starts with `javascript:`;
 *  5. drop anything in square brackets (shortcode-like content such as `[vc_row ...]`);
 *  6. drop every tag that is not on the allow-list, keeping its inner text;
 *  7. drop empty `<p>` elements, collapse whitespace runs to one space and trim.
 *
 * Entities are decoded before the removal steps, so markup that arrives
 * entity-encoded (for example `&lt;script&gt;`) goes through the same filters.
 *
 * @param html - Fragment to clean; `null`, `undefined` and `''` yield `''`.
 * @returns The cleaned fragment. It may still contain allow-listed tags.
 */
export function processHTML(html: string | null | undefined): string {
  if (!html) return '';

  let processed = decodeKnownEntities(html);

  // Remove script and style elements, including their content.
  processed = processed.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '');
  processed = processed.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '');

  // Remove inline event handlers. Each quoting style is matched on its own so
  // that a value such as "alert('x')" is consumed up to its real closing
  // quote; unquoted values are covered as well.
  processed = processed.replace(/\s+on\w+\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/gi, '');

  // Remove href/src attributes carrying a javascript: URL. The whole attribute
  // is removed so no payload remnant is left inside the tag.
  processed = processed.replace(
    /\s+(?:href|src)\s*=\s*(?:"\s*javascript:[^"]*"|'\s*javascript:[^']*'|javascript:[^\s>]*)/gi,
    ''
  );

  // Remove any remaining WordPress shortcode-like content (e.g., [vc_row...]).
  processed = processed.replace(/\[[^\]]*\]/g, '');

  // Keep only allow-listed tags:
  // <p>, <br>, <h1-6>, <strong>, <b>, <em>, <i>, <ul>, <ol>, <li>, <a>, <div>, <span>, <img>, <small>.
  // The tag name may be followed by whitespace, ">" or "/", so self-closing
  // forms such as <br/> survive.
  processed = processed.replace(
    /<\/?(?!\/?(?:p|br|h[1-6]|strong|b|em|i|ul|ol|li|a|div|span|img|small)(?:\s|\/|>))[^>]*>/gi,
    ''
  );

  // Clean up empty paragraphs and extra spaces.
  processed = processed.replace(/<p>\s*<\/p>/g, '');
  return processed.replace(/\s+/g, ' ').trim();
}
|
||
|
||
/**
 * Removes every `<...>` span from the input and returns the remaining text.
 *
 * A span is a `<`, any run of characters other than `>`, and the next `>`.
 * Entity references are not decoded and whitespace is left as it is.
 *
 * @param html - Markup to strip; `null`, `undefined` and `''` yield `''`.
 * @returns The input with all `<...>` spans removed.
 */
export function stripHTML(html: string | null | undefined): string {
  if (!html) {
    return '';
  }

  return html.replace(/<[^>]*>/g, '');
}
|
||
|
||
/**
 * Thin wrapper that forwards to `processHTML`.
 *
 * NOTE(review): despite the name, the result is whatever `processHTML`
 * returns, which can still contain its allow-listed tags. Confirm whether
 * callers expect plain text (see `stripHTML`) before relying on this.
 *
 * @param html - Markup to process; `null`, `undefined` and `''` yield `''`.
 * @returns The output of `processHTML` for non-empty input.
 */
export function extractTextFromHTML(html: string | null | undefined): string {
  return html ? processHTML(html) : '';
}
|
||
|
||
/**
 * Get dictionary for translations.
 *
 * This is a compatibility function for the i18n system. It is currently a
 * stub: `locale` is not read and every call returns a new, empty dictionary.
 *
 * @param locale - Requested locale; currently ignored.
 * @returns An empty dictionary.
 */
export function getDictionary(locale: string): Record<string, string> {
  // For now there are no translation files to load; a real implementation
  // would look them up by `locale` here.
  const dictionary: Record<string, string> = {};
  return dictionary;
}