initial migration
This commit is contained in:
115
lib/html-compat.ts
Normal file
115
lib/html-compat.ts
Normal file
@@ -0,0 +1,115 @@
|
||||
/**
 * HTML Compatibility Layer
 * Handles HTML entities and formatting from WordPress exports.
 */
|
||||
|
||||
export function processHTML(html: string | null | undefined): string {
|
||||
if (!html) return '';
|
||||
|
||||
// Replace common HTML entities
|
||||
let processed = html;
|
||||
|
||||
const entities: Record<string, string> = {
|
||||
'\u00A0': ' ', // Non-breaking space
|
||||
'&': '&',
|
||||
'<': '<',
|
||||
'>': '>',
|
||||
'"': '"',
|
||||
"'": "'",
|
||||
'¢': '¢',
|
||||
'£': '£',
|
||||
'¥': '¥',
|
||||
'€': '€',
|
||||
'©': '©',
|
||||
'®': '®',
|
||||
'™': '™',
|
||||
'°': '°',
|
||||
'±': '±',
|
||||
'×': '×',
|
||||
'÷': '÷',
|
||||
'µ': 'µ',
|
||||
'¶': '¶',
|
||||
'§': '§',
|
||||
'á': 'á',
|
||||
'é': 'é',
|
||||
'í': 'í',
|
||||
'ó': 'ó',
|
||||
'ú': 'ú',
|
||||
'Á': 'Á',
|
||||
'É': 'É',
|
||||
'Í': 'Í',
|
||||
'Ó': 'Ó',
|
||||
'Ú': 'Ú',
|
||||
'ñ': 'ñ',
|
||||
'Ñ': 'Ñ',
|
||||
'ü': 'ü',
|
||||
'Ü': 'Ü',
|
||||
'ö': 'ö',
|
||||
'Ö': 'Ö',
|
||||
'ä': 'ä',
|
||||
'Ä': 'Ä',
|
||||
'ß': 'ß',
|
||||
'—': '—',
|
||||
'–': '–',
|
||||
'…': '…',
|
||||
'«': '«',
|
||||
'»': '»',
|
||||
'‘': "'",
|
||||
'’': "'",
|
||||
'“': '"',
|
||||
'”': '"',
|
||||
'•': '•',
|
||||
'·': '·'
|
||||
};
|
||||
|
||||
// Replace entities
|
||||
for (const [entity, char] of Object.entries(entities)) {
|
||||
processed = processed.replace(new RegExp(entity.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g'), char);
|
||||
}
|
||||
|
||||
// Remove script tags
|
||||
processed = processed.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '');
|
||||
|
||||
// Remove style tags
|
||||
processed = processed.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '');
|
||||
|
||||
// Remove inline event handlers
|
||||
processed = processed.replace(/\s+on\w+\s*=\s*["'][^"']*["']/gi, '');
|
||||
|
||||
// Remove dangerous attributes
|
||||
processed = processed.replace(/\s+(href|src)\s*=\s*["']\s*javascript:/gi, '');
|
||||
|
||||
// Remove any remaining WordPress shortcode-like content (e.g., [vc_row...])
|
||||
processed = processed.replace(/\[[^\]]*\]/g, '');
|
||||
|
||||
// Keep HTML structure from processed data - allow divs with our classes
|
||||
// Allow: <p>, <br>, <h1-6>, <strong>, <b>, <em>, <i>, <ul>, <ol>, <li>, <a>, <div>, <span>, <img>
|
||||
// Also allow our vc-row/vc-column classes
|
||||
processed = processed.replace(/<\/?(?!\/?(p|br|h[1-6]|strong|b|em|i|ul|ol|li|a|div|span|img|small)(\s|>))[^>]*>/gi, '');
|
||||
|
||||
// Clean up empty paragraphs and extra spaces
|
||||
processed = processed.replace(/<p>\s*<\/p>/g, '');
|
||||
processed = processed.replace(/\s+/g, ' ').trim();
|
||||
|
||||
return processed;
|
||||
}
|
||||
|
||||
export function stripHTML(html: string | null | undefined): string {
|
||||
if (!html) return '';
|
||||
return html.replace(/<[^>]*>/g, '');
|
||||
}
|
||||
|
||||
export function extractTextFromHTML(html: string | null | undefined): string {
|
||||
if (!html) return '';
|
||||
return processHTML(html);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get dictionary for translations
|
||||
* This is a compatibility function for the i18n system
|
||||
*/
|
||||
export function getDictionary(locale: string): Record<string, string> {
|
||||
// For now, return empty dictionary
|
||||
// In a real implementation, this would load translation files
|
||||
return {};
|
||||
}
|
||||
Reference in New Issue
Block a user