import { translate } from 'translate-american-british-english';
import { InternalLink } from './qualityCheck';

/*
 * This postprocessor is applied to the final text in the frontend.
 * It runs in the frontend because the backend only sees partial chunks while streaming,
 * so it cannot be guaranteed to work there.
 * It is activated via the setting textPostprocessor.
 */

/**
 * Cleans up a finished HTML response before it is displayed.
 *
 * The pipeline runs three steps, in this order:
 *  1. Normalize wording in the text between tags ("food service" -> "foodservice",
 *     standalone "i" -> "I", British -> American spelling).
 *  2. Replace every <a> whose target is not in the allowed list with its plain text.
 *  3. Apply headline capitalization to <h2> and <h3> elements.
 *
 * Requires a browser DOM (DOMParser, NodeFilter).
 */
class Postprocessor {
    /**
     * Matches a single tag, comment or doctype. The capture group makes
     * String.prototype.split keep the matched markup in its result, so markup
     * ends up at the odd indices and text at the even ones.
     * A '<' that is not followed by a letter, '/' + letter or '!' is treated as text.
     * Known limitation: an attribute value containing '>' ends the match early.
     */
    private static readonly TAG_PATTERN = /(<(?:\/?[a-zA-Z]|!)[^>]*>)/;

    /** Words that stay lowercase in a headline unless they are its first word. */
    private static readonly HEADLINE_LOWERCASE_WORDS: ReadonlySet<string> = new Set([
        'a', 'an', 'the',
        'and', 'but', 'or', 'for', 'nor',
        'at', 'by', 'to', 'from', 'in', 'near', 'of', 'off', 'on', 'over', 'past', 'with',
        'upon', 'after', 'like', 'plus', 'except', 'per', 'sans', 'than',
    ]);

    /**
     * Runs the complete postprocessing pipeline.
     *
     * @param text  HTML string to clean up.
     * @param links Links that may stay in the output; every other anchor is
     *              replaced by its text. When missing, all anchors are removed.
     * @returns The serialized body of the processed document.
     */
    public central(text: string, links: InternalLink[]): string {
        // Wording fixes must not touch markup: rewriting an href (e.g. a slug
        // containing "food-service" or a British spelling) would change the link
        // target before it is compared against the allowed links.
        const normalized = this._mapTextOutsideTags(text, segment => this._normalizeText(segment));

        const doc = new DOMParser().parseFromString(normalized, 'text/html');
        this._removeDisallowedLinks(doc, links);
        this._capitalizeHeadings(doc);

        return doc.body.innerHTML;
    }

    /** Applies `transform` to every non-empty text run between tags and leaves the tags untouched. */
    private _mapTextOutsideTags(html: string, transform: (text: string) => string): string {
        return html
            .split(Postprocessor.TAG_PATTERN)
            .map((segment, index) => (index % 2 === 1 || segment === '' ? segment : transform(segment)))
            .join('');
    }

    /** Applies all wording fixes to a piece of plain text. */
    private _normalizeText(text: string): string {
        const withFoodservice = this._replaceFoodService(text);

        // Capitalize the pronoun "I", but leave the abbreviation "i.e." alone.
        const withCapitalI = withFoodservice.replace(/\bi\b(?!\.\s?e\b)/g, 'I');

        return this._translateToAmerican(withCapitalI);
    }

    /** Joins "food service" / "food-service" into one word, keeping the case of the first letter. */
    private _replaceFoodService(text: string): string {
        return text
            // Drop the separator only, so the original capitalization survives.
            .replace(/food[\s\-]+service/gi, match => match.replace(/[\s\-]+/g, ''))
            // "Food Service" / "food Service" would otherwise end up as "FoodService" / "foodService".
            .replace(/FoodService|foodService/g, 'Foodservice');
    }

    /**
     * Converts British spelling to American spelling.
     * The whitespace handling of `translate` is not visible from this file, so
     * leading and trailing whitespace is re-attached verbatim; otherwise the
     * spaces next to a tag could get lost.
     */
    private _translateToAmerican(text: string): string {
        const core = text.trim();
        if (core === '') {
            return text;
        }
        const start = text.length - text.trimStart().length;
        const end = start + core.length;
        return text.slice(0, start) + translate(core, { american: true }) + text.slice(end);
    }

    /** Replaces every anchor that does not point to an allowed link with a plain text node. */
    private _removeDisallowedLinks(doc: Document, links: InternalLink[]): void {
        const allowedHrefs = new Set((links ?? []).map(l => l.link));

        doc.querySelectorAll('a').forEach(anchor => {
            // `anchor.href` is the resolved absolute URL, which can differ from the
            // authored value (relative paths, added trailing slash), so the raw
            // attribute is accepted as well.
            const rawHref = anchor.getAttribute('href');
            const isAllowed =
                allowedHrefs.has(anchor.href) || (rawHref !== null && allowedHrefs.has(rawHref));

            if (!isAllowed) {
                const textNode = doc.createTextNode(anchor.textContent || '');
                anchor.parentNode?.replaceChild(textNode, anchor);
            }
        });
    }

    /** Applies headline capitalization to all <h2> and <h3> elements of the document. */
    private _capitalizeHeadings(doc: Document): void {
        doc.querySelectorAll('h2, h3').forEach(heading => {
            if (!heading.textContent) {
                return;
            }

            if (heading.childElementCount === 0) {
                // Plain-text heading: also trim and collapse whitespace.
                heading.textContent = heading.textContent
                    .trim()
                    .replace(/\s+/g, ' ')
                    .split(' ')
                    .map((word, index) => this._capitalizeHeadlineWord(word, index === 0))
                    .join(' ');
                return;
            }

            // Heading with inline markup: assigning textContent would replace all
            // child elements (links, emphasis, ...), so each text node is edited in place.
            const walker = doc.createTreeWalker(heading, NodeFilter.SHOW_TEXT);
            let wordCount = 0;
            let endedInsideWord = false;
            for (let node = walker.nextNode(); node !== null; node = walker.nextNode()) {
                const value = node.nodeValue ?? '';
                if (value === '') {
                    continue;
                }
                const continuesWord = endedInsideWord;
                node.nodeValue = value.replace(/\S+/g, (word: string, offset: number) => {
                    // A word that started in the previous text node is left alone.
                    if (offset === 0 && continuesWord) {
                        return word;
                    }
                    return this._capitalizeHeadlineWord(word, wordCount++ === 0);
                });
                endedInsideWord = /\S$/.test(value);
            }
        });
    }

    /**
     * Capitalizes one headline word.
     * Only the first letter is changed; the rest keeps its original case because it
     * could be a brand name. Stop words are lowercased unless they start the headline.
     */
    private _capitalizeHeadlineWord(word: string, isFirstWord: boolean): string {
        const lowerCaseWord = word.toLowerCase();
        if (!isFirstWord && Postprocessor.HEADLINE_LOWERCASE_WORDS.has(lowerCaseWord)) {
            return lowerCaseWord;
        }
        return word.charAt(0).toUpperCase() + word.slice(1);
    }
}

export default Postprocessor;