package org.webharvest.runtime.processors;

import java.io.IOException;
import org.htmlcleaner.BrowserCompactXmlSerializer;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.CompactXmlSerializer;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.PrettyXmlSerializer;
import org.htmlcleaner.SimpleXmlSerializer;
import org.htmlcleaner.TagNode;
import org.webharvest.definition.HtmlToXmlDef;
import org.webharvest.exception.ParserException;
import org.webharvest.runtime.Scraper;
import org.webharvest.runtime.ScraperContext;
import org.webharvest.runtime.scripting.ScriptEngine;
import org.webharvest.runtime.templaters.BaseTemplater;
import org.webharvest.runtime.variables.NodeVariable;
import org.webharvest.runtime.variables.Variable;
import org.webharvest.utils.CommonUtil;

/* loaded from: input_file:org/webharvest/runtime/processors/HtmlToXmlProcessor.class */
/**
 * Processor that cleans the text content of its body with {@link HtmlCleaner}
 * and returns the cleaned document serialized as an XML string.
 *
 * <p>Cleaner options and the output type are read from the {@link HtmlToXmlDef}
 * this processor was created with. Every option value is first passed through
 * {@link BaseTemplater#execute} with the scraper's script engine; an option
 * whose evaluated value is {@code null} is not applied to the cleaner (with the
 * single exception of {@code namespacesAware}, see {@link #configureCleaner}).
 */
public class HtmlToXmlProcessor extends BaseProcessor {
    private final HtmlToXmlDef htmlToXmlDef;

    /**
     * Creates the processor for the given definition.
     *
     * @param htmlToXmlDef definition supplying the cleaner options and output type
     */
    public HtmlToXmlProcessor(HtmlToXmlDef htmlToXmlDef) {
        super(htmlToXmlDef);
        this.htmlToXmlDef = htmlToXmlDef;
    }

    /**
     * Evaluates the body, cleans it with a freshly configured {@link HtmlCleaner}
     * and wraps the serialized XML in a {@link NodeVariable}.
     *
     * @param scraper        scraper providing the script engine used to evaluate option templates
     * @param scraperContext context passed through to the body evaluation
     * @return a {@link NodeVariable} holding the serialized XML string
     * @throws ParserException if cleaning or serialization raises an {@link IOException}
     */
    @Override
    public Variable execute(Scraper scraper, ScraperContext scraperContext) {
        // The body is evaluated before any option template, as in the original flow.
        Variable body = getBodyTextContent(this.htmlToXmlDef, scraper, scraperContext);

        HtmlCleaner htmlCleaner = new HtmlCleaner();
        CleanerProperties properties = htmlCleaner.getProperties();
        ScriptEngine scriptEngine = scraper.getScriptEngine();

        configureCleaner(properties, scriptEngine);
        String outputType = BaseTemplater.execute(this.htmlToXmlDef.getOutputType(), scriptEngine);

        try {
            TagNode root = htmlCleaner.clean(body.toString());
            return new NodeVariable(serialize(outputType, properties, root));
        } catch (IOException e) {
            throw new ParserException(e);
        }
    }

    /**
     * Applies every option of the definition to the given cleaner properties.
     *
     * <p>Options are evaluated strictly in the order below, one at a time, so any
     * side effects of the option templates occur in a fixed sequence.
     *
     * @param properties   cleaner properties to update in place
     * @param scriptEngine engine used to evaluate the option templates
     */
    private void configureCleaner(CleanerProperties properties, ScriptEngine scriptEngine) {
        HtmlToXmlDef def = this.htmlToXmlDef;
        Boolean flag;

        flag = evaluateFlag(def.getAdvancedXmlEscape(), scriptEngine);
        if (flag != null) {
            properties.setAdvancedXmlEscape(flag.booleanValue());
        }
        flag = evaluateFlag(def.getUseCdataForScriptAndStyle(), scriptEngine);
        if (flag != null) {
            properties.setUseCdataForScriptAndStyle(flag.booleanValue());
        }
        flag = evaluateFlag(def.getTranslateSpecialEntities(), scriptEngine);
        if (flag != null) {
            properties.setTranslateSpecialEntities(flag.booleanValue());
        }
        flag = evaluateFlag(def.getRecognizeUnicodeChars(), scriptEngine);
        if (flag != null) {
            properties.setRecognizeUnicodeChars(flag.booleanValue());
        }
        flag = evaluateFlag(def.getOmitUnknownTags(), scriptEngine);
        if (flag != null) {
            properties.setOmitUnknownTags(flag.booleanValue());
        }
        flag = evaluateFlag(def.getUseEmptyElementTags(), scriptEngine);
        if (flag != null) {
            properties.setUseEmptyElementTags(flag.booleanValue());
        }
        flag = evaluateFlag(def.getTreatUnknownTagsAsContent(), scriptEngine);
        if (flag != null) {
            properties.setTreatUnknownTagsAsContent(flag.booleanValue());
        }
        flag = evaluateFlag(def.getOmitDeprecatedTags(), scriptEngine);
        if (flag != null) {
            properties.setOmitDeprecatedTags(flag.booleanValue());
        }
        flag = evaluateFlag(def.getTreatDeprecatedTagsAsContent(), scriptEngine);
        if (flag != null) {
            properties.setTreatDeprecatedTagsAsContent(flag.booleanValue());
        }
        flag = evaluateFlag(def.getOmitXmlDecl(), scriptEngine);
        if (flag != null) {
            properties.setOmitXmlDeclaration(flag.booleanValue());
        }
        flag = evaluateFlag(def.getOmitComments(), scriptEngine);
        if (flag != null) {
            properties.setOmitComments(flag.booleanValue());
        }
        flag = evaluateFlag(def.getOmitHtmlEnvelope(), scriptEngine);
        if (flag != null) {
            properties.setOmitHtmlEnvelope(flag.booleanValue());
        }
        flag = evaluateFlag(def.getAllowMultiWordAttributes(), scriptEngine);
        if (flag != null) {
            properties.setAllowMultiWordAttributes(flag.booleanValue());
        }
        flag = evaluateFlag(def.getAllowHtmlInsideAttributes(), scriptEngine);
        if (flag != null) {
            properties.setAllowHtmlInsideAttributes(flag.booleanValue());
        }

        // Unlike every other flag, an unset namespacesAware is explicitly forced to
        // false instead of being left at whatever the cleaner was created with.
        flag = evaluateFlag(def.getNamespacesAware(), scriptEngine);
        properties.setNamespacesAware(flag != null && flag.booleanValue());

        // The remaining options are passed to the cleaner as evaluated strings.
        String hyphenReplacement = BaseTemplater.execute(def.getHyphenReplacement(), scriptEngine);
        if (hyphenReplacement != null) {
            properties.setHyphenReplacementInComment(hyphenReplacement);
        }
        String pruneTags = BaseTemplater.execute(def.getPrunetags(), scriptEngine);
        if (pruneTags != null) {
            properties.setPruneTags(pruneTags);
        }
        String booleanAttributeValues = BaseTemplater.execute(def.getBooleanAtts(), scriptEngine);
        if (booleanAttributeValues != null) {
            properties.setBooleanAttributeValues(booleanAttributeValues);
        }
    }

    /**
     * Evaluates a boolean option template.
     *
     * @param template     option value as stored in the definition
     * @param scriptEngine engine used to evaluate the template
     * @return {@code null} when the evaluated template is {@code null}; otherwise the
     *         result of {@link CommonUtil#isBooleanTrue} on the evaluated text
     */
    private static Boolean evaluateFlag(String template, ScriptEngine scriptEngine) {
        String evaluated = BaseTemplater.execute(template, scriptEngine);
        if (evaluated == null) {
            return null;
        }
        return Boolean.valueOf(CommonUtil.isBooleanTrue(evaluated));
    }

    /**
     * Serializes the cleaned tree with the serializer selected by {@code outputType}.
     *
     * <p>Recognized values (case-insensitive) are {@code "simple"}, {@code "pretty"}
     * and {@code "browser-compact"}; any other value, including {@code null}, selects
     * the compact serializer.
     *
     * @param outputType evaluated output type, may be {@code null}
     * @param properties cleaner properties handed to the serializer
     * @param root       root node of the cleaned document
     * @return the document as an XML string
     * @throws IOException if the serializer fails
     */
    private static String serialize(String outputType, CleanerProperties properties, TagNode root)
            throws IOException {
        if ("simple".equalsIgnoreCase(outputType)) {
            return new SimpleXmlSerializer(properties).getXmlAsString(root);
        }
        if ("pretty".equalsIgnoreCase(outputType)) {
            return new PrettyXmlSerializer(properties).getXmlAsString(root);
        }
        if ("browser-compact".equalsIgnoreCase(outputType)) {
            return new BrowserCompactXmlSerializer(properties).getXmlAsString(root);
        }
        return new CompactXmlSerializer(properties).getXmlAsString(root);
    }
}
