package org.webharvest.runtime.processors;

import java.io.IOException;
import org.htmlcleaner.HtmlCleaner;
import org.webharvest.definition.HtmlToXmlDef;
import org.webharvest.exception.ParserException;
import org.webharvest.runtime.Scraper;
import org.webharvest.runtime.ScraperContext;
import org.webharvest.runtime.scripting.ScriptEngine;
import org.webharvest.runtime.templaters.BaseTemplater;
import org.webharvest.runtime.variables.NodeVariable;
import org.webharvest.runtime.variables.Variable;
import org.webharvest.utils.CommonUtil;

/* loaded from: input_file:lib/webharvest-core-1.0.jar:org/webharvest/runtime/processors/HtmlToXmlProcessor.class */
/**
 * Processor that feeds the text of its body content to an {@link HtmlCleaner}
 * and returns the cleaned markup, serialized as XML text, in a
 * {@link NodeVariable}.
 *
 * <p>Cleaner options are read from the backing {@link HtmlToXmlDef}; every
 * option value is first passed through {@link BaseTemplater} with the
 * scraper's script engine before it is applied.
 */
public class HtmlToXmlProcessor extends BaseProcessor {
    private HtmlToXmlDef htmlToXmlDef;

    /**
     * Creates a processor bound to the given definition.
     *
     * @param htmlToXmlDef definition supplying the body and the cleaner options
     */
    public HtmlToXmlProcessor(HtmlToXmlDef htmlToXmlDef) {
        super(htmlToXmlDef);
        this.htmlToXmlDef = htmlToXmlDef;
    }

    /**
     * Cleans the body text and returns it as XML.
     *
     * <p>Each boolean option is applied only when its evaluated template is
     * non-null; otherwise the cleaner is left untouched for that option. The
     * one exception is {@code namespacesAware}, which is explicitly set to
     * {@code false} when no value is available.
     *
     * @param scraper        scraper providing the script engine
     * @param scraperContext context used to obtain the body content
     * @return a {@link NodeVariable} wrapping the serialized XML string
     * @throws ParserException if cleaning or serialization raises an {@link IOException}
     */
    @Override // org.webharvest.runtime.processors.BaseProcessor
    public Variable execute(Scraper scraper, ScraperContext scraperContext) {
        String html = getBodyTextContent(this.htmlToXmlDef, scraper, scraperContext).toString();
        HtmlCleaner cleaner = new HtmlCleaner(html);
        ScriptEngine engine = scraper.getScriptEngine();
        HtmlToXmlDef def = this.htmlToXmlDef;

        // Options are evaluated and applied one at a time, in a fixed order,
        // because template evaluation goes through the script engine.
        Boolean flag = evaluateFlag(def.getAdvancedXmlEscape(), engine);
        if (flag != null) {
            cleaner.setAdvancedXmlEscape(flag.booleanValue());
        }

        flag = evaluateFlag(def.getUseCdataForScriptAndStyle(), engine);
        if (flag != null) {
            cleaner.setUseCdataForScriptAndStyle(flag.booleanValue());
        }

        flag = evaluateFlag(def.getTranslateSpecialEntities(), engine);
        if (flag != null) {
            cleaner.setTranslateSpecialEntities(flag.booleanValue());
        }

        flag = evaluateFlag(def.getRecognizeUnicodeChars(), engine);
        if (flag != null) {
            cleaner.setRecognizeUnicodeChars(flag.booleanValue());
        }

        flag = evaluateFlag(def.getOmitUnknownTags(), engine);
        if (flag != null) {
            cleaner.setOmitUnknownTags(flag.booleanValue());
        }

        flag = evaluateFlag(def.getTreatUnknownTagsAsContent(), engine);
        if (flag != null) {
            cleaner.setTreatUnknownTagsAsContent(flag.booleanValue());
        }

        flag = evaluateFlag(def.getOmitDeprecatedTags(), engine);
        if (flag != null) {
            cleaner.setOmitDeprecatedTags(flag.booleanValue());
        }

        flag = evaluateFlag(def.getTreatDeprecatedTagsAsContent(), engine);
        if (flag != null) {
            cleaner.setTreatDeprecatedTagsAsContent(flag.booleanValue());
        }

        flag = evaluateFlag(def.getOmitComments(), engine);
        if (flag != null) {
            cleaner.setOmitComments(flag.booleanValue());
        }

        flag = evaluateFlag(def.getOmitHtmlEnvelope(), engine);
        if (flag != null) {
            cleaner.setOmitHtmlEnvelope(flag.booleanValue());
        }

        flag = evaluateFlag(def.getAllowMultiWordAttributes(), engine);
        if (flag != null) {
            cleaner.setAllowMultiWordAttributes(flag.booleanValue());
        }

        flag = evaluateFlag(def.getAllowHtmlInsideAttributes(), engine);
        if (flag != null) {
            cleaner.setAllowHtmlInsideAttributes(flag.booleanValue());
        }

        // Unlike the options above, this one is always set: a missing value means false.
        flag = evaluateFlag(def.getNamespacesAware(), engine);
        cleaner.setNamespacesAware(flag != null && flag.booleanValue());

        // The prune-tags value is handed over as raw text, not parsed as a boolean.
        String pruneTags = BaseTemplater.execute(def.getPrunetags(), engine);
        if (pruneTags != null) {
            cleaner.setPruneTags(pruneTags);
        }

        String outputType = BaseTemplater.execute(def.getOutputType(), engine);
        try {
            cleaner.clean();
            return new NodeVariable(serialize(cleaner, outputType));
        } catch (IOException e) {
            throw new ParserException(e);
        }
    }

    /**
     * Evaluates an option template and interprets the result as a boolean.
     *
     * @param template raw option value taken from the definition
     * @param engine   script engine used for template evaluation
     * @return {@code null} when the evaluated template is {@code null}; otherwise
     *         the result of {@link CommonUtil#isBooleanTrue} on the evaluated text
     */
    private static Boolean evaluateFlag(String template, ScriptEngine engine) {
        String evaluated = BaseTemplater.execute(template, engine);
        if (evaluated == null) {
            return null;
        }
        return Boolean.valueOf(CommonUtil.isBooleanTrue(evaluated));
    }

    /**
     * Picks the serialization matching the requested output type.
     *
     * <p>{@code "simple"}, {@code "pretty"} and {@code "browser-compact"} are
     * matched case-insensitively; any other value, including {@code null},
     * selects the compact form.
     *
     * @param cleaner    cleaner on which {@code clean()} has already been called
     * @param outputType evaluated output type, may be {@code null}
     * @return the XML text produced by the selected serializer
     * @throws IOException if the selected serializer raises one
     */
    private static String serialize(HtmlCleaner cleaner, String outputType) throws IOException {
        if ("simple".equalsIgnoreCase(outputType)) {
            return cleaner.getXmlAsString();
        }
        if ("pretty".equalsIgnoreCase(outputType)) {
            return cleaner.getPrettyXmlAsString();
        }
        if ("browser-compact".equalsIgnoreCase(outputType)) {
            return cleaner.getBrowserCompactXmlAsString();
        }
        return cleaner.getCompactXmlAsString();
    }
}
