package org.pageseeder.flint.berlioz.tika;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.OutputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.io.IOUtils;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.pageseeder.berlioz.GlobalSettings;
import org.pageseeder.flint.IndexException;
import org.pageseeder.flint.content.Content;
import org.pageseeder.flint.content.ContentTranslator;
import org.pageseeder.xmlwriter.XMLWriterImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;

/* loaded from: input_file:org/pageseeder/flint/berlioz/tika/TikaTranslator.class */
/**
 * A {@link ContentTranslator} that runs content through Apache Tika and wraps the
 * XML produced by Tika in a {@code <content source="tika">} element.
 *
 * <p>If Tika reports a {@link TikaException}, the wrapper contains an
 * {@code <error>} element holding the (XML-escaped) failure message instead of
 * the extracted document.
 */
public class TikaTranslator implements ContentTranslator {
    private static final Logger LOGGER = LoggerFactory.getLogger(TikaTranslator.class);
    private static final SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
    private static final TikaConfig TIKA_CONFIG = TikaConfig.getDefaultConfig();

    /**
     * Translates the content into XML using Tika.
     *
     * <p>When the content is backed by a file larger than the
     * {@code flint.index.max-tika-size} setting, Tika is not invoked and an empty
     * {@code <content source="tika"/>} wrapper is returned.
     *
     * @param content the content to translate
     * @return a reader over the wrapper XML; {@code null} if the content is flagged
     *         as deleted or if any exception other than a {@link TikaException}
     *         occurs during translation (the failure is logged)
     * @throws IndexException declared by the interface; every exception raised
     *         inside the translation itself is caught and logged here
     */
    public Reader translate(Content content) throws IndexException {
        if (content.isDeleted()) {
            return null;
        }
        try {
            LOGGER.debug("Attempting to translate content {}", content);
            Metadata metadata = new Metadata();
            String extracted = null;
            TikaInputStream stream = null;
            File file = content.getFile();
            if (file != null) {
                // Only files within the configured size limit are handed to Tika.
                if (file.length() <= GlobalSettings.get("flint.index.max-tika-size", TikaTranslatorFactory.MAX_INDEXING_SIZE)) {
                    stream = TikaInputStream.get(file.toPath());
                } else {
                    LOGGER.debug("Skipping TIKA parsing of {}: file exceeds the configured maximum size", file);
                }
            } else {
                stream = TikaInputStream.get(content.getSource());
            }
            if (stream != null) {
                ParseContext parseContext = new ParseContext();
                try {
                    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                    new AutoDetectParser(TIKA_CONFIG).parse(stream, getHandler(buffer), metadata, parseContext);
                    // The handler is configured to serialise as UTF-8 (see getHandler).
                    extracted = new String(buffer.toByteArray(), StandardCharsets.UTF_8);
                } catch (TikaException e) {
                    LOGGER.error("Failed to parse content with TIKA", e);
                    String message = e.getMessage() == null ? "Unknown error while reading content in TIKA" : e.getMessage();
                    // The message is emitted through writeXML below, so it must be
                    // escaped or a '<' or '&' in it would produce malformed XML.
                    extracted = "<error>" + escapeXml(message) + "</error>";
                } finally {
                    // Single close point for success, TikaException and any other failure.
                    IOUtils.closeQuietly(stream);
                }
            }
            StringWriter out = new StringWriter();
            XMLWriterImpl xml = new XMLWriterImpl(out);
            xml.openElement("content");
            xml.attribute("source", "tika");
            if (extracted != null) {
                // Drop the XHTML default-namespace declaration from the Tika output.
                xml.writeXML(extracted.replace(" xmlns=\"http://www.w3.org/1999/xhtml\"", ""));
            }
            xml.closeElement();
            return new StringReader(out.toString());
        } catch (Exception e2) {
            LOGGER.error("Failed to translate content {}", content, e2);
            return null;
        }
    }

    /**
     * Builds a SAX content handler that serialises the events it receives as XML
     * (UTF-8, no XML declaration) to the given stream.
     *
     * @param outputStream the stream receiving the serialised XML
     * @return the handler to pass to the Tika parser
     * @throws TransformerConfigurationException if the transformer handler cannot be created
     */
    private ContentHandler getHandler(OutputStream outputStream) throws TransformerConfigurationException {
        TransformerHandler handler = factory.newTransformerHandler();
        handler.getTransformer().setOutputProperty("method", "xml");
        handler.getTransformer().setOutputProperty("encoding", "UTF-8");
        handler.getTransformer().setOutputProperty("omit-xml-declaration", "yes");
        handler.setResult(new StreamResult(outputStream));
        return handler;
    }

    /**
     * Escapes the characters that are significant in XML element content
     * ({@code &}, {@code <} and {@code >}).
     *
     * @param text the text to escape; must not be {@code null}
     * @return the text with markup characters replaced by entity references
     */
    private static String escapeXml(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }
}
