/*
 * Decompiled with CFR 0.152.
 */
package de.lwsystems.tikaparser;

import de.lwsystems.tikaparser.ImageUtils;
import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.imageio.ImageIO;
import javax.xml.parsers.SAXParser;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.image.ImageParser;
import org.apache.tika.parser.image.TiffParser;
import org.apache.tika.parser.jpeg.JpegParser;
import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.sax.EmbeddedContentHandler;
import org.apache.tika.sax.OfflineContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

public class Tess4jParser
extends AbstractParser {
    // Serialization version identifier for this parser class.
    private static final long serialVersionUID = -8167538283213097265L;

    // Deskew threshold constant (value 0.05); presumably the smallest skew worth
    // correcting -- confirm against ImageUtils.deskew.
    private static final double MINIMUM_DESKEW_THRESHOLD = 0.05;

    // Configuration passed as the default to ParseContext.get when looking up a TesseractOCRConfig.
    private static final TesseractOCRConfig DEFAULT_CONFIG = new TesseractOCRConfig();

    // Image media types returned by getSupportedTypes when an OCR engine is available.
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
            new HashSet<MediaType>(Arrays.asList(
                    MediaType.image("png"),
                    MediaType.image("jpeg"),
                    MediaType.image("tiff"),
                    MediaType.image("x-ms-bmp"),
                    MediaType.image("gif"))));

    // Keys (tessdata path concatenated with language) for which hasTesseract succeeded.
    // NOTE(review): plain HashMap with no synchronization -- confirm instances are not shared across threads.
    private Map<String, Boolean> TESSERACT_PRESENT = new HashMap<>();

    // OCR engines created by hasTesseract, stored under the same keys as TESSERACT_PRESENT.
    private Map<String, ITesseract> instances = new HashMap<>();

    // Parser run on the spooled image by parse(InputStream, ...) after OCR.
    private static Parser _TMP_IMAGE_METADATA_PARSER = new CompositeImageParser();

    /**
     * Reports which media types this parser handles for the given context.
     *
     * @param context parse context; its {@link TesseractOCRConfig} is used, or
     *                {@code DEFAULT_CONFIG} when none is registered
     * @return {@code SUPPORTED_TYPES} when {@link #hasTesseract} succeeds for
     *         that configuration, otherwise an empty set
     */
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        TesseractOCRConfig ocrConfig = context.get(TesseractOCRConfig.class, DEFAULT_CONFIG);
        return this.hasTesseract(ocrConfig)
                ? SUPPORTED_TYPES
                : Collections.<MediaType>emptySet();
    }

    /**
     * Sets {@code TESSDATA_PREFIX} in the environment of the given process builder.
     * The configured tessdata path wins; if it is empty the tesseract path is used;
     * if both are empty the environment is left untouched.
     *
     * @param config OCR configuration supplying the two candidate paths
     * @param pb     process builder whose environment map is updated
     */
    private void setEnv(TesseractOCRConfig config, ProcessBuilder pb) {
        Map<String, String> environment = pb.environment();
        String prefixValue = config.getTessdataPath();
        if (prefixValue.isEmpty()) {
            // Fall back to the tesseract installation path.
            prefixValue = config.getTesseractPath();
        }
        if (!prefixValue.isEmpty()) {
            environment.put("TESSDATA_PREFIX", prefixValue);
        }
    }

    /**
     * Checks whether an OCR engine is available for the given configuration,
     * creating and caching one on first success.
     *
     * <p>The engine is stored in {@code instances} under the key
     * {@code tessdataPath + language}. Only successful initialisations are
     * remembered, so a failed check is retried on the next call.
     *
     * <p>NOTE(review): the trained-data check looks for a single
     * {@code <language>.traineddata} file, so a combined language string such as
     * {@code "eng+deu"} will be reported as unavailable -- confirm whether
     * multi-language configurations need to be supported.
     *
     * @param config OCR configuration supplying tessdata path, language and
     *               page segmentation mode
     * @return {@code true} if an engine for this configuration is ready to use
     */
    public boolean hasTesseract(TesseractOCRConfig config) {
        String tessdataPath = config.getTessdataPath();
        String language = config.getLanguage();
        String instanceKey = tessdataPath + language;
        // Only TRUE is ever stored, so a present key means "already initialised".
        if (Boolean.TRUE.equals(this.TESSERACT_PRESENT.get(instanceKey))) {
            return true;
        }
        try {
            // Verify the language data exists before paying for engine creation.
            Path trainedDataFilePath = FileSystems.getDefault().getPath(tessdataPath, language + ".traineddata");
            if (!trainedDataFilePath.toFile().exists()) {
                return false;
            }
            Tesseract instance = new Tesseract();
            instance.setLanguage(language);
            instance.setDatapath(tessdataPath);
            instance.setPageSegMode(Integer.parseInt(config.getPageSegMode()));
            this.instances.put(instanceKey, instance);
        }
        catch (Throwable e) {
            // Throwable (not Exception) is kept from the original so that linkage
            // errors are also treated as "not available"; the cause is now logged
            // instead of being discarded.
            LogFactory.getLog(Tess4jParser.class).debug("Tesseract is not available for key '" + instanceKey + "'", e);
            return false;
        }
        this.TESSERACT_PRESENT.put(instanceKey, Boolean.TRUE);
        return true;
    }

    /**
     * Runs OCR on an image stream and then delegates to the image metadata parser.
     *
     * <p>The stream is copied to a temporary file so it can be read twice: once
     * to decode the image for OCR and once by {@code _TMP_IMAGE_METADATA_PARSER}.
     * If no OCR engine is available for the configuration the method returns
     * without emitting anything.
     *
     * @param stream   image data to parse
     * @param handler  receiver of the XHTML SAX events
     * @param metadata document metadata, passed on to the metadata parser
     * @param context  parse context; may carry a {@link TesseractOCRConfig}
     * @throws IOException   if the stream cannot be copied or read
     * @throws SAXException  if the handler rejects an event
     * @throws TikaException if OCR pre-processing, the metadata parser or temporary resource cleanup fails
     */
    @Override
    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
        TesseractOCRConfig config = context.get(TesseractOCRConfig.class, DEFAULT_CONFIG);
        if (!this.hasTesseract(config)) {
            return;
        }
        TemporaryResources tmp = new TemporaryResources();
        try {
            TikaInputStream tikaStream = TikaInputStream.get(stream, tmp);
            XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
            xhtml.startDocument();
            File file = tmp.createTemporaryFile();
            // Close the copy target explicitly; the original leaked this descriptor.
            try (OutputStream out = new FileOutputStream(file)) {
                IOUtils.copy(tikaStream, out);
            }
            // ImageIO.read returns null when no registered reader understands the
            // data; skip OCR in that case but still attempt metadata extraction.
            BufferedImage bi = ImageIO.read(file);
            if (bi != null) {
                this.parse(bi, context, xhtml, config);
            }
            try (InputStream metadataStream = TikaInputStream.get(file.toURI())) {
                _TMP_IMAGE_METADATA_PARSER.parse(metadataStream, new EmbeddedContentHandler(xhtml), metadata, context);
            }
            xhtml.endDocument();
        }
        finally {
            tmp.dispose();
        }
    }

    /**
     * Runs OCR on an already decoded image and writes the result as XHTML.
     *
     * <p>Returns without emitting anything when no OCR engine is available for
     * the configuration found in {@code context}.
     *
     * @param image    image to recognise; converted to a {@link BufferedImage} if necessary
     * @param handler  receiver of the XHTML SAX events
     * @param metadata document metadata handed to the XHTML handler
     * @param context  parse context; may carry a {@link TesseractOCRConfig}
     * @throws IOException   if image pre-processing fails
     * @throws SAXException  if the handler rejects an event
     * @throws TikaException if image pre-processing fails
     */
    public void parse(Image image, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
        TesseractOCRConfig config = context.get(TesseractOCRConfig.class, DEFAULT_CONFIG);
        if (!this.hasTesseract(config)) {
            return;
        }
        // The decompiled original declared stream/temp-resource locals that were
        // never assigned (and called close() on an Object, which does not compile);
        // nothing here needs cleanup, so they are removed.
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        this.parse(Tess4jParser.toBufferedImage(image), context, xhtml, config);
        xhtml.endDocument();
    }

    /**
     * Convenience overload of
     * {@link #parseInline(InputStream, XHTMLContentHandler, ParseContext, TesseractOCRConfig)}
     * that supplies a newly created {@link ParseContext}.
     *
     * @param stream image data to recognise
     * @param xhtml  handler that receives the OCR output
     * @param config OCR configuration to use
     */
    public void parseInline(InputStream stream, XHTMLContentHandler xhtml, TesseractOCRConfig config) throws IOException, SAXException, TikaException {
        ParseContext freshContext = new ParseContext();
        this.parseInline(stream, xhtml, freshContext, config);
    }

    /**
     * Runs OCR on an image stream and writes the text into an already open
     * XHTML document (no start/end document events are emitted here).
     *
     * <p>Nothing is emitted when no OCR engine is available, when the input
     * size lies outside the configured minimum/maximum, or when the data cannot
     * be decoded as an image.
     *
     * @param stream       image data to recognise
     * @param xhtml        handler that receives the OCR output
     * @param parseContext context passed through to the OCR step
     * @param config       OCR configuration (engine selection and size limits)
     * @throws IOException   if the stream cannot be spooled or read
     * @throws SAXException  if the handler rejects an event
     * @throws TikaException if image pre-processing or temporary resource cleanup fails
     */
    public void parseInline(InputStream stream, XHTMLContentHandler xhtml, ParseContext parseContext, TesseractOCRConfig config) throws IOException, SAXException, TikaException {
        if (!this.hasTesseract(config)) {
            return;
        }
        TemporaryResources tmp = new TemporaryResources();
        try {
            TikaInputStream tikaInputStream = TikaInputStream.get(stream, tmp);
            // Spool to a file first so that the length is known.
            tikaInputStream.getFile();
            long size = tikaInputStream.getLength();
            // Skip inputs that are too small OR too large. The original used &&,
            // which can never be true when min <= max, so the limits were ignored.
            if (size < (long)config.getMinFileSizeToOcr() || size > (long)config.getMaxFileSizeToOcr()) {
                return;
            }
            // Read through the TikaInputStream rather than the raw stream: spooling
            // may already have consumed the raw stream's bytes.
            BufferedImage bi = ImageIO.read(tikaInputStream);
            if (bi == null) {
                // No registered ImageIO reader could decode the data.
                return;
            }
            this.parse(bi, parseContext, xhtml, config);
        }
        finally {
            tmp.dispose();
        }
    }

    /**
     * Pre-processes an image before OCR by passing it to
     * {@code ImageUtils.deskew} with {@code MINIMUM_DESKEW_THRESHOLD}.
     *
     * @param image  image to pre-process
     * @param config OCR configuration (currently not consulted)
     * @return the image returned by {@code ImageUtils.deskew}
     */
    private BufferedImage processImage(BufferedImage image, TesseractOCRConfig config) throws IOException, TikaException {
        BufferedImage deskewed = ImageUtils.deskew(image, MINIMUM_DESKEW_THRESHOLD);
        return deskewed;
    }

    /**
     * Pre-processes the image, runs the cached OCR engine on it and writes the
     * recognised text as a {@code <div class="ocr">} element.
     *
     * <p>OCR is best effort: if the image is {@code null} or the engine fails,
     * the problem is logged and an empty div is emitted instead of aborting
     * the parse.
     *
     * @param image        image to recognise; {@code null} is tolerated
     * @param parseContext parse context (currently not consulted)
     * @param xhtml        handler that receives the output element
     * @param config       configuration identifying the cached engine
     * @throws IOException   if image pre-processing fails
     * @throws SAXException  if the handler rejects an event
     * @throws TikaException if image pre-processing fails
     */
    private void parse(BufferedImage image, ParseContext parseContext, XHTMLContentHandler xhtml, TesseractOCRConfig config) throws IOException, SAXException, TikaException {
        String ocrResult = "";
        if (image == null) {
            // ImageIO.read yields null for undecodable data; there is nothing to recognise.
            LogFactory.getLog(Tess4jParser.class).debug("Skipping OCR: no decodable image was supplied");
        } else {
            BufferedImage prepared = this.processImage(image, config);
            try {
                // Same key format as used by hasTesseract when it stores the engine.
                ITesseract engine = this.instances.get(config.getTessdataPath() + config.getLanguage());
                String recognised = engine.doOCR(prepared);
                if (recognised != null) {
                    ocrResult = recognised;
                }
            }
            catch (Throwable e) {
                // Throwable is kept from the original so linkage errors do not abort
                // the surrounding parse; the failure is logged instead of printed.
                LogFactory.getLog(Tess4jParser.class).warn("OCR failed; emitting empty result", e);
            }
        }
        this.extractOutput(ocrResult, xhtml);
    }

    /**
     * Writes the given text wrapped in a {@code <div class="ocr">} element.
     *
     * @param stream text to emit as the element's character content
     * @param xhtml  handler that receives the element
     */
    private void extractOutput(String stream, XHTMLContentHandler xhtml) throws SAXException, IOException {
        final String wrapper = "div";
        xhtml.startElement(wrapper, "class", "ocr");
        xhtml.characters(stream);
        xhtml.endElement(wrapper);
    }

    /**
     * Parses the given markup with a SAX parser and forwards its events, minus
     * the elements filtered by {@code HOCRPassThroughHandler}, into a
     * {@code <div class="ocr">} element.
     *
     * @param ocrResult    markup to parse, encoded as UTF-8 for the parser
     * @param parseContext source of the SAX parser; a new context is created when {@code null}
     * @param xhtml        handler that receives the wrapped output
     */
    private void extractHOCROutput(String ocrResult, ParseContext parseContext, XHTMLContentHandler xhtml) throws TikaException, IOException, SAXException {
        ParseContext effectiveContext = (parseContext != null) ? parseContext : new ParseContext();
        SAXParser saxParser = effectiveContext.getSAXParser();
        xhtml.startElement("div", "class", "ocr");
        InputStream markupBytes = IOUtils.toInputStream(ocrResult, "UTF-8");
        DefaultHandler sink = new OfflineContentHandler(new HOCRPassThroughHandler(xhtml));
        saxParser.parse(markupBytes, sink);
        xhtml.endElement("div");
    }

    /**
     * Starts a thread that reads the stream to its end as UTF-8 text, closes it,
     * and writes the collected text to the debug log.
     *
     * @param logType not used by the current implementation
     * @param stream  stream to drain and close
     * @param file    not used by the current implementation
     */
    private void logStream(String logType, final InputStream stream, File file) {
        Thread drainer = new Thread(){

            @Override
            public void run() {
                InputStreamReader reader = new InputStreamReader(stream, StandardCharsets.UTF_8);
                StringBuilder collected = new StringBuilder();
                char[] chunk = new char[1024];
                try {
                    for (int count = reader.read(chunk); count != -1; count = reader.read(chunk)) {
                        collected.append(chunk, 0, count);
                    }
                }
                catch (IOException ignored) {
                    // Read errors end the drain; whatever was collected is still logged.
                }
                finally {
                    IOUtils.closeQuietly(stream);
                }
                LogFactory.getLog(Tess4jParser.class).debug(collected.toString());
            }
        };
        drainer.start();
    }

    /**
     * Returns the image as a {@link BufferedImage}.
     *
     * <p>An argument that already is a {@code BufferedImage} is returned as-is.
     * Any other image is drawn onto a new {@code TYPE_INT_ARGB} image of the
     * same width and height.
     *
     * @param img image to convert
     * @return {@code img} itself or an ARGB copy of it
     */
    private static BufferedImage toBufferedImage(Image img) {
        if (img instanceof BufferedImage) {
            return (BufferedImage)img;
        }
        // TYPE_INT_ARGB is the named constant for the literal 2 used originally.
        BufferedImage bimage = new BufferedImage(img.getWidth(null), img.getHeight(null), BufferedImage.TYPE_INT_ARGB);
        Graphics2D bGr = bimage.createGraphics();
        try {
            bGr.drawImage(img, 0, 0, null);
        }
        finally {
            // Release the graphics context even if drawing fails.
            bGr.dispose();
        }
        return bimage;
    }

    /**
     * SAX handler that forwards events to another handler while dropping the
     * start and end events of the elements named in {@link #IGNORE}. Character
     * data is always forwarded, including text inside ignored elements.
     */
    private static class HOCRPassThroughHandler extends DefaultHandler {
        /** Qualified element names whose start/end events are not forwarded. */
        public static final Set<String> IGNORE = Collections.unmodifiableSet(
                new HashSet<String>(Arrays.asList("html", "head", "title", "meta", "body")));

        /** Receiver of the forwarded events. */
        private final ContentHandler target;

        /**
         * @param xhtml handler that receives every event that is not filtered out
         */
        public HOCRPassThroughHandler(ContentHandler xhtml) {
            this.target = xhtml;
        }

        /** Forwards the start event unless the qualified name is in {@link #IGNORE}. */
        @Override
        public void startElement(String uri, String local, String name, Attributes attributes) throws SAXException {
            if (IGNORE.contains(name)) {
                return;
            }
            this.target.startElement(uri, local, name, attributes);
        }

        /** Forwards the end event unless the qualified name is in {@link #IGNORE}. */
        @Override
        public void endElement(String uri, String local, String name) throws SAXException {
            if (IGNORE.contains(name)) {
                return;
            }
            this.target.endElement(uri, local, name);
        }

        /** Forwards character data unchanged. */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            this.target.characters(ch, start, length);
        }
    }

    /**
     * Composite parser that combines {@link ImageParser}, {@link JpegParser}
     * and {@link TiffParser} over a newly constructed {@link MediaTypeRegistry}.
     */
    private static class CompositeImageParser extends CompositeParser {
        private static final long serialVersionUID = -2398203346206381382L;

        // Static so that it can be referenced in the super(...) call below.
        private static List<Parser> imageParsers =
                Arrays.<Parser>asList(new ImageParser(), new JpegParser(), new TiffParser());

        /** Creates the composite with the three image parsers registered. */
        CompositeImageParser() {
            super(new MediaTypeRegistry(), imageParsers);
        }
    }
}

