package org.wikibrain.phrases;

import com.typesafe.config.Config;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.math.Fraction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wikibrain.conf.Configuration;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.conf.Configurator;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.dao.LocalPageDao;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.lang.LanguageSet;
import org.wikibrain.download.FileDownloader;
import org.wikibrain.phrases.BasePhraseAnalyzer;
import org.wikibrain.phrases.PrunedCounts;
import org.wikibrain.utils.WpIOUtils;

/* loaded from: input_file:org/wikibrain/phrases/StanfordPhraseAnalyzer.class */
public class StanfordPhraseAnalyzer extends BasePhraseAnalyzer {
    /** Dictionary file that {@code Iter} opens and reads line by line; set once by the constructor. */
    private final File path;
    /**
     * Languages passed to the most recent {@code getCorpus} call. {@code Iter} reads this field
     * while iterating, so it is shared by every iterator created from this analyzer.
     */
    private LanguageSet languages;
    private static final Logger LOG = LoggerFactory.getLogger(StanfordPhraseAnalyzer.class);
    /** Language looked up by the code {@code "en"}. */
    private static final Language LANG_EN = Language.getByLangCode("en");
    /** Language looked up by the code {@code "simple"}. */
    private static final Language LANG_SIMPLE = Language.getByLangCode("simple");
    /**
     * Layout of one dictionary line: group 1 is everything before the tab (the phrase), group 2 is a
     * run of digits, {@code '.'}, {@code 'e'} and {@code '-'} (a number), group 3 is the following
     * space-free token (the article), and group 4 is either empty or a space followed by the rest of
     * the line (the flags, also captured without the leading space as group 5).
     */
    private static final Pattern MATCH_ENTRY = Pattern.compile("([^\t]*)\t([0-9.e-]+) ([^ ]*)(| (.*))$");

    /**
     * Lazily walks the dictionary file at {@code path}, turning every parseable line into one
     * {@link BasePhraseAnalyzer.Entry} per requested language (English and/or Simple English).
     *
     * <p>Lines that cannot be parsed are logged at debug level and skipped. The reader is closed
     * when the end of the file is reached or when a read fails; an iterator that is abandoned
     * before either happens keeps its reader open.</p>
     */
    protected class Iter implements Iterator<BasePhraseAnalyzer.Entry> {
        /** Source of dictionary lines; closed at end of file or after a read error. */
        BufferedReader reader;
        /** Entries built from the most recently parsed line that have not been handed out yet. */
        List<BasePhraseAnalyzer.Entry> buffer = new ArrayList<BasePhraseAnalyzer.Entry>();
        /** Becomes {@code true} once {@code readLine()} has returned {@code null}. */
        boolean eof = false;
        /** The two languages entries can be emitted for; built once instead of once per line. */
        private final List<Language> supported =
                Arrays.asList(StanfordPhraseAnalyzer.LANG_EN, StanfordPhraseAnalyzer.LANG_SIMPLE);

        /**
         * Opens the dictionary file of the enclosing analyzer.
         *
         * @throws IOException if the file cannot be opened
         */
        public Iter() throws IOException {
            this.reader = WpIOUtils.openBufferedReader(StanfordPhraseAnalyzer.this.path);
        }

        /**
         * Reads ahead until at least one entry is buffered or the file is exhausted.
         *
         * @return {@code true} if another entry is available
         * @throws RuntimeException wrapping the {@link IOException} if reading the file fails
         */
        @Override // java.util.Iterator
        public boolean hasNext() {
            fillBuffer();
            return !this.buffer.isEmpty();
        }

        /**
         * Returns the next buffered entry, reading ahead if necessary.
         *
         * @return the next entry, never {@code null}
         * @throws NoSuchElementException if the file has no further entries
         * @throws RuntimeException wrapping the {@link IOException} if reading the file fails
         */
        @Override // java.util.Iterator
        public BasePhraseAnalyzer.Entry next() {
            fillBuffer();
            if (this.buffer.isEmpty()) {
                // Honour the Iterator contract instead of handing callers a null element.
                throw new NoSuchElementException("no more entries in " + StanfordPhraseAnalyzer.this.path);
            }
            return this.buffer.remove(0);
        }

        /**
         * Removal is not supported.
         *
         * @throws UnsupportedOperationException always
         */
        @Override // java.util.Iterator
        public void remove() {
            throw new UnsupportedOperationException();
        }

        /**
         * Parses lines until one of them yields entries or the end of the file is reached.
         * Malformed lines are logged at debug level and skipped.
         */
        private void fillBuffer() {
            while (!this.eof && this.buffer.isEmpty()) {
                try {
                    parseNextLine();
                } catch (IOException e) {
                    // A failed read leaves the stream unusable; release the handle before reporting.
                    IOUtils.closeQuietly(this.reader);
                    throw new RuntimeException(e);
                } catch (Exception e2) {
                    StanfordPhraseAnalyzer.LOG.debug("Error parsing line:", e2);
                }
            }
        }

        /**
         * Reads one line and appends an entry for each supported language that was requested.
         * At end of file the reader is closed and {@code eof} is set instead.
         *
         * @throws IOException if the line cannot be read
         * @throws IllegalArgumentException if the line does not match the dictionary format
         */
        private void parseNextLine() throws IOException {
            if (!this.buffer.isEmpty()) {
                throw new IllegalStateException("buffer must be drained before parsing the next line");
            }
            String readLine = this.reader.readLine();
            if (readLine == null) {
                IOUtils.closeQuietly(this.reader);
                this.eof = true;
                return;
            }
            Record record = new Record(readLine);
            for (Language language : this.supported) {
                if (StanfordPhraseAnalyzer.this.languages.containsLanguage(language)) {
                    this.buffer.add(new BasePhraseAnalyzer.Entry(language, record.article, record.phrase, record.getNumEnglishLinks()));
                }
            }
        }
    }

    /* loaded from: input_file:org/wikibrain/phrases/StanfordPhraseAnalyzer$Provider.class */
    public static class Provider extends org.wikibrain.conf.Provider<PhraseAnalyzer> {
        public Provider(Configurator configurator, Configuration configuration) throws ConfigurationException {
            super(configurator, configuration);
        }

        public Class getType() {
            return PhraseAnalyzer.class;
        }

        public String getPath() {
            return "phrases.analyzer";
        }

        public PhraseAnalyzer get(String str, Config config, Map<String, String> map) throws ConfigurationException {
            if (!config.getString("type").equals("stanford")) {
                return null;
            }
            return new StanfordPhraseAnalyzer((PhraseAnalyzerDao) getConfigurator().construct(PhraseAnalyzerDao.class, str, config.getConfig("dao"), new HashMap()), (LocalPageDao) getConfigurator().get(LocalPageDao.class, config.getString("localPageDao")), (PrunedCounts.Pruner) getConfigurator().construct(PrunedCounts.Pruner.class, (String) null, config.getConfig("phrasePruner"), (Map) null), (PrunedCounts.Pruner) getConfigurator().construct(PrunedCounts.Pruner.class, (String) null, config.getConfig("pagePruner"), (Map) null), new File(config.getString("path")));
        }

        /* renamed from: get, reason: collision with other method in class */
        public /* bridge */ /* synthetic */ Object m15get(String str, Config config, Map map) throws ConfigurationException {
            return get(str, config, (Map<String, String>) map);
        }
    }

    /**
     * One parsed line of the dictionary file, split into its phrase, numeric score, article
     * and space-separated flags according to {@code MATCH_ENTRY}.
     */
    public class Record {
        /** Text before the tab separator. */
        String phrase;
        /** Numeric value that follows the tab. */
        float fraction;
        /** Space-free token that follows the numeric value. */
        String article;
        /** Remaining tokens of the line, split on single spaces. */
        String[] flags;

        /**
         * Parses a single dictionary line.
         *
         * @param str raw line from the dictionary file
         * @throws IllegalArgumentException if the line does not match the expected layout
         * @throws NumberFormatException if the numeric column is not a valid float
         */
        Record(String str) {
            Matcher parsed = StanfordPhraseAnalyzer.MATCH_ENTRY.matcher(str);
            if (parsed.matches()) {
                phrase = parsed.group(1);
                fraction = Float.parseFloat(parsed.group(2));
                article = parsed.group(3);
                flags = parsed.group(4).trim().split(" ");
                return;
            }
            throw new IllegalArgumentException("invalid concepts entry: '" + str + "'");
        }

        /**
         * Finds the first flag starting with {@code "W:"}, parses the remainder as a fraction and
         * returns its numerator (presumably the English link count, going by the method name).
         *
         * @return the numerator of the first {@code W:} flag, or 0 when no such flag exists
         */
        int getNumEnglishLinks() {
            final String marker = "W:";
            for (int i = 0; i < flags.length; i++) {
                String flag = flags[i];
                if (!flag.startsWith(marker)) {
                    continue;
                }
                String ratio = flag.substring(marker.length());
                return Fraction.getFraction(ratio).getNumerator();
            }
            return 0;
        }
    }

    /**
     * Creates an analyzer that reads its corpus from the given dictionary file.
     *
     * @param phraseAnalyzerDao forwarded to the base analyzer
     * @param localPageDao      forwarded to the base analyzer
     * @param pruner            pruner over {@code String} keys, forwarded to the base analyzer
     * @param pruner2           pruner over {@code Integer} keys, forwarded to the base analyzer
     * @param file              dictionary file that the corpus iterator opens
     */
    public StanfordPhraseAnalyzer(PhraseAnalyzerDao phraseAnalyzerDao, LocalPageDao localPageDao, PrunedCounts.Pruner<String> pruner, PrunedCounts.Pruner<Integer> pruner2, File file) {
        super(phraseAnalyzerDao, localPageDao, pruner, pruner2);
        path = file;
    }

    /**
     * Exposes the dictionary file as a sequence of phrase entries.
     *
     * <p>A warning is logged for every requested language other than English and Simple English.
     * The requested set is stored on the analyzer and consulted by each iterator while it runs, so
     * iterators obtained from an earlier call observe the set from the most recent call.</p>
     *
     * @param languageSet languages the caller wants entries for
     * @return an iterable whose {@code iterator()} opens the dictionary file afresh; a failure to
     *         open the file is rethrown as a {@link RuntimeException}
     */
    @Override // org.wikibrain.phrases.BasePhraseAnalyzer
    protected Iterable<BasePhraseAnalyzer.Entry> getCorpus(LanguageSet languageSet) throws IOException, DaoException {
        for (Language language : languageSet) {
            // Compare by value: an equal Language that is not the cached constant must not warn.
            if (!LANG_EN.equals(language) && !LANG_SIMPLE.equals(language)) {
                LOG.warn("Stanford only supports English and Simple English (not {})", language);
            }
        }
        this.languages = languageSet;
        return new Iterable<BasePhraseAnalyzer.Entry>() { // from class: org.wikibrain.phrases.StanfordPhraseAnalyzer.1
            @Override // java.lang.Iterable
            public Iterator<BasePhraseAnalyzer.Entry> iterator() {
                try {
                    return new Iter();
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
        };
    }

    /**
     * Downloads the dictionary unless a previous download already finished.
     *
     * <p>Completion is tracked with an empty marker file named after the dictionary path plus
     * {@code ".completed"}; the marker is touched only after the download call returns.</p>
     *
     * @param configuration supplies {@code phrases.analyzer.stanford.path} and
     *                      {@code phrases.analyzer.stanford.url}
     * @throws IOException          if the download or the marker update fails
     * @throws InterruptedException declared for the download call
     */
    public static void downloadDictionaryIfNecessary(Configuration configuration) throws IOException, InterruptedException {
        String dictionaryPath = configuration.get().getString("phrases.analyzer.stanford.path");
        String dictionaryUrl = configuration.get().getString("phrases.analyzer.stanford.url");
        File completionMarker = new File(dictionaryPath + ".completed");
        if (!completionMarker.isFile()) {
            LOG.info("downloading stanford dictionary...");
            File target = new File(dictionaryPath);
            FileDownloader downloader = new FileDownloader();
            downloader.download(new URL(dictionaryUrl), target);
            FileUtils.touch(completionMarker);
        }
    }
}
