package org.wikibrain.phrases;

import gnu.trove.list.TIntList;
import gnu.trove.list.array.TIntArrayList;
import gnu.trove.set.TIntSet;
import gnu.trove.set.hash.TIntHashSet;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.nlp.StringTokenizer;
import org.wikibrain.core.nlp.Token;
import org.wikibrain.utils.WpCollectionUtils;

/* loaded from: input_file:org/wikibrain/phrases/PhraseTokenizer.class */
/**
 * Groups adjacent word tokens into multi-word phrases using the link
 * probabilities supplied by a {@link LinkProbabilityDao}.
 *
 * <p>The algorithm implemented by {@link #makePhraseTokens(Language, List)} is:
 * <ol>
 *   <li>enumerate contiguous runs of words and keep every run whose link
 *       probability exceeds {@code minLinkProbabilityForPhrases};</li>
 *   <li>greedily accept the candidates in order of decreasing probability,
 *       skipping any candidate that overlaps an already accepted one;</li>
 *   <li>turn every word that is still uncovered into a single-word phrase;</li>
 *   <li>return the phrases ordered by their position in the text.</li>
 * </ol>
 */
public class PhraseTokenizer {
    /**
     * Probability assigned to the single-word mentions that fill the gaps left
     * by the greedy selection. It is only stored on the mention; the selection
     * has already finished by the time these mentions are created.
     */
    private static final double UNLINKED_WORD_PROBABILITY = 0.1d;

    private final LinkProbabilityDao dao;

    /** A candidate phrase is kept only if its link probability is strictly greater than this. */
    private final double minLinkProbabilityForPhrases = 1.0E-5d;

    /**
     * A contiguous run of word indexes together with the probability that was
     * assigned to the run.
     */
    public static class Mention {
        /** Indexes (into the word list) covered by this mention, in ascending order. */
        TIntList tokens = new TIntArrayList();
        Double probability;

        /**
         * @param first index of the first covered word (inclusive)
         * @param last index of the last covered word (inclusive)
         * @param probability probability associated with the run
         */
        Mention(int first, int last, double probability) {
            for (int index = first; index <= last; index++) {
                this.tokens.add(index);
            }
            this.probability = Double.valueOf(probability);
        }

        /**
         * @param indexes word indexes that are already taken
         * @return {@code true} if at least one index of this mention is contained in {@code indexes}
         */
        boolean intersects(TIntSet indexes) {
            // Indexed access avoids copying the list into an array on every call.
            for (int k = 0; k < this.tokens.size(); k++) {
                if (indexes.contains(this.tokens.get(k))) {
                    return true;
                }
            }
            return false;
        }
    }

    /**
     * @param linkProbabilityDao source of link probabilities and subgram information
     */
    public PhraseTokenizer(LinkProbabilityDao linkProbabilityDao) {
        this.dao = linkProbabilityDao;
    }

    /**
     * Tokenizes {@code str} into words and returns the text of each resulting phrase.
     *
     * @param language language of the text
     * @param str text to split into phrases
     * @return phrase strings in text order
     * @throws DaoException if a link-probability lookup fails
     */
    public List<String> makePhrases(Language language, String str) throws DaoException {
        return tokenTexts(makePhraseTokens(language, str));
    }

    /**
     * Groups an already tokenized list of words into phrases.
     *
     * <p>The words are joined with single spaces and each word is mapped to a
     * {@link Token} over the joined text, so the returned phrase strings are
     * substrings of that joined text.
     *
     * @param language language of the words
     * @param list words in text order
     * @return phrase strings in text order
     * @throws DaoException if a link-probability lookup fails
     * @throws IllegalStateException if a token built over the joined text does
     *         not reproduce the word it was built for
     */
    public List<String> makePhrases(Language language, List<String> list) throws DaoException {
        String joined = StringUtils.join(list, " ");
        List<Token> words = new ArrayList<>(list.size());
        int offset = 0;
        boolean firstWord = true;
        for (String word : list) {
            // Skip the single-space separator that precedes every word but the first.
            // An explicit flag is used instead of "offset > 0" because the offset is
            // still 0 after a leading empty word, which would make the next word
            // start on the separator.
            if (!firstWord) {
                offset++;
            }
            firstWord = false;
            Token token = new Token(offset, offset + word.length(), joined);
            if (!token.getToken().equals(word)) {
                throw new IllegalStateException(
                        "Token text '" + token.getToken() + "' at offset " + offset
                                + " does not match input word '" + word + "'");
            }
            words.add(token);
            offset = token.getEnd();
        }
        return tokenTexts(makePhraseTokens(language, words));
    }

    /**
     * Splits the given token into words and groups those words into phrases.
     *
     * @param language language of the text
     * @param token token whose content is split into words
     * @return phrase tokens in text order
     * @throws DaoException if a link-probability lookup fails
     */
    public List<Token> makePhraseTokens(Language language, Token token) throws DaoException {
        return makePhraseTokens(language, new StringTokenizer().getWordTokens(language, token));
    }

    /**
     * Splits the given text into words and groups those words into phrases.
     *
     * @param language language of the text
     * @param str text to split into phrases
     * @return phrase tokens in text order
     * @throws DaoException if a link-probability lookup fails
     */
    public List<Token> makePhraseTokens(Language language, String str) throws DaoException {
        return makePhraseTokens(language, new StringTokenizer().getWordTokens(language, str));
    }

    /**
     * Groups word tokens into phrase tokens.
     *
     * <p>If {@code list} is not sorted, a sorted copy is used; the caller's
     * list is never modified. Each returned token spans from the beginning of
     * the first word of a phrase to the end of its last word, over the full
     * text of the first word.
     *
     * @param language language of the words
     * @param list word tokens
     * @return phrase tokens in text order; an empty list if {@code list} is empty
     * @throws DaoException if a link-probability lookup fails
     */
    public List<Token> makePhraseTokens(Language language, List<Token> list) throws DaoException {
        if (list.isEmpty()) {
            return new ArrayList<>();
        }
        List<Token> words = list;
        if (!WpCollectionUtils.isSorted(words)) {
            words = new ArrayList<>(list);
            Collections.sort(words);
        }

        List<Mention> candidates = findCandidateMentions(language, words);
        List<Mention> selected = selectMentions(candidates, words.size());

        List<Token> phrases = new ArrayList<>(selected.size());
        for (Mention mention : selected) {
            Token firstWord = words.get(mention.tokens.min());
            Token lastWord = words.get(mention.tokens.max());
            phrases.add(new Token(firstWord.getBegin(), lastWord.getEnd(), firstWord.getFullText()));
        }
        return phrases;
    }

    /** Collects the text of every token, preserving order. */
    private static List<String> tokenTexts(List<Token> tokens) {
        List<String> texts = new ArrayList<>(tokens.size());
        for (Token token : tokens) {
            texts.add(token.getToken());
        }
        return texts;
    }

    /**
     * Enumerates contiguous runs of words and returns a mention for every run
     * whose link probability exceeds {@code minLinkProbabilityForPhrases}.
     */
    private List<Mention> findCandidateMentions(Language language, List<Token> words) throws DaoException {
        List<Mention> candidates = new ArrayList<>();
        for (int start = 0; start < words.size(); start++) {
            StringBuilder phrase = new StringBuilder();
            for (int end = start; end < words.size(); end++) {
                if (phrase.length() > 0) {
                    phrase.append(' ');
                }
                phrase.append(words.get(end).getToken());
                String text = phrase.toString();
                double linkProbability = this.dao.getLinkProbability(language, text);
                if (linkProbability > this.minLinkProbabilityForPhrases) {
                    candidates.add(new Mention(start, end, linkProbability));
                }
                // Stop extending this run once the DAO no longer reports it as a subgram.
                // NOTE(review): presumably isSubgram tells whether the text can still grow
                // into a longer known phrase; the meaning of the boolean flag is not
                // visible here - confirm against LinkProbabilityDao.
                if (!this.dao.isSubgram(language, text, true)) {
                    break;
                }
            }
        }
        return candidates;
    }

    /**
     * Greedily picks non-overlapping candidates by decreasing probability, adds a
     * single-word mention for every uncovered word, and returns the result
     * ordered by position. {@code candidates} is sorted in place.
     */
    private static List<Mention> selectMentions(List<Mention> candidates, int wordCount) {
        // The sort is stable, so candidates with equal probability keep their discovery order.
        Collections.sort(candidates, new Comparator<Mention>() {
            @Override
            public int compare(Mention left, Mention right) {
                return right.probability.compareTo(left.probability);
            }
        });

        List<Mention> selected = new ArrayList<>();
        TIntSet covered = new TIntHashSet();
        for (Mention candidate : candidates) {
            if (!candidate.intersects(covered)) {
                covered.addAll(candidate.tokens);
                selected.add(candidate);
            }
        }
        for (int index = 0; index < wordCount; index++) {
            if (!covered.contains(index)) {
                selected.add(new Mention(index, index, UNLINKED_WORD_PROBABILITY));
            }
        }

        Collections.sort(selected, new Comparator<Mention>() {
            @Override
            public int compare(Mention left, Mention right) {
                return Integer.compare(left.tokens.min(), right.tokens.min());
            }
        });
        return selected;
    }
}
