/*
 * Decompiled with CFR 0.152.
 */
package won.matcher.utils.preprocessing;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.Tokenizer;

/**
 * Extracts lower-cased word tokens from free text using OpenNLP's
 * {@link SimpleTokenizer} and, optionally, a part-of-speech tagger to keep
 * only selected word classes.
 *
 * <p>NOTE(review): the OpenNLP documentation states that {@link POSTaggerME}
 * instances are not thread-safe, so an instance of this class should
 * presumably not be shared between threads without external synchronization
 * — confirm against the OpenNLP version in use.
 */
public class OpenNlpTokenExtraction {
    /** Classpath resource name of the serialized POS tagger model. */
    private static final String POS_MODEL_RESOURCE = "en-pos-maxent.bin";

    /**
     * Matches tokens that are discarded: any single-character token, any token
     * starting with a non-word character, and any token starting with a digit.
     * Compiled once because {@link Pattern} is immutable and safe to reuse.
     */
    private static final Pattern DISCARDED_TOKEN = Pattern.compile(".{1}+|\\W.*|\\d.*");

    Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
    POSTaggerME posTagger = null;

    /**
     * Loads the POS model from the classpath and creates the tagger.
     *
     * @throws IOException if the model resource is not on the classpath or
     *                     cannot be read
     */
    public OpenNlpTokenExtraction() throws IOException {
        // try-with-resources: the model stream is closed whether or not loading succeeds.
        try (InputStream modelIn = this.getClass().getClassLoader().getResourceAsStream(POS_MODEL_RESOURCE)) {
            if (modelIn == null) {
                // getResourceAsStream signals "not found" with null rather than an exception.
                throw new IOException("POS model resource not found on classpath: " + POS_MODEL_RESOURCE);
            }
            POSModel model = new POSModel(modelIn);
            this.posTagger = new POSTaggerME(model);
        }
    }

    /**
     * Tokenizes the lower-cased text and drops single-character tokens and
     * tokens that start with a non-word character or a digit.
     *
     * @param text the text to tokenize; must not be {@code null}
     * @return the remaining tokens in their original order (possibly empty)
     */
    public String[] extractWordTokens(String text) {
        // Locale.ROOT keeps lower-casing independent of the JVM's default locale.
        String lowered = text.toLowerCase(Locale.ROOT);
        String[] tokens = this.tokenizer.tokenize(lowered);
        return this.filterTokens(Arrays.asList(tokens), DISCARDED_TOKEN);
    }

    /**
     * Tokenizes the lower-cased text, keeps only tokens whose POS tag starts
     * with {@code "N"} or {@code "J"} or equals {@code "FW"}, and then applies
     * the same token filter as {@link #extractWordTokens(String)}.
     *
     * <p>NOTE(review): presumably these are Penn Treebank tags (nouns,
     * adjectives, foreign words) — confirm against the tag set of the model.
     *
     * @param text the text to tokenize; must not be {@code null}
     * @return the remaining tokens in their original order (possibly empty)
     */
    public String[] extractRelevantWordTokens(String text) {
        // Locale.ROOT keeps lower-casing independent of the JVM's default locale.
        String lowered = text.toLowerCase(Locale.ROOT);
        String[] tokens = this.tokenizer.tokenize(lowered);
        String[] tags = this.posTagger.tag(tokens);

        List<String> relevant = new ArrayList<String>(tags.length);
        for (int i = 0; i < tags.length; ++i) {
            String tag = tags[i];
            if (tag.startsWith("N") || tag.startsWith("J") || tag.equals("FW")) {
                relevant.add(tokens[i]);
            }
        }
        return this.filterTokens(relevant, DISCARDED_TOKEN);
    }

    /**
     * Returns the tokens that do NOT fully match the given pattern.
     *
     * @param tokens  the candidate tokens
     * @param pattern the pattern describing tokens to discard
     * @return the surviving tokens in iteration order
     */
    private String[] filterTokens(Iterable<String> tokens, Pattern pattern) {
        List<String> kept = new ArrayList<String>();
        for (String token : tokens) {
            if (!pattern.matcher(token).matches()) {
                kept.add(token);
            }
        }
        return kept.toArray(new String[kept.size()]);
    }
}

