package won.matcher.utils.preprocessing;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.Tokenizer;

/* loaded from: input_file:won/matcher/utils/preprocessing/OpenNlpTokenExtraction.class */
/**
 * Extracts lower-cased word tokens from free text using the OpenNLP
 * {@link SimpleTokenizer} and, optionally, a part-of-speech tagger to keep
 * only selected word classes.
 *
 * <p>Tokens are discarded when they consist of a single character, start with
 * a non-word character, or start with a digit (see {@link #REJECTED_TOKEN}).
 */
public class OpenNlpTokenExtraction {
    /** Name of the classpath resource holding the POS tagger model. */
    private static final String POS_MODEL_RESOURCE = "en-pos-maxent.bin";

    /**
     * Matches tokens that must be dropped: exactly one character, or anything
     * beginning with a non-word character ({@code \W}) or a digit ({@code \d}).
     * Compiled once because it is applied to every token of every call.
     */
    private static final Pattern REJECTED_TOKEN = Pattern.compile(".{1}+|\\W.*|\\d.*");

    Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
    POSTaggerME posTagger;

    /**
     * Creates an extractor and loads the POS model from the classpath resource
     * {@code en-pos-maxent.bin}, resolved through this class's class loader.
     *
     * @throws IOException if the model resource cannot be found or cannot be read
     */
    public OpenNlpTokenExtraction() throws IOException {
        // The stream is only needed while the model is being loaded; close it afterwards.
        try (InputStream modelStream = getClass().getClassLoader().getResourceAsStream(POS_MODEL_RESOURCE)) {
            if (modelStream == null) {
                throw new IOException("POS model resource not found on classpath: " + POS_MODEL_RESOURCE);
            }
            this.posTagger = new POSTaggerME(new POSModel(modelStream));
        }
    }

    /**
     * Tokenizes the lower-cased input and returns every token that is not
     * rejected by {@link #REJECTED_TOKEN}, in original order.
     *
     * @param str text to tokenize; must not be {@code null}
     * @return the remaining tokens, possibly an empty array
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public String[] extractWordTokens(String str) {
        // Locale.ROOT keeps lower-casing independent of the JVM default locale.
        String[] tokens = this.tokenizer.tokenize(str.toLowerCase(Locale.ROOT));
        return filterTokens(Arrays.asList(tokens), REJECTED_TOKEN);
    }

    /**
     * Tokenizes the lower-cased input, POS-tags the tokens, and returns those
     * whose tag starts with {@code "N"} or {@code "J"} or equals {@code "FW"}
     * and that are not rejected by {@link #REJECTED_TOKEN}, in original order.
     *
     * @param str text to tokenize; must not be {@code null}
     * @return the remaining tokens, possibly an empty array
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public String[] extractRelevantWordTokens(String str) {
        // Locale.ROOT keeps lower-casing independent of the JVM default locale.
        String[] tokens = this.tokenizer.tokenize(str.toLowerCase(Locale.ROOT));
        String[] tags = this.posTagger.tag(tokens);

        List<String> relevant = new ArrayList<>();
        for (int i = 0; i < tags.length; i++) {
            String tag = tags[i];
            // NOTE(review): presumably Penn Treebank tags, i.e. nouns (N*),
            // adjectives (J*) and foreign words (FW) -- confirm against the model.
            if (tag.startsWith("N") || tag.startsWith("J") || tag.equals("FW")) {
                relevant.add(tokens[i]);
            }
        }
        return filterTokens(relevant, REJECTED_TOKEN);
    }

    /**
     * Returns the tokens that the given pattern does not match in full.
     *
     * @param iterable tokens to filter
     * @param pattern  pattern describing tokens to drop; a token is dropped
     *                 only when the whole token matches
     * @return the kept tokens in iteration order
     */
    private String[] filterTokens(Iterable<String> iterable, Pattern pattern) {
        List<String> kept = new ArrayList<>();
        for (String token : iterable) {
            if (!pattern.matcher(token).matches()) {
                kept.add(token);
            }
        }
        return kept.toArray(new String[0]);
    }
}
