package ai.idylnlp.nlp.recognizer.deep;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.util.FilterObjectStream;
import opennlp.tools.util.ObjectStream;
import org.deeplearning4j.models.embeddings.wordvectors.WordVectors;
import org.deeplearning4j.text.tokenization.tokenizer.preprocessor.CommonPreprocessor;
import org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;

/* loaded from: input_file:ai/idylnlp/nlp/recognizer/deep/NameSampleToDataSetStream.class */
/**
 * Adapts a stream of {@link NameSample}s into a stream of {@link DataSet}s.
 *
 * <p>Each sample read from the wrapped stream is expanded by
 * {@link #createDataSets(NameSample)} into zero or more data sets, which are then
 * handed out one at a time by {@link #read()}.
 *
 * <p>The stream keeps a cursor over the data sets of the current sample and adds no
 * synchronization of its own.
 */
public class NameSampleToDataSetStream extends FilterObjectStream<NameSample, DataSet> {
    private final WordVectors wordVectors;
    private final String[] labels;
    private final int windowSize;
    // Stored for callers of the constructor; not read anywhere in this class.
    private final int vectorSize;
    // Data sets produced from the most recently read sample that have not been returned yet.
    private Iterator<DataSet> dataSets;

    /**
     * Creates the adapter.
     *
     * @param objectStream the sample stream to wrap
     * @param wordVectors word vectors passed to {@code DeepLearningUtils.mapToFeatureMatrices}
     * @param windowSize window size passed to both {@code DeepLearningUtils} mapping helpers
     * @param vectorSize stored but not otherwise used by this class
     * @param labels labels passed to {@code DeepLearningUtils.mapToLabelVectors}; the array is
     *     kept by reference, not copied
     */
    public NameSampleToDataSetStream(ObjectStream<NameSample> objectStream, WordVectors wordVectors, int windowSize, int vectorSize, String[] labels) {
        super(objectStream);
        this.dataSets = Collections.emptyListIterator();
        this.wordVectors = wordVectors;
        this.windowSize = windowSize;
        this.vectorSize = vectorSize;
        this.labels = labels;
    }

    /**
     * Returns the next data set, or {@code null} once the wrapped stream is exhausted.
     *
     * <p>The decompiler renamed this method to {@link #m2read()}, which left the class without
     * an implementation of {@code ObjectStream.read()}. This method restores it by delegating.
     * {@code @Override} is intentionally not declared here; NOTE(review): add it once the
     * OpenNLP supertype is confirmed to resolve on the build classpath.
     *
     * @return the next data set, or {@code null} when no more are available
     * @throws IOException if reading from the wrapped stream fails
     */
    public final DataSet read() throws IOException {
        return m2read();
    }

    /**
     * Returns the next data set, or {@code null} once the wrapped stream is exhausted.
     *
     * <p>Name kept as produced by the decompiler (renamed from {@code read}) so that existing
     * callers of {@code m2read()} continue to work.
     *
     * @return the next data set, or {@code null} when no more are available
     * @throws IOException if reading from the wrapped stream fails
     */
    public final DataSet m2read() throws IOException {
        // Keep pulling samples until one yields at least one data set; a sample may yield none.
        while (!this.dataSets.hasNext()) {
            NameSample nameSample = this.samples.read();
            if (nameSample == null) {
                return null;
            }
            this.dataSets = createDataSets(nameSample);
        }
        return this.dataSets.next();
    }

    /**
     * Builds the data sets for one sample by pairing each feature matrix with the label vector
     * at the same index.
     *
     * @param nameSample the sample to convert
     * @return an iterator over the data sets created for {@code nameSample}
     */
    private Iterator<DataSet> createDataSets(NameSample nameSample) {
        DefaultTokenizerFactory tokenizerFactory = new DefaultTokenizerFactory();
        tokenizerFactory.setTokenPreProcessor(new CommonPreprocessor());

        // The sentence is re-joined with spaces and tokenized again through the DL4J tokenizer.
        List<String> tokens = tokenizerFactory.create(String.join(" ", nameSample.getSentence())).getTokens();

        List<INDArray> featureMatrices =
                DeepLearningUtils.mapToFeatureMatrices(this.wordVectors, tokens.toArray(new String[0]), this.windowSize);
        List<INDArray> labelVectors = DeepLearningUtils.mapToLabelVectors(nameSample, this.windowSize, this.labels);

        // NOTE(review): features come from the re-tokenized text while labels come from the
        // sample itself; this loop assumes labelVectors has at least as many entries as
        // featureMatrices and will throw IndexOutOfBoundsException otherwise - confirm the
        // two helpers always agree in size.
        List<DataSet> result = new ArrayList<>(featureMatrices.size());
        for (int i = 0; i < featureMatrices.size(); i++) {
            result.add(new DataSet(featureMatrices.get(i), labelVectors.get(i)));
        }
        return result.iterator();
    }
}
