/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.text.preprocessing;

import com.carrotsearch.hppc.BitSet;
import com.carrotsearch.hppc.IntArrayList;
import java.util.ArrayList;
import org.carrot2.core.attribute.Processing;
import org.carrot2.shaded.guava.common.collect.Lists;
import org.carrot2.text.analysis.TokenTypeUtils;
import org.carrot2.text.preprocessing.LabelFilterProcessor;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Label;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.constraint.IntRange;

/**
 * Computes, for every label in {@code context.allLabels}, the set of documents the label
 * is assigned to, and optionally drops labels assigned to too few documents.
 *
 * <p>The result is written back to {@code context.allLabels.documentIndices} (and, when
 * labels are dropped, to {@code context.allLabels.featureIndex} as well).
 */
@Bindable(prefix="DocumentAssigner")
public class DocumentAssigner {
    /**
     * Controls how phrase labels are assigned to documents. When {@code true}, the phrase's
     * own {@code tfByDocument} entry is used. When {@code false}, a document is assigned if
     * it is listed for the stem of every non-common word of the phrase.
     */
    @Input
    @Processing
    @Attribute
    @Label(value="Exact phrase assignment")
    @Level(value=AttributeLevel.MEDIUM)
    @Group(value="Preprocessing")
    public boolean exactPhraseAssignment = false;

    /**
     * Minimum number of assigned documents a label needs in order to be kept. Filtering is
     * only performed when this value is greater than 1.
     */
    @Input
    @Processing
    @Attribute
    @IntRange(min=1, max=100)
    @Label(value="Minimum cluster size")
    @Level(value=AttributeLevel.MEDIUM)
    @Group(value="Preprocessing")
    public int minClusterSize = 2;

    /**
     * Assigns documents to labels and stores the assignment in the provided context.
     *
     * <p>A label whose feature index is lower than the number of words is treated as a
     * single word and receives the documents listed for that word's stem. Any other feature
     * index is treated as a phrase, whose index is the feature index minus the word count.
     *
     * @param context preprocessing context to read from and update; its {@code allLabels},
     *     {@code allStems}, {@code allWords}, {@code allPhrases} and {@code documents}
     *     members are dereferenced unconditionally
     */
    public void assign(PreprocessingContext context) {
        final int[] labelsFeatureIndex = context.allLabels.featureIndex;
        final int[][] stemsTfByDocument = context.allStems.tfByDocument;
        final int[] wordsStemIndex = context.allWords.stemIndex;
        final short[] wordsTypes = context.allWords.type;
        final int[][] phrasesTfByDocument = context.allPhrases.tfByDocument;
        final int[][] phrasesWordIndices = context.allPhrases.wordIndices;
        final int wordCount = wordsStemIndex.length;
        final int documentCount = context.documents.size();

        final BitSet[] labelsDocumentIndices = new BitSet[labelsFeatureIndex.length];
        for (int i = 0; i < labelsFeatureIndex.length; ++i) {
            final int featureIndex = labelsFeatureIndex[i];
            final BitSet documentIndices;
            if (featureIndex < wordCount) {
                // Single-word label: documents come from the word's stem.
                documentIndices = new BitSet(documentCount);
                addTfByDocumentToBitSet(documentIndices, stemsTfByDocument[wordsStemIndex[featureIndex]]);
            } else {
                final int phraseIndex = featureIndex - wordCount;
                if (this.exactPhraseAssignment) {
                    documentIndices = new BitSet(documentCount);
                    addTfByDocumentToBitSet(documentIndices, phrasesTfByDocument[phraseIndex]);
                } else {
                    documentIndices = intersectNonCommonWordDocuments(
                        phrasesWordIndices[phraseIndex], wordsTypes, wordsStemIndex, stemsTfByDocument, documentCount);
                }
            }
            labelsDocumentIndices[i] = documentIndices;
        }

        if (this.minClusterSize > 1) {
            // Keep only labels assigned to at least minClusterSize documents. The two
            // collections are filled in lockstep so that position k in one corresponds to
            // position k in the other.
            final IntArrayList newFeatureIndex = new IntArrayList(labelsFeatureIndex.length);
            final ArrayList<BitSet> newDocumentIndices = Lists.newArrayListWithExpectedSize(labelsFeatureIndex.length);
            for (int i = 0; i < labelsFeatureIndex.length; ++i) {
                if (labelsDocumentIndices[i].cardinality() < this.minClusterSize) {
                    continue;
                }
                newFeatureIndex.add(labelsFeatureIndex[i]);
                newDocumentIndices.add(labelsDocumentIndices[i]);
            }
            context.allLabels.documentIndices = newDocumentIndices.toArray(new BitSet[newDocumentIndices.size()]);
            context.allLabels.featureIndex = newFeatureIndex.toArray();
            // The label list has changed, so let LabelFilterProcessor refresh whatever it
            // derives from it (presumably the index of the first phrase label -- confirm
            // against LabelFilterProcessor).
            LabelFilterProcessor.updateFirstPhraseIndex(context);
        } else {
            context.allLabels.documentIndices = labelsDocumentIndices;
        }
    }

    /**
     * Builds the set of documents listed for the stems of all non-common words of a phrase.
     *
     * <p>Words for which {@link TokenTypeUtils#isCommon} returns {@code true} are ignored.
     * If every word of the phrase is ignored, the returned set is empty.
     *
     * @param wordIndices indices of the phrase's words
     * @param wordsTypes type flags, indexed by word index
     * @param wordsStemIndex stem index, indexed by word index
     * @param stemsTfByDocument per-stem document data, indexed by stem index
     * @param documentCount number of documents, used to size the bit sets
     * @return a new bit set with the intersection of the considered words' documents
     */
    private static BitSet intersectNonCommonWordDocuments(int[] wordIndices, short[] wordsTypes,
            int[] wordsStemIndex, int[][] stemsTfByDocument, int documentCount) {
        final BitSet documentIndices = new BitSet(documentCount);
        boolean firstAdded = false;
        for (final int wordIndex : wordIndices) {
            if (TokenTypeUtils.isCommon(wordsTypes[wordIndex])) {
                continue;
            }
            final int[] tfByDocument = stemsTfByDocument[wordsStemIndex[wordIndex]];
            if (!firstAdded) {
                // The first considered word seeds the result; intersecting with the
                // initially empty set would always yield an empty set.
                addTfByDocumentToBitSet(documentIndices, tfByDocument);
                firstAdded = true;
            } else {
                final BitSet temp = new BitSet(documentCount);
                addTfByDocumentToBitSet(temp, tfByDocument);
                documentIndices.and(temp);
            }
        }
        return documentIndices;
    }

    /**
     * Sets a bit for every value stored at an even position of {@code tfByDocument}.
     *
     * <p>Values at odd positions are skipped, as is a trailing unpaired element.
     * NOTE(review): the array presumably holds (document index, term frequency) pairs --
     * confirm against the code that fills {@code tfByDocument}.
     *
     * @param documentIndices bit set to update in place
     * @param tfByDocument array whose even-position values are used as bit indices
     */
    private static void addTfByDocumentToBitSet(BitSet documentIndices, int[] tfByDocument) {
        for (int j = 0; j < tfByDocument.length / 2; ++j) {
            documentIndices.set(tfByDocument[j * 2]);
        }
    }
}

