/*
 * Decompiled with CFR 0.152.
 */
package com.wcohen.ss.expt;

import com.wcohen.ss.api.Token;
import com.wcohen.ss.api.Tokenizer;
import com.wcohen.ss.expt.Blocker;
import com.wcohen.ss.expt.MatchData;
import com.wcohen.ss.tokens.CharacterTokenizer;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

public class AbbreviationsBlocker
extends Blocker {
    private static double defaultMaxFraction = 1.0;
    private static final Set<Integer> STOPWORD_TOKEN_MARKER;
    private ArrayList<Blocker.Pair> pairList;
    private double maxFraction;
    private int numCorrectPairs;
    protected Tokenizer tokenizer;

    public AbbreviationsBlocker(Tokenizer tokenizer, double maxFraction) {
        this.maxFraction = maxFraction;
        this.clusterMode = false;
        this.tokenizer = tokenizer;
    }

    public AbbreviationsBlocker() {
        this(CharacterTokenizer.DEFAULT_TOKENIZER, defaultMaxFraction);
    }

    public double getMaxFraction() {
        return this.maxFraction;
    }

    public void setMaxFraction(double maxFraction) {
        this.maxFraction = maxFraction;
    }

    public void block(MatchData data) {
        this.numCorrectPairs = this.countCorrectPairs(data);
        this.pairList = new ArrayList();
        if (this.clusterMode) {
            throw new IllegalArgumentException("clusterMode=true is not valid for this blocker");
        }
        String sfSource = data.getSource(0);
        String lfSource = data.getSource(1);
        if (!sfSource.equals("short")) {
            String tmp = sfSource;
            sfSource = lfSource;
            lfSource = tmp;
        }
        double maxSetSize = (double)data.numInstances(sfSource) * this.maxFraction;
        TreeMap<Token, Set<Integer>> index = new TreeMap<Token, Set<Integer>>();
        for (int i = 0; i < data.numInstances(sfSource); ++i) {
            Token[] tokens = this.tokenizer.tokenize(data.getInstance(sfSource, i).unwrap());
            for (int j = 0; j < tokens.length; ++j) {
                TreeSet<Integer> containers = (TreeSet<Integer>)index.get(tokens[j]);
                if (containers != STOPWORD_TOKEN_MARKER && containers == null) {
                    containers = new TreeSet<Integer>();
                    index.put(tokens[j], containers);
                }
                containers.add(new Integer(i));
                if (!((double)containers.size() > maxSetSize)) continue;
                index.put(tokens[j], STOPWORD_TOKEN_MARKER);
            }
        }
        TreeSet<Integer> pairedUpInstances = new TreeSet<Integer>();
        for (int i = 0; i < data.numInstances(lfSource); ++i) {
            MatchData.Instance lfInst = data.getInstance(lfSource, i);
            pairedUpInstances.clear();
            Token[] tokens = this.tokenizer.tokenize(lfInst.unwrap());
            for (int j = 0; j < tokens.length; ++j) {
                Set containers = (Set)index.get(tokens[j]);
                if (containers == null || containers == STOPWORD_TOKEN_MARKER) continue;
                for (Integer smallIndexInteger : containers) {
                    int smallIndex = smallIndexInteger;
                    if (pairedUpInstances.contains(smallIndexInteger) || sfSource == lfSource && smallIndex <= i) continue;
                    MatchData.Instance sfInst = data.getInstance(sfSource, smallIndex);
                    this.pairList.add(new Blocker.Pair(sfInst, lfInst, sfInst.sameId(lfInst)));
                    pairedUpInstances.add(smallIndexInteger);
                }
            }
        }
    }

    public int size() {
        return this.pairList.size();
    }

    public Blocker.Pair getPair(int i) {
        return this.pairList.get(i);
    }

    public String toString() {
        return "[AbbreviationsBlocker:maxFraction=" + this.maxFraction + "]";
    }

    public int numCorrectPairs() {
        return this.numCorrectPairs;
    }

    static {
        try {
            String s = System.getProperty("blockerMaxFraction");
            if (s != null) {
                defaultMaxFraction = Double.parseDouble(s);
            }
        }
        catch (NumberFormatException numberFormatException) {
            // empty catch block
        }
        STOPWORD_TOKEN_MARKER = new HashSet<Integer>();
    }
}

