/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.extract;

import cc.mallet.extract.LabeledSpan;
import cc.mallet.extract.LabeledSpans;
import cc.mallet.extract.Span;
import cc.mallet.extract.StringSpan;
import cc.mallet.extract.Tokenization;
import cc.mallet.extract.TokenizationFilter;
import cc.mallet.types.Label;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.Sequence;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;

public class BIOTokenizationFilter
implements TokenizationFilter,
Serializable {
    private static final long serialVersionUID = -8726127297313150023L;
    private static final int CURRENT_SERIAL_VERSION = 1;

    public LabeledSpans constructLabeledSpans(LabelAlphabet dict, Object document, Label backgroundTag, Tokenization input, Sequence seq) {
        LabeledSpans labeled = new LabeledSpans(document);
        this.addSpansFromTags(labeled, input, seq, dict, backgroundTag);
        return labeled;
    }

    private void addSpansFromTags(LabeledSpans labeled, Tokenization input, Sequence tags, LabelAlphabet dict, Label backgroundTag) {
        int i = 0;
        int docidx = 0;
        while (i < tags.size()) {
            Label nextTag;
            Label thisTag = dict.lookupLabel(tags.get(i).toString());
            int startTokenIdx = i;
            while (++i < tags.size() && !this.isBeginTag(nextTag = dict.lookupLabel(tags.get(i).toString())) && this.tagsMatch(thisTag, nextTag)) {
            }
            int endTokenIdx = i;
            Span span = this.createSpan(input, startTokenIdx, endTokenIdx);
            this.addBackgroundIfNecessary(labeled, (StringSpan)span, docidx, backgroundTag);
            docidx = ((StringSpan)span).getEndIdx();
            if (this.isBeginTag(thisTag) || this.isInsideTag(thisTag)) {
                thisTag = this.trimTag(dict, thisTag);
            }
            labeled.add(new LabeledSpan(span, thisTag, thisTag == backgroundTag));
        }
    }

    protected Span createSpan(Tokenization input, int startTokenIdx, int endTokenIdx) {
        return input.subspan(startTokenIdx, endTokenIdx);
    }

    private Label trimTag(LabelAlphabet dict, Label tag) {
        String name = (String)tag.getEntry();
        return dict.lookupLabel(name.substring(2));
    }

    private boolean tagsMatch(Label tag1, Label tag2) {
        String name1 = (String)tag1.getEntry();
        String name2 = (String)tag2.getEntry();
        if (this.isBeginTag(tag1) || this.isInsideTag(tag1)) {
            name1 = name1.substring(2);
        }
        if (this.isInsideTag(tag2)) {
            name2 = name2.substring(2);
        }
        return name1.equals(name2);
    }

    private boolean isBeginTag(Label lbl) {
        String name = (String)lbl.getEntry();
        return name.startsWith("B-");
    }

    private boolean isInsideTag(Label lbl) {
        String name = (String)lbl.getEntry();
        return name.startsWith("I-");
    }

    private void addBackgroundIfNecessary(LabeledSpans labeled, StringSpan span, int docidx, Label background) {
        int nextIdx = span.getStartIdx();
        if (docidx < nextIdx) {
            StringSpan newSpan = new StringSpan((CharSequence)span.getDocument(), docidx, nextIdx);
            labeled.add(new LabeledSpan(newSpan, background, true));
        }
    }

    private void writeObject(ObjectOutputStream out) throws IOException {
        out.defaultWriteObject();
        out.writeInt(1);
    }

    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        in.defaultReadObject();
        in.readInt();
    }
}

