/*
 * Decompiled with CFR 0.152.
 */
package org.cleartk.corpus.ace2005;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.ArrayFS;
import org.apache.uima.cas.CASException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.SofaCapability;
import org.apache.uima.fit.util.FSCollectionFactory;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.ne.type.Chunk;
import org.cleartk.ne.type.NamedEntity;
import org.cleartk.ne.type.NamedEntityMention;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;

/**
 * Populates the initial view of a CAS with gold-standard named entity annotations read from an
 * ACE 2005 APF (XML) file.
 *
 * <p>The location of the APF file is taken from the sofa data URI of the
 * {@code ACE_2005_APF_URI_VIEW} view. For every {@code entity} element below the APF
 * {@code document} element a {@link NamedEntity} is created and indexed in the
 * {@code _InitialView} view; for every {@code entity_mention} child of that entity a
 * {@link NamedEntityMention} is created, indexed, and attached to the entity.
 *
 * <p>The APF {@code END} attribute is treated as an inclusive offset, so every annotation created
 * here ends at {@code END + 1}.
 */
@SofaCapability(inputSofas={"ACE_2005_APF_URI_VIEW", "_InitialView"}, outputSofas={})
public class Ace2005GoldAnnotator
extends JCasAnnotator_ImplBase {
    /**
     * Literal-match pattern for the XML entity {@code &amp;}, compiled in
     * {@link #initialize(UimaContext)}. It is not read by {@link #process(JCas)}; the field is
     * kept (package-private, as before) so that any existing code referring to it keeps compiling.
     */
    Pattern ampPattern;

    /**
     * Initializes the superclass and compiles {@link #ampPattern}.
     *
     * @param aContext the UIMA context handed to the superclass
     * @throws ResourceInitializationException if the superclass initialization fails
     */
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        this.ampPattern = Pattern.compile(Pattern.quote("&amp;"));
    }

    /**
     * Reads the APF file referenced by the {@code ACE_2005_APF_URI_VIEW} view and adds the named
     * entities and entity mentions it describes to the {@code _InitialView} view.
     *
     * @param jCas the CAS providing both the APF URI view and the initial (text) view
     * @throws AnalysisEngineProcessException if a view cannot be obtained, the APF URI is not a
     *         valid URI, or the APF file cannot be read or parsed
     * @throws StringIndexOutOfBoundsException if the extent of an entity mention does not lie
     *         within the document text
     * @throws NumberFormatException if a {@code START} or {@code END} attribute is not an integer
     */
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        try {
            String apfUri = jCas.getView("ACE_2005_APF_URI_VIEW").getSofaDataURI();
            JCas initialView = jCas.getView("_InitialView");
            String documentText = initialView.getDocumentText();

            // NOTE(review): the file is parsed with otherwise default SAXBuilder settings;
            // confirm that DTD / external entity handling is acceptable for the APF inputs used.
            SAXBuilder builder = new SAXBuilder();
            builder.setDTDHandler(null);
            Document doc = builder.build(new File(new URI(apfUri)));
            Element apfDocument = doc.getRootElement().getChild("document");

            for (Element apfEntity : apfDocument.getChildren("entity")) {
                NamedEntity namedEntity = new NamedEntity(initialView);
                namedEntity.setEntityType(apfEntity.getAttributeValue("TYPE"));
                namedEntity.setEntitySubtype(apfEntity.getAttributeValue("SUBTYPE"));
                namedEntity.setEntityClass(apfEntity.getAttributeValue("CLASS"));
                namedEntity.setEntityId(apfEntity.getAttributeValue("ID"));
                namedEntity.addToIndexes();

                ArrayList<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
                for (Element entityMention : apfEntity.getChildren("entity_mention")) {
                    mentions.add(createMention(initialView, namedEntity, entityMention, documentText));
                }

                // The mentions feature is an FSArray sized to the collected mentions and then
                // filled in document order of the entity_mention elements.
                namedEntity.setMentions(new FSArray(jCas, mentions.size()));
                FSCollectionFactory.fillArrayFS((ArrayFS)namedEntity.getMentions(), mentions);
            }
        }
        catch (CASException ce) {
            throw new AnalysisEngineProcessException(ce);
        }
        catch (IOException ioe) {
            throw new AnalysisEngineProcessException(ioe);
        }
        catch (JDOMException je) {
            throw new AnalysisEngineProcessException(je);
        }
        catch (URISyntaxException use) {
            throw new AnalysisEngineProcessException(use);
        }
    }

    /**
     * Creates and indexes the {@link NamedEntityMention} described by one APF
     * {@code entity_mention} element.
     *
     * <p>The mention spans the {@code extent/charseq} offsets; its {@code annotation} feature is a
     * {@link Chunk} over the same span and its {@code head} feature is a {@link Chunk} over the
     * {@code head/charseq} offsets. The two chunks are not added to the indexes themselves.
     *
     * @param view the view in which the annotations are created
     * @param entity the entity the mention refers to
     * @param entityMention the APF {@code entity_mention} element
     * @param documentText the text of {@code view}, used to validate the extent offsets
     * @return the newly created, indexed mention
     * @throws StringIndexOutOfBoundsException if the extent is not within {@code documentText}
     */
    private static NamedEntityMention createMention(JCas view, NamedEntity entity, Element entityMention, String documentText) {
        // Look each charseq element up once instead of re-walking the DOM per attribute.
        Element extentCharseq = entityMention.getChild("extent").getChild("charseq");
        int start = parseOffset(extentCharseq, "START");
        int endExclusive = parseOffset(extentCharseq, "END") + 1;

        // Explicit form of the bounds check that was previously only a side effect of an
        // otherwise unused documentText.substring(start, end + 1) call.
        int textLength = documentText.length();
        if (start < 0 || start > endExclusive || endExclusive > textLength) {
            throw new StringIndexOutOfBoundsException("extent [" + start + ", " + endExclusive
                    + ") of entity mention " + entityMention.getAttributeValue("ID")
                    + " is outside the document text of length " + textLength);
        }

        Element headCharseq = entityMention.getChild("head").getChild("charseq");
        int headStart = parseOffset(headCharseq, "START");
        int headEndExclusive = parseOffset(headCharseq, "END") + 1;

        NamedEntityMention mention = new NamedEntityMention(view, start, endExclusive);
        mention.setMentionId(entityMention.getAttributeValue("ID"));
        mention.setMentionType(entityMention.getAttributeValue("TYPE"));
        mention.setMentionedEntity(entity);
        mention.setAnnotation(new Chunk(view, start, endExclusive));
        mention.setHead(new Chunk(view, headStart, headEndExclusive));
        mention.addToIndexes();
        return mention;
    }

    /**
     * Parses an integer character offset from an attribute of a {@code charseq} element.
     *
     * @param charseq the element carrying the attribute
     * @param attributeName the attribute to read, e.g. {@code START} or {@code END}
     * @return the attribute value as an {@code int}
     * @throws NumberFormatException if the attribute is missing or not an integer
     */
    private static int parseOffset(Element charseq, String attributeName) {
        return Integer.parseInt(charseq.getAttributeValue(attributeName));
    }
}

