/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.temporal.ae;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.ContractionToken;
import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
import org.apache.ctakes.typesystem.type.syntax.NumToken;
import org.apache.ctakes.typesystem.type.syntax.PunctuationToken;
import org.apache.ctakes.typesystem.type.syntax.SymbolToken;
import org.apache.ctakes.typesystem.type.syntax.TerminalTreebankNode;
import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.StringArrayFS;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.FSCollectionFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.cas.StringArray;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.Level;
import org.cleartk.util.ViewUriUtil;
import org.cleartk.util.treebank.TopTreebankNode;
import org.cleartk.util.treebank.TreebankFormatParser;

public class THYMETreebankReader
extends JCasAnnotator_ImplBase {
    public static Logger logger = Logger.getLogger(THYMETreebankReader.class);
    public static final String TREEBANK_DIRECTORY = "treebankDirectory";
    private static final Pattern headerPatt = Pattern.compile("\\[(meta|start|end) [^\\]]*?\\]");
    @ConfigurationParameter(name="treebankDirectory", mandatory=true)
    protected File treebankDirectory;
    File[] subdirs = null;

    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        this.subdirs = this.treebankDirectory.listFiles(new FileFilter(){

            @Override
            public boolean accept(File pathname) {
                return pathname.isDirectory() && !pathname.isHidden();
            }
        });
    }

    public void process(JCas jcas) throws AnalysisEngineProcessException {
        List utilTrees;
        String tbText;
        File subdir;
        URI uri = ViewUriUtil.getURI((JCas)jcas);
        logger.info((Object)("Document id is: " + uri.toString()));
        String fn = uri.getPath().substring(uri.getPath().lastIndexOf(47) + 1) + ".xml.tree";
        File treeFile = null;
        File[] fileArray = this.subdirs;
        int n = fileArray.length;
        for (int i = 0; i < n && !(treeFile = new File(subdir = fileArray[i], fn)).exists(); ++i) {
            treeFile = null;
        }
        if (treeFile == null) {
            this.getContext().getLogger().log(Level.WARNING, "Could not find treeFile: " + fn);
            return;
        }
        try {
            tbText = FileUtils.file2String(treeFile);
        }
        catch (IOException e1) {
            throw new AnalysisEngineProcessException((Throwable)e1);
        }
        StringBuffer fileText = new StringBuffer(jcas.getDocumentText());
        Matcher m = headerPatt.matcher(fileText);
        while (m.find()) {
            int headerLen = m.group().length();
            fileText.replace(m.start(), m.end(), THYMETreebankReader.getWhitespaceString(headerLen));
        }
        try {
            utilTrees = TreebankFormatParser.parseDocument((String)tbText, (int)0, (String)fileText.toString());
        }
        catch (Exception e) {
            this.getContext().getLogger().log(Level.WARNING, String.format("Skipping %s due to alignment problems", fn), (Throwable)e);
            return;
        }
        ArrayList sents = new ArrayList(JCasUtil.select((JCas)jcas, Sentence.class));
        for (Sentence sent : sents) {
            sent.removeFromIndexes();
        }
        HashMap<String, TOKEN_TYPE> tokMap = new HashMap<String, TOKEN_TYPE>();
        ArrayList toks = new ArrayList(JCasUtil.select((JCas)jcas, BaseToken.class));
        for (BaseToken tok : toks) {
            String key = THYMETreebankReader.getAnnotationKey((Annotation)tok);
            if (tok instanceof WordToken) {
                tokMap.put(key, TOKEN_TYPE.WORD);
            } else if (tok instanceof PunctuationToken) {
                tokMap.put(key, TOKEN_TYPE.PUNCT);
            } else if (tok instanceof SymbolToken) {
                tokMap.put(key, TOKEN_TYPE.SYMBOL);
            } else if (tok instanceof NumToken) {
                tokMap.put(key, TOKEN_TYPE.NUM);
            } else if (tok instanceof NewlineToken) {
                tokMap.put(key, TOKEN_TYPE.NEWLINE);
            } else if (tok instanceof ContractionToken) {
                tokMap.put(key, TOKEN_TYPE.CONTRACTION);
            }
            tok.removeFromIndexes();
        }
        for (TopTreebankNode utilTree : utilTrees) {
            org.apache.ctakes.typesystem.type.syntax.TopTreebankNode tree = THYMETreebankReader.convert(utilTree, jcas);
            Sentence sentence = new Sentence(jcas, tree.getBegin(), tree.getEnd());
            sentence.addToIndexes();
            for (int i = 0; i < tree.getTerminals().size(); ++i) {
                TerminalTreebankNode leaf = tree.getTerminals(i);
                if (leaf.getBegin() == leaf.getEnd()) continue;
                String key = THYMETreebankReader.getAnnotationKey((Annotation)leaf);
                BaseToken token = null;
                if (tokMap.containsKey(key)) {
                    TOKEN_TYPE tokType = (TOKEN_TYPE)((Object)tokMap.get(key));
                    switch (tokType) {
                        case CONTRACTION: {
                            token = new ContractionToken(jcas, leaf.getBegin(), leaf.getEnd());
                            break;
                        }
                        case NEWLINE: {
                            token = new NewlineToken(jcas, leaf.getBegin(), leaf.getEnd());
                            break;
                        }
                        case NUM: {
                            token = new NumToken(jcas, leaf.getBegin(), leaf.getEnd());
                            break;
                        }
                        case PUNCT: {
                            token = new PunctuationToken(jcas, leaf.getBegin(), leaf.getEnd());
                            break;
                        }
                        case SYMBOL: {
                            token = new SymbolToken(jcas, leaf.getBegin(), leaf.getEnd());
                            break;
                        }
                        case WORD: {
                            token = new WordToken(jcas, leaf.getBegin(), leaf.getEnd());
                            break;
                        }
                        default: {
                            token = new BaseToken(jcas, leaf.getBegin(), leaf.getEnd());
                            break;
                        }
                    }
                } else {
                    token = new BaseToken(jcas, leaf.getBegin(), leaf.getEnd());
                }
                token.setPartOfSpeech(leaf.getNodeType());
                token.addToIndexes();
            }
        }
    }

    private static String getWhitespaceString(int headerLen) {
        char[] chars = new char[headerLen];
        Arrays.fill(chars, ' ');
        return new String(chars);
    }

    private static org.apache.ctakes.typesystem.type.syntax.TopTreebankNode convert(TopTreebankNode inTree, JCas jcas) {
        org.apache.ctakes.typesystem.type.syntax.TopTreebankNode outTree = new org.apache.ctakes.typesystem.type.syntax.TopTreebankNode(jcas, inTree.getTextBegin(), inTree.getTextEnd());
        outTree.setTreebankParse(inTree.getTreebankParse());
        THYMETreebankReader.convert((org.cleartk.util.treebank.TreebankNode)inTree, jcas, (TreebankNode)outTree, null);
        THYMETreebankReader.initTerminalNodes(outTree, jcas);
        outTree.addToIndexes();
        return outTree;
    }

    public static void initTerminalNodes(org.apache.ctakes.typesystem.type.syntax.TopTreebankNode uimaNode, JCas jCas) {
        ArrayList<TerminalTreebankNode> terminals = new ArrayList<TerminalTreebankNode>();
        THYMETreebankReader._initTerminalNodes((TreebankNode)uimaNode, terminals);
        for (int i = 0; i < terminals.size(); ++i) {
            TerminalTreebankNode terminal = (TerminalTreebankNode)terminals.get(i);
            terminal.setIndex(i);
        }
        FSArray terminalsFSArray = new FSArray(jCas, terminals.size());
        terminalsFSArray.copyFromArray(terminals.toArray(new FeatureStructure[terminals.size()]), 0, 0, terminals.size());
        uimaNode.setTerminals(terminalsFSArray);
    }

    private static void _initTerminalNodes(TreebankNode node, List<TerminalTreebankNode> terminals) {
        FSArray children = node.getChildren();
        for (int i = 0; i < children.size(); ++i) {
            TreebankNode child = (TreebankNode)children.get(i);
            if (child instanceof TerminalTreebankNode) {
                terminals.add((TerminalTreebankNode)child);
                continue;
            }
            THYMETreebankReader._initTerminalNodes(child, terminals);
        }
    }

    public static TreebankNode convert(org.cleartk.util.treebank.TreebankNode pojoNode, JCas jCas, TreebankNode uimaNode, TreebankNode parentNode) {
        uimaNode.setNodeType(pojoNode.getType());
        uimaNode.setNodeTags(new StringArray(jCas, pojoNode.getTags().length));
        FSCollectionFactory.fillArrayFS((StringArrayFS)uimaNode.getNodeTags(), (String[])pojoNode.getTags());
        uimaNode.setNodeValue(pojoNode.getValue());
        uimaNode.setLeaf(pojoNode.isLeaf());
        uimaNode.setParent(parentNode);
        ArrayList<TreebankNode> uimaChildren = new ArrayList<TreebankNode>();
        for (org.cleartk.util.treebank.TreebankNode child : pojoNode.getChildren()) {
            Object childNode = child.isLeaf() ? new TerminalTreebankNode(jCas, child.getTextBegin(), child.getTextEnd()) : new TreebankNode(jCas, child.getTextBegin(), child.getTextEnd());
            uimaChildren.add(THYMETreebankReader.convert(child, jCas, (TreebankNode)childNode, uimaNode));
            childNode.addToIndexes();
        }
        FSArray uimaChildrenFSArray = new FSArray(jCas, uimaChildren.size());
        uimaChildrenFSArray.copyFromArray(uimaChildren.toArray(new FeatureStructure[uimaChildren.size()]), 0, 0, uimaChildren.size());
        uimaNode.setChildren(uimaChildrenFSArray);
        return uimaNode;
    }

    public static AnalysisEngineDescription getDescription(File treebankDirectory) throws ResourceInitializationException {
        return AnalysisEngineFactory.createEngineDescription(THYMETreebankReader.class, (Object[])new Object[]{TREEBANK_DIRECTORY, treebankDirectory});
    }

    public static void main(String[] args) {
        String testString = "[meta rev_date=\"02/20/2010\" start_date=\"02/20/2010\" rev=\"0002\"]\n\n[start section id=\"20112\"]\n\n#1 Dilated esophagus on CT-scan\n#2 Adenocarcinoma right colon\n#3 Symptomatic anemia\n#4 Hypothyroidism";
        Matcher m = headerPatt.matcher(testString);
        while (m.find()) {
            System.out.println("FOund match at: " + m.start() + "-" + m.end());
        }
    }

    public static final String getAnnotationKey(Annotation a) {
        return a.getBegin() + "-" + a.getEnd();
    }

    static enum TOKEN_TYPE {
        WORD,
        PUNCT,
        SYMBOL,
        NUM,
        NEWLINE,
        CONTRACTION;

    }
}

