/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.gui.dictionary.umls;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.ctakes.gui.dictionary.umls.Concept;
import org.apache.ctakes.gui.dictionary.umls.CuiCodeUtil;
import org.apache.ctakes.gui.dictionary.umls.DoseUtil;
import org.apache.ctakes.gui.dictionary.umls.MrconsoIndex;
import org.apache.ctakes.gui.dictionary.umls.UmlsTermUtil;
import org.apache.ctakes.gui.dictionary.util.FileUtil;
import org.apache.ctakes.gui.dictionary.util.TextTokenizer;
import org.apache.log4j.Logger;

/**
 * Static utilities that scan the UMLS {@code MRCONSO.RRF} file located under a umls directory.
 * <p>
 * Two scans are offered: {@link #getValidVocabularyCuis(String, Collection)} collects the cui codes of rows
 * that belong to wanted source vocabularies, and {@link #parseAllConcepts} adds texts, preferred texts and
 * source codes to already-known {@link Concept}s.  Rows are filtered by term type using the exclusion lists
 * declared in this class.
 */
public final class MrconsoParser {
    private static final Logger LOGGER = Logger.getLogger("MrConsoParser");
    private static final String MR_CONSO_SUB_PATH = "/META/MRCONSO.RRF";
    private static final String[] DEFAULT_EXCLUSIONS = new String[]{"FN", "CCS", "CA2", "CA3", "PSN", "TMSY", "SBD", "SBDC", "SBDF", "SBDG", "SCD", "SCDC", "SCDF", "SCDG", "BPCK", "GPCK", "XM"};
    private static final String[] SNOMED_OBSOLETES = new String[]{"OF", "MTH_OF", "OAP", "MTH_OAP", "OAF", "MTH_OAF", "IS", "MTH_IS", "OAS", "MTH_OAS", "OP", "MTH_OP"};
    private static final String[] GO_OBSOLETES = new String[]{"EOT"};
    private static final String[] LOINC_OBSOLETES = new String[]{"LO", "OLC", "MTH_LO", "OOSN"};
    private static final String[] MEDRA_OBSOLETES = new String[]{"OL", "MTH_OL"};
    private static final String[] MESH_EXCLUSIONS = new String[]{"N1", "EN", "PEN"};
    // NOTE(review): not referenced anywhere in this class; isTermTypeOk hard-codes the RXNORM "SY" rule instead.
    private static final String[] RXNORM_EXCLUSIONS = new String[]{"SY"};
    private static final String[] NCI_EXCLUSIONS = new String[]{"CSN"};
    private static final String[] UMDNS_EXCLUSIONS = new String[]{"RT"};

    /** Number of progress-log-free lines between two progress messages. */
    private static final long LOG_INTERVAL = 100000L;

    /** Minimum row size so that every column read by {@link #parseAllConcepts} can be fetched safely. */
    private static final int PARSE_ROW_MIN_SIZE = requiredSize(MrconsoIndex.CUI, MrconsoIndex.LANGUAGE,
            MrconsoIndex.STATUS, MrconsoIndex.FORM, MrconsoIndex.SOURCE, MrconsoIndex.TERM_TYPE,
            MrconsoIndex.SOURCE_CODE, MrconsoIndex.TEXT);

    /** Minimum row size so that every column read by the vocabulary cui scan can be fetched safely. */
    private static final int VOCABULARY_ROW_MIN_SIZE = requiredSize(MrconsoIndex.CUI, MrconsoIndex.SOURCE,
            MrconsoIndex.TERM_TYPE);

    private MrconsoParser() {
    }

    /**
     * @return a fresh copy of the default excluded term types; callers may modify it freely without
     * affecting later calls
     */
    public static String[] getDefaultExclusions() {
        // Hand out a copy: returning the shared array would let any caller corrupt the defaults.
        return DEFAULT_EXCLUSIONS.clone();
    }

    /**
     * @return the default excluded term types followed by the snomed obsolete term types
     */
    public static String[] getSnomedExclusions() {
        return concat(DEFAULT_EXCLUSIONS, SNOMED_OBSOLETES);
    }

    /**
     * @return the snomed exclusions followed by the GO, LOINC, MEDRA, MESH, NCI and UMDNS term types,
     * in that order
     */
    public static String[] getNonRxnormExclusions() {
        return concat(DEFAULT_EXCLUSIONS, SNOMED_OBSOLETES, GO_OBSOLETES, LOINC_OBSOLETES, MEDRA_OBSOLETES,
                MESH_EXCLUSIONS, NCI_EXCLUSIONS, UMDNS_EXCLUSIONS);
    }

    /**
     * Reads the mrconso file under the given umls directory and, for every acceptable row whose cui is a key
     * of {@code conceptMap}, adds formatted texts (and possibly a preferred text and a source code) to the
     * mapped concept.  Afterwards every concept that reports {@code isEmpty()} is removed from the map.
     * <p>
     * An {@link IOException} while reading is logged, not rethrown; whatever was gathered up to that point
     * is kept.
     *
     * @param umlsDirPath          path to the umls directory; {@code /META/MRCONSO.RRF} is appended to it
     * @param conceptMap           cui code to concept; modified in place and returned
     * @param wantedSources        NOTE(review): currently not consulted by this method - confirm intent
     * @param wantedTargets        sources for which a row's source code is stored on the concept
     * @param umlsTermUtil         validates, strips and formats row texts
     * @param languages            accepted values of the row's language column
     * @param extractAbbreviations passed through to {@code umlsTermUtil.getFormattedTexts}
     * @param minCharLength        passed through to the text length checks and formatting
     * @param maxCharLength        passed through to the text length checks and formatting
     * @param maxWordCount         passed through to the text length checks and formatting
     * @param maxSymCount          passed through to the text length checks and formatting
     * @return the same {@code conceptMap} instance, with empty concepts removed
     */
    public static Map<Long, Concept> parseAllConcepts(String umlsDirPath, Map<Long, Concept> conceptMap, Collection<String> wantedSources, Collection<String> wantedTargets, UmlsTermUtil umlsTermUtil, Collection<String> languages, boolean extractAbbreviations, int minCharLength, int maxCharLength, int maxWordCount, int maxSymCount) {
        String mrconsoPath = umlsDirPath + MR_CONSO_SUB_PATH;
        Collection<String> invalidTypeSet = new HashSet<String>(Arrays.asList(MrconsoParser.getNonRxnormExclusions()));
        LOGGER.info("Compiling map of Concepts from " + mrconsoPath);
        long lineCount = 0L;
        long textCount = 0L;
        try (BufferedReader reader = FileUtil.createReader(mrconsoPath)) {
            // The for-update clause fetches the next row, so every "continue" below advances the reader.
            for (List<String> tokens = FileUtil.readBsvTokens(reader, mrconsoPath);
                 tokens != null;
                 tokens = FileUtil.readBsvTokens(reader, mrconsoPath)) {
                if (++lineCount % LOG_INTERVAL == 0L) {
                    LOGGER.info("File Line " + lineCount + "   Texts " + textCount);
                }
                if (!MrconsoParser.isRowLengthOk(tokens)
                        || !MrconsoParser.isLanguageOk(tokens, languages)
                        || !MrconsoParser.isTermTypeOk(tokens, invalidTypeSet)) {
                    continue;
                }
                Long cuiCode = CuiCodeUtil.getInstance().getCuiCode(MrconsoParser.getToken(tokens, MrconsoIndex.CUI));
                Concept concept = conceptMap.get(cuiCode);
                if (concept == null) {
                    // Only concepts that the caller already put in the map are populated.
                    continue;
                }
                String text = MrconsoParser.getToken(tokens, MrconsoIndex.TEXT);
                // The preferred text is recorded before the text is validated, so it may be a text
                // that is rejected as a dictionary term below.
                if (MrconsoParser.isPreferredTerm(tokens)) {
                    concept.setPreferredText(text);
                }
                String tokenizedText = TextTokenizer.getTokenizedText(text);
                if (tokenizedText == null || tokenizedText.isEmpty()
                        || !umlsTermUtil.isTextValid(tokenizedText)
                        || DoseUtil.hasUnit(tokenizedText)) {
                    continue;
                }
                String strippedText = umlsTermUtil.getStrippedText(tokenizedText);
                if (strippedText == null || strippedText.isEmpty()
                        || UmlsTermUtil.isTextTooShort(strippedText, minCharLength)
                        || UmlsTermUtil.isTextTooLong(strippedText, maxCharLength, maxWordCount, maxSymCount)) {
                    continue;
                }
                Collection<String> formattedTexts = umlsTermUtil.getFormattedTexts(strippedText, extractAbbreviations, minCharLength, maxCharLength, maxWordCount, maxSymCount);
                if (formattedTexts == null || formattedTexts.isEmpty()) {
                    continue;
                }
                textCount += concept.addTexts(formattedTexts);
                String source = MrconsoParser.getToken(tokens, MrconsoIndex.SOURCE);
                String code = MrconsoParser.getToken(tokens, MrconsoIndex.SOURCE_CODE);
                if (wantedTargets.contains(source) && !"NOCODE".equals(code)) {
                    concept.addCode(source, code);
                }
            }
        }
        catch (IOException ioE) {
            // Pass the exception itself so the stack trace is not lost.
            LOGGER.error(ioE.getMessage(), ioE);
        }
        // Drop concepts that ended up empty; removing through the values view also removes the map entries.
        conceptMap.values().removeIf(c -> c.isEmpty());
        LOGGER.info("File Lines: " + lineCount + " Concepts: " + conceptMap.size() + "  Texts: " + textCount);
        return conceptMap;
    }

    /**
     * @param tokens columns of one mrconso row
     * @return true if the row holds every column that {@link #parseAllConcepts} reads, so that none of the
     * later {@code getToken} calls can run past the end of the row
     */
    private static boolean isRowLengthOk(List<String> tokens) {
        // The size must be strictly greater than the highest index read; "size >= index" would accept a
        // row that is one column short and then fail in tokens.get(index).
        return tokens.size() >= PARSE_ROW_MIN_SIZE;
    }

    /**
     * @param tokens    columns of one mrconso row
     * @param languages accepted languages
     * @return true if the row's language column is one of the accepted languages
     */
    private static boolean isLanguageOk(List<String> tokens, Collection<String> languages) {
        return languages.contains(MrconsoParser.getToken(tokens, MrconsoIndex.LANGUAGE));
    }

    /**
     * @param tokens         columns of one mrconso row
     * @param invalidTypeSet term types that are always rejected
     * @return false if the row's term type is in {@code invalidTypeSet}, or if the row's source is
     * {@code RXNORM} and its term type is {@code SY}; true otherwise
     */
    private static boolean isTermTypeOk(List<String> tokens, Collection<String> invalidTypeSet) {
        String type = MrconsoParser.getToken(tokens, MrconsoIndex.TERM_TYPE);
        if (invalidTypeSet.contains(type)) {
            return false;
        }
        String source = MrconsoParser.getToken(tokens, MrconsoIndex.SOURCE);
        return !("RXNORM".equals(source) && "SY".equals(type));
    }

    /**
     * NOTE(review): not called from anywhere in this class - confirm whether source filtering was intended
     * in {@link #parseAllConcepts}.
     *
     * @param tokens        columns of one mrconso row
     * @param wantedSources accepted sources
     * @return true if the row's source column is one of the accepted sources
     */
    private static boolean isSourceOk(List<String> tokens, Collection<String> wantedSources) {
        return wantedSources.contains(MrconsoParser.getToken(tokens, MrconsoIndex.SOURCE));
    }

    /**
     * @param tokens columns of one mrconso row
     * @return true if the row's status column is {@code P} and its form column is {@code PF}
     */
    private static boolean isPreferredTerm(List<String> tokens) {
        return "P".equals(MrconsoParser.getToken(tokens, MrconsoIndex.STATUS))
                && "PF".equals(MrconsoParser.getToken(tokens, MrconsoIndex.FORM));
    }

    /**
     * Collects cui codes of mrconso rows that belong to one of the given source vocabularies, rejecting
     * rows whose term type is one of {@link #getNonRxnormExclusions()}.
     *
     * @param umlsDirPath        path to the umls directory; {@code /META/MRCONSO.RRF} is appended to it
     * @param sourceVocabularies wanted values of the row's source column
     * @return cui codes found for the wanted vocabularies
     */
    public static Collection<Long> getValidVocabularyCuis(String umlsDirPath, Collection<String> sourceVocabularies) {
        return MrconsoParser.getValidVocabularyCuis(umlsDirPath, sourceVocabularies, MrconsoParser.getNonRxnormExclusions());
    }

    /**
     * Collects cui codes of mrconso rows whose source is wanted and whose term type is not invalid.
     * A per-vocabulary count is kept for logging only: a cui is counted for the vocabulary of the first
     * row that introduced it.  An {@link IOException} while reading is logged, not rethrown.
     *
     * @param umlsDirPath        path to the umls directory; {@code /META/MRCONSO.RRF} is appended to it
     * @param sourceVocabularies wanted values of the row's source column
     * @param invalidTypes       term types whose rows are ignored
     * @return cui codes found for the wanted vocabularies
     */
    private static Collection<Long> getValidVocabularyCuis(String umlsDirPath, Collection<String> sourceVocabularies, String ... invalidTypes) {
        String mrconsoPath = umlsDirPath + MR_CONSO_SUB_PATH;
        LOGGER.info("Compiling list of Cuis with wanted Vocabularies using " + mrconsoPath);
        Map<String, Long> sourceCuis = new HashMap<String, Long>(sourceVocabularies.size());
        for (String target : sourceVocabularies) {
            sourceCuis.put(target, 0L);
        }
        // Built once so that each row costs a hash lookup instead of a scan over the array.
        Collection<String> invalidTypeSet = new HashSet<String>(Arrays.asList(invalidTypes));
        Collection<Long> validCuis = new HashSet<Long>();
        long lineCount = 0L;
        try (BufferedReader reader = FileUtil.createReader(mrconsoPath)) {
            for (List<String> tokens = FileUtil.readBsvTokens(reader, mrconsoPath);
                 tokens != null;
                 tokens = FileUtil.readBsvTokens(reader, mrconsoPath)) {
                if (++lineCount % LOG_INTERVAL == 0L) {
                    LOGGER.info("File Lines " + lineCount + "\t Cuis: " + MrconsoParser.describeCuiCounts(sourceCuis));
                }
                if (tokens.size() < VOCABULARY_ROW_MIN_SIZE) {
                    // Too short to hold the cui, source and term type columns: skip rather than fail.
                    continue;
                }
                String source = MrconsoParser.getToken(tokens, MrconsoIndex.SOURCE);
                // The keys of sourceCuis are exactly the wanted vocabularies.
                if (!sourceCuis.containsKey(source)
                        || invalidTypeSet.contains(MrconsoParser.getToken(tokens, MrconsoIndex.TERM_TYPE))) {
                    continue;
                }
                Long cuiCode = CuiCodeUtil.getInstance().getCuiCode(MrconsoParser.getToken(tokens, MrconsoIndex.CUI));
                if (validCuis.add(cuiCode)) {
                    sourceCuis.merge(source, 1L, Long::sum);
                }
            }
        }
        catch (IOException ioE) {
            // Pass the exception itself so the stack trace is not lost.
            LOGGER.error(ioE.getMessage(), ioE);
        }
        LOGGER.info("File Lines " + lineCount + "\t Cuis: " + MrconsoParser.describeCuiCounts(sourceCuis));
        LOGGER.info("File Lines " + lineCount + "\t Valid Cuis " + validCuis.size() + "\t for wanted Vocabularies");
        LOGGER.info("   Any Difference is caused by overlap of sources.");
        return validCuis;
    }

    /**
     * @param sourceCuis vocabulary name to number of cuis counted for it
     * @return comma separated "name count" pairs, in the map's iteration order
     */
    private static String describeCuiCounts(Map<String, Long> sourceCuis) {
        return sourceCuis.entrySet().stream()
                .map(e -> e.getKey() + " " + e.getValue())
                .collect(Collectors.joining(", "));
    }

    /**
     * @param tokens       columns of one mrconso row
     * @param mrconsoIndex column wanted
     * @return the token at the column's index; throws {@link IndexOutOfBoundsException} if the row is
     * too short, so callers check the row size first
     */
    private static String getToken(List<String> tokens, MrconsoIndex mrconsoIndex) {
        return tokens.get(mrconsoIndex._index);
    }

    /**
     * @param indices columns that a scan needs to read
     * @return the smallest row size for which all of the given columns exist
     */
    private static int requiredSize(MrconsoIndex ... indices) {
        int highest = -1;
        for (MrconsoIndex index : indices) {
            highest = Math.max(highest, index._index);
        }
        return highest + 1;
    }

    /**
     * @param parts arrays to join
     * @return a new array holding the elements of all parts, in the order given
     */
    private static String[] concat(String[] ... parts) {
        int total = 0;
        for (String[] part : parts) {
            total += part.length;
        }
        String[] joined = new String[total];
        int offset = 0;
        for (String[] part : parts) {
            System.arraycopy(part, 0, joined, offset, part.length);
            offset += part.length;
        }
        return joined;
    }
}

