/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.language.identifier;

import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.ngram.NgramExtractor;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfile;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import com.optimaize.langdetect.text.RemoveMinorityScriptsTextFilter;
import com.optimaize.langdetect.text.TextFilter;
import com.optimaize.langdetect.text.TextObjectFactory;
import com.optimaize.langdetect.text.TextObjectFactoryBuilder;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.jetbrains.annotations.Nullable;
import org.languagetool.DetectedLanguage;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.Languages;
import org.languagetool.language.identifier.LanguageIdentifier;
import org.languagetool.language.identifier.LanguageIdentifierService;
import org.languagetool.language.identifier.detector.FastTextDetector;
import org.languagetool.language.identifier.detector.NGramDetector;
import org.languagetool.noop.NoopLanguage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DefaultLanguageIdentifier
extends LanguageIdentifier {
    private static final Logger logger = LoggerFactory.getLogger(DefaultLanguageIdentifier.class);
    private static final double MINIMAL_CONFIDENCE = 0.9;
    private static final int SHORT_ALGO_THRESHOLD = 50;
    private static final int CONSIDER_ONLY_PREFERRED_THRESHOLD = 50;
    private static final List<String> ignoreLangCodes = Arrays.asList("ast", "gl");
    private static final List<String> externalLangCodes = Arrays.asList("eo");
    private static final float FASTTEXT_CONFIDENCE_THRESHOLD = 0.85f;
    private final LanguageDetector languageDetector;
    private final TextObjectFactory textObjectFactory;
    private FastTextDetector fastTextDetector;
    private NGramDetector ngram;

    DefaultLanguageIdentifier() {
        this(1000);
    }

    DefaultLanguageIdentifier(int maxLength) {
        super(maxLength);
        try {
            List<LanguageProfile> profiles = this.loadProfiles(DefaultLanguageIdentifier.getLanguageCodes());
            this.languageDetector = LanguageDetectorBuilder.create((NgramExtractor)NgramExtractors.standard()).minimalConfidence(0.9).shortTextAlgorithm(50).withProfiles(profiles).build();
            this.textObjectFactory = new TextObjectFactoryBuilder().maxTextLength(10000).withTextFilter(LanguageIdentifier.REMOVE_URL_FILTER).withTextFilter((TextFilter)RemoveMinorityScriptsTextFilter.forThreshold((double)0.3)).withTextFilter(LanguageIdentifier.REMOVE_EMAIL_SIGNATURE_FILTER).withTextFilter(LanguageIdentifier.REMOVE_MENTION_FILTER).withTextFilter(LanguageIdentifier.REMOVE_NON_BREAKING_SPACES_FILTER).build();
        }
        catch (IOException e) {
            throw new RuntimeException("Could not set up language identifier", e);
        }
    }

    void enableFasttext(File fasttextBinary, File fasttextModel) {
        if (fasttextBinary != null && fasttextModel != null) {
            try {
                this.fastTextDetector = new FastTextDetector(fasttextModel, fasttextBinary);
                logger.info("Started fasttext process for language identification: Binary " + fasttextBinary + " with model @ " + fasttextModel);
            }
            catch (IOException e) {
                throw new RuntimeException("Could not start fasttext process for language identification @ " + fasttextBinary + " with model @ " + fasttextModel, e);
            }
        }
    }

    public boolean isFastTextEnabled() {
        return this.fastTextDetector != null;
    }

    void enableNgrams(File ngramDir) {
        if (ngramDir != null) {
            try {
                logger.info("Loading ngram data for language identification from " + ngramDir + "...");
                this.ngram = new NGramDetector(ngramDir, 50);
                logger.info("Loaded ngram data for language identification from " + ngramDir);
            }
            catch (IOException e) {
                throw new RuntimeException("Could not load ngram data language identification from " + ngramDir, e);
            }
        }
    }

    private static List<String> getLanguageCodes() {
        ArrayList<String> langCodes = new ArrayList<String>();
        for (Language lang : Languages.get()) {
            String langCode = lang.getShortCode();
            boolean ignore = lang.isVariant() || ignoreLangCodes.contains(langCode) || externalLangCodes.contains(langCode);
            if (ignore) continue;
            if ("zh".equals(langCode)) {
                langCodes.add("zh-CN");
                langCodes.add("zh-TW");
                continue;
            }
            if (langCodes.contains(langCode)) continue;
            langCodes.add(langCode);
        }
        return langCodes;
    }

    private List<LanguageProfile> loadProfiles(List<String> langCodes) throws IOException {
        LanguageProfileReader profileReader = new LanguageProfileReader();
        List profiles = profileReader.read(langCodes);
        for (String externalLangCode : externalLangCodes) {
            String profilePath = "/" + externalLangCode + "/" + externalLangCode + ".profile";
            if (!JLanguageTool.getDataBroker().resourceExists(profilePath)) continue;
            InputStream profile = JLanguageTool.getDataBroker().getFromResourceDirAsStream(profilePath);
            Throwable throwable = null;
            try {
                profiles.add(new LanguageProfileReader().read(profile));
            }
            catch (Throwable throwable2) {
                throwable = throwable2;
                throw throwable2;
            }
            finally {
                if (profile == null) continue;
                if (throwable != null) {
                    try {
                        profile.close();
                    }
                    catch (Throwable throwable3) {
                        throwable.addSuppressed(throwable3);
                    }
                    continue;
                }
                profile.close();
            }
        }
        return profiles;
    }

    @Override
    @Nullable
    public Language detectLanguage(String cleanText) {
        DetectedLanguage detectedLanguage = this.detectLanguage(cleanText, Collections.emptyList(), Collections.emptyList());
        if (detectedLanguage == null) {
            return null;
        }
        return detectedLanguage.getDetectedLanguage();
    }

    @Override
    public DetectedLanguage detectLanguage(String cleanText, List<String> noopLangsTmp, List<String> preferredLangsTmp) {
        String text = cleanText;
        LanguageIdentifier.ParsedLanguageLists parsedLanguageLists = this.prepareDetectLanguage(text, noopLangsTmp, preferredLangsTmp);
        if (parsedLanguageLists == null) {
            return new DetectedLanguage(null, new NoopLanguage());
        }
        List<String> additionalLangs = parsedLanguageLists.getAdditionalLangs();
        List<String> preferredLangs = parsedLanguageLists.getPreferredLangs();
        Map.Entry<String, Double> result = null;
        boolean fasttextFailed = false;
        String source = "";
        if (this.fastTextDetector != null || this.ngram != null) {
            try {
                Map<String, Double> scores;
                boolean usingFastText = false;
                if ((text.length() <= 50 || this.fastTextDetector == null) && this.ngram != null) {
                    scores = this.ngram.detectLanguages(text.trim(), additionalLangs);
                    source = source + "ngram";
                } else {
                    usingFastText = true;
                    scores = this.fastTextDetector.runFasttext(text, additionalLangs);
                    source = source + "fasttext";
                }
                result = this.getHighestScoringResult(scores);
                if (usingFastText && result.getValue().floatValue() < 0.85f || result.getKey().equals("zz")) {
                    Map<Language, Integer> lang2Count = COMMON_WORDS_LANG_IDENTIFIER.getKnownWordsPerLanguage(text);
                    HashSet<String> baseLangAlreadyHandled = new HashSet<String>();
                    for (Map.Entry<Language, Integer> entry : lang2Count.entrySet()) {
                        String langCode = entry.getKey().getShortCode();
                        if (baseLangAlreadyHandled.contains(langCode)) continue;
                        baseLangAlreadyHandled.add(langCode);
                        if (scores.containsKey(langCode)) {
                            scores.put(langCode, scores.get(langCode) + Double.valueOf(entry.getValue().intValue()));
                            continue;
                        }
                        scores.put(langCode, (double)entry.getValue());
                    }
                    source = source + "+commonwords";
                    result = this.getHighestScoringResult(scores);
                }
                if (preferredLangs.contains("no") && !preferredLangs.contains("da")) {
                    scores.keySet().removeIf(k -> k.equals("da"));
                    result = this.getHighestScoringResult(scores);
                }
                if (text.length() <= 50 && preferredLangs.size() > 0) {
                    scores.keySet().removeIf(k -> !preferredLangs.contains(k));
                    result = this.getHighestScoringResult(scores);
                    source = source + "+prefLang";
                }
                double newScore = 0.99 / (30.0 / (double)Math.min(text.length(), 30));
                result = new AbstractMap.SimpleImmutableEntry<String, Double>(result.getKey(), newScore);
            }
            catch (FastTextDetector.FastTextException e) {
                if (e.isDisabled()) {
                    this.fastTextDetector = null;
                    logger.error("Fasttext disabled", (Throwable)e);
                } else {
                    logger.error("Fasttext failed, fallback used", (Throwable)e);
                    fasttextFailed = true;
                }
            }
            catch (Exception e) {
                this.fastTextDetector = null;
                logger.error("Fasttext disabled", (Throwable)e);
            }
        }
        if (this.fastTextDetector == null && this.ngram == null || fasttextFailed) {
            text = this.textObjectFactory.forText((CharSequence)text).toString();
            result = this.detectLanguageCode(text);
            if (additionalLangs.size() > 0) {
                logger.warn("Cannot consider noopLanguages because not in fastText mode: " + additionalLangs);
            }
        }
        if (result != null && result.getKey() != null && LanguageIdentifierService.INSTANCE.canLanguageBeDetected((String)result.getKey(), additionalLangs)) {
            return new DetectedLanguage(null, Languages.getLanguageForShortCode(result.getKey(), additionalLangs), result.getValue().floatValue(), source);
        }
        return null;
    }

    @Nullable
    private Map.Entry<String, Double> detectLanguageCode(String text) {
        List lang = this.languageDetector.getProbabilities((CharSequence)text);
        if (lang.size() > 0) {
            String code = ((com.optimaize.langdetect.DetectedLanguage)lang.get(0)).getLocale().getLanguage();
            double prob = ((com.optimaize.langdetect.DetectedLanguage)lang.get(0)).getProbability();
            return new AbstractMap.SimpleImmutableEntry<String, Double>(code, prob);
        }
        return null;
    }
}

