/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.core.nlp.tokenizer;

import java.util.ArrayList;
import java.util.List;
import org.apache.ctakes.core.nlp.tokenizer.ContractionResult;
import org.apache.ctakes.core.nlp.tokenizer.ContractionsPTB;
import org.apache.ctakes.core.nlp.tokenizer.HyphenatedPTB;
import org.apache.ctakes.core.nlp.tokenizer.Token;
import org.apache.ctakes.core.nlp.tokenizer.Tokenizer;
import org.apache.ctakes.core.nlp.tokenizer.TokenizerHelper;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.ContractionToken;
import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
import org.apache.ctakes.typesystem.type.syntax.NumToken;
import org.apache.ctakes.typesystem.type.syntax.PunctuationToken;
import org.apache.ctakes.typesystem.type.syntax.SymbolToken;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.uima.jcas.JCas;

public class TokenizerPTB {
    // Shared empty results. NOTE(review): emptyTokenList is a mutable ArrayList that
    // tokenizeTextSegment hands straight back to callers; a caller that adds to it
    // would affect every later "empty" result.
    static final String[] emptyStringList = new String[0];
    static final ArrayList<BaseToken> emptyTokenList = new ArrayList();
    // (char)45 is the ASCII hyphen-minus '-'; compared against by the postal-code and telephone checks.
    private static char DASH = (char)45;
    // Three consecutive periods; matched by isEllipsis.
    private static String ellipsis = "...";
    // Lower-cased prefixes that getLengthIfNameStartingWithApostrophe keeps together with their leading apostrophe.
    static String[] nameStartingWithApostrophe = new String[]{"'assad", "'awarta", "'ashira"};
    // Characters accepted by isPossibleFinalPunctuation.
    private String possibleFinalPunctuation = "?!:";
    // Non-alphanumeric characters that lenIfIsEmailAddress accepts before the '@'.
    private String validOtherEmailAddressCharacters = "!#$%&'*+/=?^_`{|}~-";
    // Prefixes that lenIfIsUrl treats as the start of a URL (compared against lower-cased text).
    private static String[] urlStarters = new String[]{"http://", "https://", "ftp://", "mailto:"};
    // Sample inputs; not referenced by any code visible in this part of the file.
    static String[] testsForNumbers = new String[]{"2,000,123.For", "92,000,123.", "2,000,123.", "2,000,123.0", "2,000,13", "2", "2.", "2,", "22", "12345678901@4", "2.2.2."};
    static String[] testsForEmailAddress = new String[]{"masanz@mayo.edu", "masanz@mayo", "m@l", "m.@p", "m.n.@p", "3@4", "%@f", "R@@", "MASANZ@MAYO", "jk$jk@.m", "asdf@.m$", "masanz.james-mi@ibm.com.us", ".mn@p", ".@p", "@t"};

    /*
     * Enabled force condition propagation
     * Lifted jumps to return sites
     */
    /**
     * Splits one segment of text into tokens by scanning it from left to right.
     *
     * <p>At each token start (as reported by {@code findFirstCharOfNextToken}) the
     * method decides how many characters the token spans and which token class it
     * gets, builds the token with {@code createToken}, and moves past it.
     * Contractions can produce two or three tokens in a single iteration.
     *
     * @param jcas passed through unchanged to {@code createToken}
     * @param textSegment the text to tokenize; {@code null} or empty yields the shared empty list
     * @param offsetAdjustment passed through unchanged to {@code createToken}
     * @param includeTextNotJustOffsets not read by this method
     * @return the objects returned by {@code createToken}, in text order, or
     *         {@code emptyTokenList} when the segment is null, empty, or has no token start
     * @throws RuntimeException if a computed token length is negative, or if the
     *         contraction helper reports a split that is not followed by {@code 'n'} or an apostrophe
     */
    public List<?> tokenizeTextSegment(JCas jcas, String textSegment, int offsetAdjustment, boolean includeTextNotJustOffsets) {
        // Validate before the first dereference: lower-casing a null segment used to
        // throw NullPointerException ahead of the null check.
        if (textSegment == null) return emptyTokenList;
        if (textSegment.length() == 0) {
            return emptyTokenList;
        }
        String lowerCasedText = textSegment.toLowerCase();
        ArrayList<Object> tokens = new ArrayList<Object>();
        // Wildcard type: the branches below assign several different BaseToken subclasses.
        Class<? extends BaseToken> tokenClass = null;
        int currentPosition = 0;
        if ((currentPosition = this.findFirstCharOfNextToken(textSegment, currentPosition)) < 0) {
            return emptyTokenList;
        }
        // A negative result from findFirstCharOfNextToken ends the scan.
        while ((currentPosition = this.findFirstCharOfNextToken(textSegment, currentPosition)) >= 0) {
            Object bta;
            int len;
            int NOT_SET_INDICATOR;
            char firstCharOfToken = textSegment.charAt(currentPosition);
            // Sentinel meaning "no rule has chosen a length yet".
            int tokenLen = NOT_SET_INDICATOR = -999;
            if (currentPosition + 1 >= textSegment.length()) {
                // Last character of the segment: a one-character token.
                // NOTE(review): tokenClass stays null here; presumably createToken derives
                // the class from the single character (cf. determineTokenType) - confirm.
                tokenLen = 1;
                tokenClass = null;
            } else if (Character.isWhitespace(textSegment.charAt(currentPosition + 1))) {
                // A single character followed by whitespace: same treatment as above.
                tokenLen = 1;
                tokenClass = null;
            } else if (firstCharOfToken == '\n') {
                tokenLen = 1;
                tokenClass = NewlineToken.class;
            } else if (firstCharOfToken == '\r') {
                // "\r\n" is kept together as one two-character newline token.
                char peekAhead = textSegment.charAt(currentPosition + 1);
                if (peekAhead != '\n') {
                    tokenLen = 1;
                    tokenClass = NewlineToken.class;
                } else {
                    tokenLen = 2;
                    tokenClass = NewlineToken.class;
                }
            } else if (firstCharOfToken == '.') {
                // A leading period is either a number such as ".75", an ellipsis, or plain punctuation.
                len = this.getLengthIfIsNumberThatStartsWithPeriod(currentPosition, textSegment);
                if (len > 0) {
                    tokenClass = NumToken.class;
                    tokenLen = len;
                } else if (this.isEllipsis(currentPosition, textSegment)) {
                    tokenLen = 3;
                    tokenClass = PunctuationToken.class;
                } else {
                    tokenLen = 1;
                    tokenClass = PunctuationToken.class;
                }
            } else if (firstCharOfToken == '-') {
                tokenLen = 1;
                tokenClass = PunctuationToken.class;
            } else if (firstCharOfToken == '\'') {
                // A leading apostrophe is either a known name, a two-character contraction, or punctuation.
                len = this.getLengthIfNameStartingWithApostrophe(currentPosition, textSegment);
                if (len > 0) {
                    tokenLen = len;
                    tokenClass = WordToken.class;
                } else if (ContractionsPTB.isContractionThatStartsWithApostrophe(currentPosition, lowerCasedText)) {
                    tokenLen = 2;
                    tokenClass = ContractionToken.class;
                } else {
                    tokenLen = 1;
                    tokenClass = PunctuationToken.class;
                }
            } else if (TokenizerHelper.isPunctuation(firstCharOfToken)) {
                tokenLen = 1;
                tokenClass = PunctuationToken.class;
            } else if (Character.isLetterOrDigit(firstCharOfToken)) {
                char ch;
                boolean obviouslyIsWord = true;
                boolean obviouslyIsNumber = true;
                // Each marker records the FIRST index at or after currentPosition where the
                // named character class stops; -1 means "not found yet".
                int nextWhitespaceOrEndOfSegment = -1;
                int nextNonLetterOrNonDigit = -1;
                int nextNonLetterDigitApostrophe = -1;
                int nextNonTelephoneOrPostalChar = -1;
                int nextNonNumericChar = -1;
                int nextNonDigit = -1;
                int i = currentPosition;
                // Single pass over the whitespace-delimited chunk, filling in the markers.
                // The loop stops after processing the first whitespace character or at end of text.
                do {
                    if (Character.isWhitespace(ch = textSegment.charAt(i))) {
                        // Whitespace terminates every character class still open.
                        if (nextNonLetterOrNonDigit < 0) {
                            nextNonLetterOrNonDigit = i;
                        }
                        if (nextNonLetterDigitApostrophe < 0) {
                            nextNonLetterDigitApostrophe = i;
                        }
                        if (nextNonDigit < 0) {
                            nextNonDigit = i;
                        }
                        if (nextNonTelephoneOrPostalChar < 0) {
                            nextNonTelephoneOrPostalChar = i;
                        }
                        if (nextNonNumericChar < 0) {
                            nextNonNumericChar = i;
                        }
                        nextWhitespaceOrEndOfSegment = i;
                        continue;
                    }
                    if (!Character.isLetterOrDigit(ch)) {
                        // Punctuation or symbol inside the chunk: neither a plain word nor a plain number.
                        obviouslyIsWord = false;
                        obviouslyIsNumber = false;
                        if (nextNonLetterOrNonDigit < 0) {
                            nextNonLetterOrNonDigit = i;
                        }
                        if (nextNonLetterDigitApostrophe < 0 && ch != '\'') {
                            nextNonLetterDigitApostrophe = i;
                        }
                        if (nextNonDigit < 0) {
                            nextNonDigit = i;
                        }
                        if (nextNonTelephoneOrPostalChar < 0 && !this.isTelephoneNumberChar(ch)) {
                            nextNonTelephoneOrPostalChar = i;
                        }
                        if (nextNonNumericChar >= 0 || this.isNumericChar(ch)) continue;
                        nextNonNumericChar = i;
                        continue;
                    }
                    if (Character.isDigit(ch)) continue;
                    // A letter: the chunk cannot be a plain number.
                    obviouslyIsNumber = false;
                    if (nextNonDigit < 0) {
                        nextNonDigit = i;
                    }
                    if (nextNonTelephoneOrPostalChar < 0 && !this.isTelephoneNumberChar(ch)) {
                        nextNonTelephoneOrPostalChar = i;
                    }
                    if (nextNonNumericChar >= 0 || this.isNumericChar(ch)) continue;
                    nextNonNumericChar = i;
                } while (++i < textSegment.length() && !Character.isWhitespace(ch));
                if (i >= textSegment.length()) {
                    // Ran off the end of the text: unset markers default to the text length.
                    // nextNonDigit is not defaulted here.
                    if (nextWhitespaceOrEndOfSegment < 0) {
                        nextWhitespaceOrEndOfSegment = textSegment.length();
                    }
                    if (nextNonLetterOrNonDigit < 0) {
                        nextNonLetterOrNonDigit = textSegment.length();
                    }
                    if (nextNonLetterDigitApostrophe < 0) {
                        nextNonLetterDigitApostrophe = textSegment.length();
                    }
                    if (nextNonTelephoneOrPostalChar < 0) {
                        nextNonTelephoneOrPostalChar = textSegment.length();
                    }
                    if (nextNonNumericChar < 0) {
                        nextNonNumericChar = textSegment.length();
                    }
                }
                if (obviouslyIsNumber) {
                    // Digits only, up to whitespace or end of text.
                    tokenLen = nextWhitespaceOrEndOfSegment - currentPosition;
                    tokenClass = NumToken.class;
                } else if (obviouslyIsWord) {
                    // Letters and digits only; may still be a contraction written without an apostrophe.
                    String lowerCasedSubstring = textSegment.substring(currentPosition, nextWhitespaceOrEndOfSegment).toLowerCase();
                    int len2 = ContractionsPTB.lenOfFirstTokenInContraction(lowerCasedSubstring);
                    if (len2 > 0) {
                        // Emit the first part now; the last part is emitted by the shared code after the if/else chain.
                        tokenLen = len2;
                        tokenClass = WordToken.class;
                        bta = this.createToken(tokenClass, textSegment, jcas, currentPosition, currentPosition + tokenLen, offsetAdjustment);
                        tokens.add(bta);
                        currentPosition += tokenLen;
                        tokenLen = len2 = ContractionsPTB.lenOfSecondTokenInContraction(lowerCasedSubstring);
                        tokenClass = ContractionToken.class;
                        len2 = ContractionsPTB.lenOfThirdTokenInContraction(lowerCasedSubstring);
                        if (len2 > 0) {
                            // Three-part contraction: emit the second part here as well.
                            bta = this.createToken(tokenClass, textSegment, jcas, currentPosition, currentPosition + tokenLen, offsetAdjustment);
                            tokens.add(bta);
                            currentPosition += tokenLen;
                            tokenLen = len2;
                            tokenClass = ContractionToken.class;
                        }
                    } else {
                        tokenLen = nextWhitespaceOrEndOfSegment - currentPosition;
                        tokenClass = WordToken.class;
                    }
                } else {
                    // The chunk contains punctuation or symbols; try the special-case rules in priority order.
                    String lowerCasedSubstring;
                    int len3;
                    // Rule 1: the first non-alphanumeric is an apostrophe and the helper reports a
                    // keep-together length that extends past that apostrophe.
                    if (nextNonLetterOrNonDigit < lowerCasedText.length() && lowerCasedText.charAt(nextNonLetterOrNonDigit) == '\'' && (len3 = ContractionsPTB.tokenLengthCheckingForSingleQuoteWordsToKeepTogether(lowerCasedSubstring = lowerCasedText.substring(currentPosition, nextWhitespaceOrEndOfSegment))) > nextNonLetterOrNonDigit - currentPosition) {
                        tokenLen = len3;
                        tokenClass = this.wordTokenOrNumToken(lowerCasedText, currentPosition, tokenLen);
                    }
                    if (tokenLen == NOT_SET_INDICATOR) {
                        // Rule 2: an apostrophe in the middle of a contraction.
                        ContractionResult cr = ContractionsPTB.getLengthIfNextApostIsMiddleOfContraction(currentPosition, nextNonLetterOrNonDigit, lowerCasedText);
                        if (cr != null) {
                            tokenLen = len = cr.getWordTokenLen();
                            tokenClass = WordToken.class;
                            // Sanity check on the helper: the contraction part must start with 'n' or an apostrophe.
                            char c = lowerCasedText.charAt(currentPosition + len);
                            if (c != 'n' && c != '\'') throw new RuntimeException("ERROR: getLengthIfNextApostIsMiddleOfContraction returned " + len + " but the character (" + c + ") after that is not 'n' or apostrophe ");
                            if (tokenLen < 0) {
                                throw new RuntimeException("c = " + c + "tokenLen = " + tokenLen + " currentPosition = " + currentPosition);
                            }
                            if (tokenLen > 0) {
                                // Emit the word part now; the contraction part is emitted after the if/else chain.
                                bta = this.createToken(tokenClass, textSegment, jcas, currentPosition, currentPosition + tokenLen, offsetAdjustment);
                                tokens.add(bta);
                                currentPosition += tokenLen;
                            }
                            tokenLen = cr.getContractionTokenLen();
                            tokenClass = ContractionToken.class;
                        } else {
                            // Rules 3-7: telephone number, postal code, URL, e-mail address, abbreviation;
                            // each match is kept as a single WordToken.
                            len = this.lenIfIsTelephoneNumber(currentPosition, lowerCasedText, nextNonTelephoneOrPostalChar);
                            if (len > 0) {
                                tokenLen = len;
                                tokenClass = WordToken.class;
                            } else {
                                len = this.lenIfIsPostalCode(currentPosition, lowerCasedText, nextNonTelephoneOrPostalChar);
                                if (len > 0) {
                                    tokenLen = len;
                                    tokenClass = WordToken.class;
                                } else {
                                    len = this.lenIfIsUrl(currentPosition, lowerCasedText, nextWhitespaceOrEndOfSegment);
                                    if (len > 0) {
                                        tokenLen = len;
                                        tokenClass = WordToken.class;
                                    } else {
                                        len = this.lenIfIsEmailAddress(currentPosition, lowerCasedText, nextWhitespaceOrEndOfSegment);
                                        if (len > 0) {
                                            tokenLen = len;
                                            tokenClass = WordToken.class;
                                        } else {
                                            // The abbreviation check gets the mixed-case text, not the lower-cased copy.
                                            len = this.lenIfIsAbbreviation(currentPosition, textSegment, nextWhitespaceOrEndOfSegment);
                                            if (len > 0) {
                                                tokenLen = len;
                                                tokenClass = WordToken.class;
                                            } else if (nextNonLetterOrNonDigit < lowerCasedText.length() && lowerCasedText.charAt(nextNonLetterOrNonDigit) == '-') {
                                                // Rule 8: hyphenated term; the helper decides how much stays together.
                                                lowerCasedSubstring = lowerCasedText.substring(currentPosition, nextWhitespaceOrEndOfSegment);
                                                len = HyphenatedPTB.tokenLengthCheckingForHyphenatedTerms(lowerCasedSubstring);
                                                tokenLen = len;
                                                if (tokenLen < 0) {
                                                    throw new RuntimeException("tokenLen = " + tokenLen + " currentPosition = " + currentPosition + " nextNonLetterOrNonDigit = " + nextNonLetterOrNonDigit);
                                                }
                                                tokenClass = this.wordTokenOrNumToken(lowerCasedText, currentPosition, tokenLen);
                                            } else if (nextNonNumericChar > 0 && (len = this.lenIfIsNumberContainingComma(currentPosition, lowerCasedText, nextNonNumericChar)) > 0) {
                                                // Rule 9: a number with thousands separators.
                                                tokenLen = len;
                                                tokenClass = NumToken.class;
                                            } else if (nextNonLetterDigitApostrophe < lowerCasedText.length() && lowerCasedText.charAt(nextNonLetterDigitApostrophe) == '.') {
                                                // Rule 10: the first character that is not a letter, digit or apostrophe is a period.
                                                if (nextNonDigit == lowerCasedText.length() - 1) {
                                                    // The first non-digit is the last character of the text: the token stops before it.
                                                    tokenLen = nextNonDigit - currentPosition;
                                                    tokenClass = NumToken.class;
                                                } else if (nextNonLetterDigitApostrophe == nextNonDigit) {
                                                    // Digits, a period, then any further digits: one decimal number.
                                                    tokenLen = nextNonDigit + 1 + this.getLenToNextNonDigit(lowerCasedText, nextNonDigit + 1) - currentPosition;
                                                    tokenClass = NumToken.class;
                                                } else {
                                                    tokenLen = nextNonLetterOrNonDigit - currentPosition;
                                                    tokenClass = this.wordTokenOrNumToken(lowerCasedText, currentPosition, tokenLen);
                                                }
                                            } else {
                                                // Fallback: the token ends at the first non-alphanumeric character.
                                                tokenLen = nextNonLetterOrNonDigit - currentPosition;
                                                tokenClass = this.wordTokenOrNumToken(lowerCasedText, currentPosition, tokenLen);
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            } else {
                // Not whitespace, punctuation, letter or digit: a one-character symbol.
                tokenLen = 1;
                tokenClass = SymbolToken.class;
            }
            // Also catches NOT_SET_INDICATOR surviving every branch above.
            if (tokenLen < 0) {
                throw new RuntimeException("tokenLen = " + tokenLen + " currentPosition = " + currentPosition);
            }
            bta = this.createToken(tokenClass, textSegment, jcas, currentPosition, currentPosition + tokenLen, offsetAdjustment);
            tokens.add(bta);
            currentPosition += tokenLen;
        }
        return tokens;
    }

    /**
     * Tokenizes {@code text} as a single segment, with no CAS and no offset shift.
     *
     * @param text the text to tokenize
     * @return whatever {@code tokenizeTextSegment} returns for a null CAS, offset 0 and {@code true}
     */
    public List<?> tokenize(String text) {
        final JCas noCas = null;
        final int noOffsetShift = 0;
        return this.tokenizeTextSegment(noCas, text, noOffsetShift, true);
    }

    /**
     * Measures a number that uses commas as thousands separators, e.g. {@code 2,000,123.5}.
     *
     * @param currentPosition index in {@code text} where the candidate number starts
     * @param text the text being scanned
     * @param nextNonNumericChar index just past the last character that may belong to the number
     * @return the length of the number starting at {@code currentPosition}, or -1 when there is
     *         no comma-separated number there
     */
    private int lenIfIsNumberContainingComma(int currentPosition, String text, int nextNonNumericChar) {
        String numericRun = text.substring(0, nextNonNumericChar);
        int firstComma = numericRun.indexOf(',', currentPosition);
        int firstPeriod = numericRun.indexOf('.', currentPosition);
        // The whole-number part ends at the first period, or at the end of the run.
        int wholePartEnd = firstPeriod < 0 ? numericRun.length() : firstPeriod;
        // No comma, a comma at index 0, or a comma only after the decimal point: not this kind of number.
        if (firstComma <= 0 || firstComma > wholePartEnd) {
            return -1;
        }

        // Accept ",ddd" groups one at a time; matched always ends on a group boundary.
        int groupStart = firstComma;
        int matched;
        while (true) {
            matched = groupStart - currentPosition;
            int cursor = groupStart;
            if (cursor < wholePartEnd && numericRun.charAt(cursor) == ',') {
                ++cursor;
            }
            int digits = 0;
            while (digits < 3 && cursor < wholePartEnd && Character.isDigit(numericRun.charAt(cursor))) {
                ++cursor;
                ++digits;
            }
            if (digits < 3) {
                break;
            }
            // A fourth digit means the group is not exactly three digits long.
            if (cursor < wholePartEnd && Character.isDigit(numericRun.charAt(cursor))) {
                break;
            }
            groupStart = cursor;
        }
        if (matched <= 0) {
            return -1;
        }

        // Take a decimal point directly after the groups, plus its digits, unless that
        // point is the very last character of the text.
        boolean periodEndsText = firstPeriod == text.length() - 1;
        if (!periodEndsText && firstPeriod == currentPosition + matched) {
            ++matched;
            int available = nextNonNumericChar - currentPosition;
            while (matched < available && Character.isDigit(numericRun.charAt(currentPosition + matched))) {
                ++matched;
            }
        }
        return matched;
    }

    /**
     * Recognises a ten-character postal code of the form {@code ddddd-dddd}.
     *
     * @param currentPosition index in {@code text} where the candidate starts
     * @param text the text being scanned
     * @param nextNonPostalCodeChar index just past the candidate; returned unchanged when negative
     * @return 10 when the candidate matches, the negative {@code nextNonPostalCodeChar} when that
     *         argument is negative, otherwise -1
     */
    private int lenIfIsPostalCode(int currentPosition, String text, int nextNonPostalCodeChar) {
        if (nextNonPostalCodeChar < 0) {
            return nextNonPostalCodeChar;
        }
        final int dashIndex = 5;
        final int expectedLength = 10;
        int candidateLength = nextNonPostalCodeChar - currentPosition;
        String candidate = text.substring(currentPosition, nextNonPostalCodeChar);
        if (candidateLength != expectedLength) {
            return -1;
        }
        // Every position must be a digit except the dash in the middle.
        for (int index = 0; index < expectedLength; ++index) {
            char actual = candidate.charAt(index);
            boolean fits = index == dashIndex ? actual == DASH : Character.isDigit(actual);
            if (!fits) {
                return -1;
            }
        }
        return candidateLength;
    }

    /**
     * Recognises a telephone number in one of the shapes {@code d-dddd}, {@code ddd-dddd},
     * {@code ddd-ddd-dddd} or {@code d-ddd-ddd-dddd}. A 12-character candidate that does not
     * fit {@code ddd-ddd-dddd} is handed to {@code checkFormat2} for a second opinion.
     *
     * @param currentPosition index in {@code text} where the candidate starts
     * @param text the text being scanned
     * @param nextNonTelephoneNumberChar index just past the candidate; returned unchanged when negative
     * @return the candidate length when it matches, the negative
     *         {@code nextNonTelephoneNumberChar} when that argument is negative, otherwise -1
     *         (or the result of {@code checkFormat2} for a non-matching 12-character candidate)
     */
    private int lenIfIsTelephoneNumber(int currentPosition, String text, int nextNonTelephoneNumberChar) {
        if (nextNonTelephoneNumberChar < 0) {
            return nextNonTelephoneNumberChar;
        }
        int candidateLength = nextNonTelephoneNumberChar - currentPosition;
        String candidate = text.substring(currentPosition, nextNonTelephoneNumberChar);

        // '#' stands for a digit, '-' for the DASH separator.
        final String shape;
        switch (candidateLength) {
            case 6:
                shape = "#-####";
                break;
            case 8:
                shape = "###-####";
                break;
            case 12:
                shape = "###-###-####";
                break;
            case 14:
                shape = "#-###-###-####";
                break;
            default:
                return -1;
        }

        for (int index = 0; index < candidateLength; ++index) {
            char actual = candidate.charAt(index);
            boolean fits = shape.charAt(index) == '#' ? Character.isDigit(actual) : actual == DASH;
            if (!fits) {
                // Only the 12-character shape has an alternative layout to fall back on.
                return candidateLength == 12 ? this.checkFormat2(candidate) : -1;
            }
        }
        return candidateLength;
    }

    /**
     * Checks the alternate 12-character telephone layout {@code dd-dddd-dddd}.
     *
     * <p>Previously the method returned -1 even after every positional check had
     * passed, so this layout could never be recognised; a match now returns its length.
     *
     * @param s the candidate text
     * @return 12 when {@code s} is exactly two digits, a dash, four digits, a dash and four
     *         digits; -1 otherwise (including a null or differently sized {@code s})
     */
    private int checkFormat2(String s) {
        final int expectedLength = 12;
        if (s == null || s.length() != expectedLength) {
            return -1;
        }
        for (int index = 0; index < expectedLength; ++index) {
            char actual = s.charAt(index);
            // Dashes sit at positions 2 and 7; every other position is a digit.
            boolean fits = (index == 2 || index == 7) ? actual == DASH : Character.isDigit(actual);
            if (!fits) {
                return -1;
            }
        }
        return expectedLength;
    }

    /**
     * Tells whether {@code ch} may appear in a telephone number or postal code.
     *
     * @param ch the character to classify
     * @return {@code true} for a hyphen-minus or any digit
     */
    private boolean isTelephoneNumberChar(char ch) {
        if (ch == '-') {
            return true;
        }
        return Character.isDigit(ch);
    }

    /**
     * Tells whether {@code ch} may appear inside a formatted number.
     *
     * @param ch the character to classify
     * @return {@code true} for a comma, a period or any digit
     */
    private boolean isNumericChar(char ch) {
        switch (ch) {
            case ',':
            case '.':
                return true;
            default:
                return Character.isDigit(ch);
        }
    }

    /**
     * Counts the consecutive digits in {@code s} beginning at {@code startingPosition}.
     *
     * @param s the text to inspect
     * @param startingPosition index of the first character to examine
     * @return the number of digits before the first non-digit; when no non-digit is found,
     *         {@code s.length() - startingPosition}
     */
    private int getLenToNextNonDigit(String s, int startingPosition) {
        for (int offset = 0; ; ++offset) {
            int index = startingPosition + offset;
            if (index >= s.length()) {
                // Ran out of text without meeting a non-digit.
                return s.length() - startingPosition;
            }
            if (!Character.isDigit(s.charAt(index))) {
                return offset;
            }
        }
    }

    /**
     * Picks the token class for a span that may mix letters and other characters.
     *
     * @param lowerCasedText the text containing the span
     * @param currentPosition index of the first character of the span
     * @param tokenLen number of characters in the span
     * @return {@code WordToken.class} when the span contains at least one letter,
     *         otherwise {@code NumToken.class}
     */
    private Class<? extends BaseToken> wordTokenOrNumToken(String lowerCasedText, int currentPosition, int tokenLen) {
        Class<? extends BaseToken> chosen = NumToken.class;
        if (this.containsLetter(lowerCasedText, currentPosition, tokenLen)) {
            chosen = WordToken.class;
        }
        return chosen;
    }

    /**
     * Tells whether a span of text contains at least one letter.
     *
     * @param lowerCasedText the text containing the span
     * @param currentPosition index of the first character of the span
     * @param tokenLen number of characters in the span
     * @return {@code true} as soon as a letter is found inside the span, {@code false} otherwise
     */
    private boolean containsLetter(String lowerCasedText, int currentPosition, int tokenLen) {
        int spanEnd = currentPosition + tokenLen;
        int index = currentPosition;
        while (index < spanEnd) {
            if (Character.isLetter(lowerCasedText.charAt(index))) {
                return true;
            }
            ++index;
        }
        return false;
    }

    /**
     * Tells whether the text at {@code currentPosition} begins with the ellipsis string.
     *
     * @param currentPosition index at which to look
     * @param textSegment the text being scanned
     * @return {@code true} when the remainder of the segment starts with {@code ellipsis}
     */
    private boolean isEllipsis(int currentPosition, String textSegment) {
        String remainder = textSegment.substring(currentPosition);
        return remainder.regionMatches(0, ellipsis, 0, ellipsis.length());
    }

    /**
     * Looks for one of the known names that begin with an apostrophe at {@code currentPosition}.
     *
     * @param currentPosition index of the apostrophe in {@code textSegment}
     * @param textSegment the text being scanned
     * @return the length of the first entry of {@code nameStartingWithApostrophe} that the
     *         lower-cased remainder starts with, or -1 when the apostrophe is the last
     *         character, is not followed by a letter, or no entry matches
     */
    private int getLengthIfNameStartingWithApostrophe(int currentPosition, String textSegment) {
        String loweredRemainder = textSegment.substring(currentPosition).toLowerCase();
        if (loweredRemainder.length() == 1) {
            return -1;
        }
        char afterApostrophe = textSegment.charAt(currentPosition + 1);
        if (!Character.isLetter(afterApostrophe)) {
            return -1;
        }
        for (int k = 0; k < nameStartingWithApostrophe.length; ++k) {
            String knownName = nameStartingWithApostrophe[k];
            // startsWith is already false when the known name is longer than the remainder.
            if (loweredRemainder.startsWith(knownName)) {
                return knownName.length();
            }
        }
        return -1;
    }

    /**
     * Measures a number written with a leading period, such as {@code .75}.
     *
     * @param currentPosition index of the period in {@code textSegment}
     * @param textSegment the text being scanned
     * @return the length of the period plus the digits that follow it, or -1 when the
     *         character after the period is missing or is not a digit
     */
    private int getLengthIfIsNumberThatStartsWithPeriod(int currentPosition, String textSegment) {
        int segmentEnd = textSegment.length();
        if (segmentEnd - currentPosition < 2) {
            return -1;
        }
        if (!Character.isDigit(textSegment.charAt(currentPosition + 1))) {
            return -1;
        }
        // The period and the first digit are accepted; extend over any further digits.
        int cursor = currentPosition + 2;
        while (cursor < segmentEnd && Character.isDigit(textSegment.charAt(cursor))) {
            ++cursor;
        }
        return cursor - currentPosition;
    }

    /**
     * Measures an abbreviation made of letter runs each followed by a period, such as
     * {@code e.g.}, starting at {@code currentPosition}. Text beginning with {@code www.}
     * (in any letter case) is never treated as an abbreviation.
     *
     * <p>The first non-letter decides the outcome: anything but a period, a period with no
     * letters before it, or a period that is the last character of the text gives -1.
     * Otherwise the rest is checked recursively for further letter-plus-period groups.
     *
     * @param currentPosition index in {@code mixedCaseText} where the candidate starts
     * @param mixedCaseText the text being scanned, in its original letter case
     * @param afterEndOfInputToConsider index just past the last character to consider
     * @return the abbreviation length, or -1 when the text here is not an abbreviation
     */
    private int lenIfIsAbbreviation(int currentPosition, String mixedCaseText, int afterEndOfInputToConsider) {
        if (afterEndOfInputToConsider - currentPosition >= 4) {
            String firstFour = mixedCaseText.substring(currentPosition, currentPosition + 4).toLowerCase();
            if ("www.".equals(firstFour)) {
                return -1;
            }
        }
        int lettersSeen = 0;
        for (int index = currentPosition; index < afterEndOfInputToConsider; ++index) {
            char current = mixedCaseText.charAt(index);
            // Past the considered range the following character is treated as a blank.
            char following = ' ';
            if (index + 1 < afterEndOfInputToConsider) {
                following = mixedCaseText.charAt(index + 1);
            }
            if (Character.isLetter(current)) {
                ++lettersSeen;
                continue;
            }
            if (current != '.' || lettersSeen == 0 || index + 1 == mixedCaseText.length()) {
                return -1;
            }
            int throughPeriod = index + 1 - currentPosition;
            int continuation = this.lenIfIsAbbreviation(index + 1, mixedCaseText, afterEndOfInputToConsider);
            if (continuation > 0) {
                return throughPeriod + continuation;
            }
            if (Character.isWhitespace(following) || this.isPossibleFinalPunctuation(following)) {
                return throughPeriod;
            }
            // Followed by some other punctuation: the period is left out of the token.
            return Character.isLetterOrDigit(following) ? -1 : throughPeriod - 1;
        }
        return -1;
    }

    /**
     * Tells whether {@code c} is one of the characters in {@code possibleFinalPunctuation}.
     *
     * @param c the character to look up
     * @return {@code true} when {@code c} occurs in {@code possibleFinalPunctuation}
     */
    private boolean isPossibleFinalPunctuation(char c) {
        int foundAt = this.possibleFinalPunctuation.indexOf(c);
        return foundAt >= 0;
    }

    /**
     * Measures an e-mail address starting at {@code currentPosition}.
     *
     * <p>The local part (before the {@code '@'}) must be 1 to 64 characters of letters, digits
     * or {@code validOtherEmailAddressCharacters}, with periods allowed only in its interior.
     * The domain part may contain letters and digits, plus a hyphen or period
     * when the next character is a letter or digit.
     *
     * <p>Fix: the previous-character tracker was never updated, so an address followed by
     * punctuation (for example a sentence-ending period) was always rejected, and the
     * unreachable return value would have dropped the last character of the address. The
     * tracker is now maintained and the address ends exactly before the terminating character.
     *
     * @param currentPosition index in {@code lowerCasedText} where the candidate starts
     * @param lowerCasedText the text being scanned
     * @param endOfInputToConsider index just past the last character to consider
     * @return the length of the address, or -1 when the text here is not an address or
     *         the address is longer than 320 characters
     */
    private int lenIfIsEmailAddress(int currentPosition, String lowerCasedText, int endOfInputToConsider) {
        final int maxLenLocalPart = 64;
        final int maxTotalLen = 320;
        final char AT = '@';
        final char PERIOD = '.';
        int indexOfAt = lowerCasedText.substring(currentPosition, endOfInputToConsider).indexOf(AT);
        // Need a non-empty local part, something after the '@', and a local part within the limit.
        if (indexOfAt < 1 || currentPosition + indexOfAt + 1 == endOfInputToConsider || indexOfAt > maxLenLocalPart) {
            return -1;
        }
        int atPosition = currentPosition + indexOfAt;

        // Local part.
        for (int i = currentPosition; i < atPosition; ++i) {
            char ch = lowerCasedText.charAt(i);
            if (!Character.isLetterOrDigit(ch) && this.validOtherEmailAddressCharacters.indexOf(ch) < 0) {
                return -1;
            }
            // A period may not be the first or the last character of the local part.
            if (ch == PERIOD && (i == currentPosition || i == atPosition - 1)) {
                return -1;
            }
        }

        // Domain part: stop at the first character that cannot belong to it.
        int endOfAddress = endOfInputToConsider;
        char prev = AT;
        for (int i = atPosition + 1; i < endOfInputToConsider; ++i) {
            char ch = lowerCasedText.charAt(i);
            boolean separatorBeforeAlnum = (ch == '-' || ch == PERIOD)
                    && i + 1 < endOfInputToConsider
                    && Character.isLetterOrDigit(lowerCasedText.charAt(i + 1));
            if (!Character.isLetterOrDigit(ch) && !separatorBeforeAlnum) {
                // The address ends before this character, provided it ends on a letter or digit.
                if (!Character.isLetterOrDigit(prev)) {
                    return -1;
                }
                endOfAddress = i;
                break;
            }
            prev = ch;
        }

        int len = endOfAddress - currentPosition;
        if (len > maxTotalLen) {
            return -1;
        }
        return len;
    }

    /**
     * Measures the URL that starts at {@code currentPosition}, if there is one.
     *
     * <p>The region counts as a URL when it begins with one of the prefixes in
     * {@code urlStarters} and has at least one more character after that prefix. In that case
     * the whole region is reported.
     *
     * @param currentPosition index of the first character of the candidate URL
     * @param lowerCasedText text to examine
     * @param endOfInputToConsider exclusive upper bound of the region to examine
     * @return {@code endOfInputToConsider - currentPosition} for a URL, otherwise -1
     */
    private int lenIfIsUrl(int currentPosition, String lowerCasedText, int endOfInputToConsider) {
        String candidate = lowerCasedText.substring(currentPosition, endOfInputToConsider);
        for (int k = 0; k < urlStarters.length; ++k) {
            String prefix = urlStarters[k];
            boolean hasTextAfterPrefix = candidate.length() > prefix.length();
            if (candidate.startsWith(prefix) && hasTextAfterPrefix) {
                return endOfInputToConsider - currentPosition;
            }
        }
        return -1;
    }

    /**
     * Picks a token class for the single character {@code s[begin, end)}.
     *
     * <p>The checks run in this order: newline or carriage return, digit, letter,
     * {@link #isContraction(char)}, {@code TokenizerHelper.isPunctuation}; anything left over
     * is a symbol.
     *
     * @param s text containing the character
     * @param begin index of the character
     * @param end must equal {@code begin + 1} and must not exceed {@code s.length()}
     * @return the token class, or {@code null} (after printing a stack trace) when the
     *         arguments do not describe exactly one character of {@code s}
     */
    private Class<? extends BaseToken> determineTokenType(String s, int begin, int end) {
        boolean describesOneChar = s != null && s.length() >= end && begin + 1 == end;
        if (!describesOneChar) {
            new Exception("ERROR: s not at least one char:  s= " + s + " begin, end = " + begin + "," + end).printStackTrace();
            return null;
        }
        char c = s.charAt(begin);
        Class<? extends BaseToken> type;
        if (c == '\r' || c == '\n') {
            type = NewlineToken.class;
        } else if (Character.isDigit(c)) {
            type = NumToken.class;
        } else if (Character.isLetter(c)) {
            type = WordToken.class;
        } else if (this.isContraction(c)) {
            type = ContractionToken.class;
        } else if (TokenizerHelper.isPunctuation(c)) {
            type = PunctuationToken.class;
        } else {
            type = SymbolToken.class;
        }
        return type;
    }

    /**
     * Reports whether a single character is a contraction.
     *
     * <p>This implementation ignores its argument and always answers {@code false}, so the
     * contraction check in {@code determineTokenType} never selects {@code ContractionToken}.
     *
     * @param c the character to classify (unused)
     * @return always {@code false}
     */
    private boolean isContraction(char c) {
        return false;
    }

    /**
     * Sanity-checks a pair of token offsets and the adjustment that was applied to them.
     *
     * <p>The first violated rule, checked in this order, is reported on {@code System.err}
     * together with a stack trace whose message is {@code "begin end offsetAdjustment"}:
     * {@code begin < 0}, {@code end < 0}, {@code end < begin}, {@code offsetAdjustment < 0}.
     *
     * <p>The diagnostic exception is only built when a rule is violated; capturing a stack
     * trace for every valid token would be wasted work.
     *
     * @param begin begin offset to check
     * @param end end offset to check
     * @param offsetAdjustment adjustment to check
     * @return {@code true} if no rule is violated, {@code false} otherwise
     */
    private boolean verify(int begin, int end, int offsetAdjustment) {
        String problem;
        if (begin < 0) {
            problem = "ERROR: begin = " + begin;
        } else if (end < 0) {
            problem = "ERROR: end = " + end;
        } else if (end < begin) {
            problem = "ERROR: end < begin " + end + " < " + begin;
        } else if (offsetAdjustment < 0) {
            problem = "ERROR: offsetAdjustment = " + offsetAdjustment;
        } else {
            return true;
        }
        System.err.println(problem);
        new Exception(begin + " " + end + " " + offsetAdjustment).printStackTrace();
        return false;
    }

    /**
     * Builds the token object for {@code s[begin, end)}.
     *
     * <p>With a {@code jcas}, the result is an instance of the requested annotation class
     * positioned at the offsets shifted by {@code offsetAdjustment}; number and word tokens
     * additionally get their number type, capitalization and number position filled in.
     * Without a {@code jcas}, the result is a plain {@link Token} carrying the covered text.
     *
     * <p>If the shifted offsets fail {@link #verify(int, int, int)}, a zero-length token at
     * offset 0 is returned instead so the caller does not hit an exception.
     *
     * @param clas requested token class, or {@code null} to derive it from the single
     *        character at {@code begin} via {@code determineTokenType}
     * @param s text the offsets {@code begin} and {@code end} refer to
     * @param jcas CAS to create annotations in, or {@code null} to create plain {@link Token}s
     * @param begin begin offset within {@code s}
     * @param end end offset within {@code s}
     * @param offsetAdjustment amount added to {@code begin} and {@code end} to position the token
     * @return the created token; {@code null} if {@code jcas} is given and {@code clas} is not
     *         one of the supported classes
     * @throws RuntimeException if {@code clas} is {@code null} and no class could be derived
     */
    private Object createToken(Class<? extends BaseToken> clas, String s, JCas jcas, int begin, int end, int offsetAdjustment) {
        int beginFromStartOfDocument = begin + offsetAdjustment;
        int endFromStartOfDocument = end + offsetAdjustment;
        if (!this.verify(beginFromStartOfDocument, endFromStartOfDocument, offsetAdjustment)) {
            System.err.println("ERROR: so creating a BaseToken with begin = 0 end = 0 just to avoid exception");
            // Returned separately: the two fallback objects are not assumed to share a type.
            if (jcas != null) {
                return new BaseToken(jcas, 0, 0);
            }
            return new Token(0, 0);
        }
        if (clas == null) {
            Class<? extends BaseToken> derived = this.determineTokenType(s, begin, end);
            if (derived == null) {
                throw new RuntimeException(" still is null");
            }
            if (jcas != null) {
                return this.createToken(derived, s, jcas, begin, end, offsetAdjustment);
            }
            // NOTE(review): this path has always used the unadjusted offsets, unlike every other
            // plain-Token path below. Kept as is; confirm whether that is intended.
            return this.newPlainToken(s, begin, end, begin, end);
        }
        if (jcas == null) {
            // Without a CAS the requested class makes no difference: always a plain Token.
            return this.newPlainToken(s, begin, end, beginFromStartOfDocument, endFromStartOfDocument);
        }
        if (clas.equals(NewlineToken.class)) {
            return new NewlineToken(jcas, beginFromStartOfDocument, endFromStartOfDocument);
        }
        if (clas.equals(NumToken.class)) {
            NumToken numToken = new NumToken(jcas, beginFromStartOfDocument, endFromStartOfDocument);
            this.setNumType(numToken, s.substring(begin, end));
            return numToken;
        }
        if (clas.equals(WordToken.class)) {
            WordToken wordToken = new WordToken(jcas, beginFromStartOfDocument, endFromStartOfDocument);
            String tokenText = s.substring(begin, end);
            this.setCapitalization(wordToken, tokenText);
            this.setNumPosition(wordToken, tokenText);
            return wordToken;
        }
        if (clas.equals(SymbolToken.class)) {
            return new SymbolToken(jcas, beginFromStartOfDocument, endFromStartOfDocument);
        }
        if (clas.equals(PunctuationToken.class)) {
            return new PunctuationToken(jcas, beginFromStartOfDocument, endFromStartOfDocument);
        }
        if (clas.equals(ContractionToken.class)) {
            return new ContractionToken(jcas, beginFromStartOfDocument, endFromStartOfDocument);
        }
        if (clas.equals(BaseToken.class)) {
            return new BaseToken(jcas, beginFromStartOfDocument, endFromStartOfDocument);
        }
        System.err.println("clas=" + clas + " and need to add more code here to support that class");
        return null;
    }

    /** Creates a plain Token at the given token offsets whose text is {@code s[begin, end)}. */
    private Token newPlainToken(String s, int begin, int end, int tokenBegin, int tokenEnd) {
        Token token = new Token(tokenBegin, tokenEnd);
        token.setText(s.substring(begin, end));
        return token;
    }

    /**
     * Stores the number type on {@code nta}: 1 when {@code Tokenizer.isNumber} accepts the
     * text and the text contains no {@code '.'}, otherwise 2.
     *
     * @param nta token to update
     * @param tokenText text covered by the token
     */
    private void setNumType(NumToken nta, String tokenText) {
        boolean numberWithoutPeriod = Tokenizer.isNumber(tokenText) && tokenText.indexOf('.') < 0;
        nta.setNumType(numberWithoutPeriod ? 1 : 2);
    }

    /**
     * Stores on {@code wta} a code describing where digits occur in {@code tokenText}.
     *
     * <ul>
     *   <li>0 - the text is empty or contains no digit</li>
     *   <li>1 - the first character is a digit</li>
     *   <li>3 - the last character is a digit (and the first is not)</li>
     *   <li>2 - a digit occurs somewhere else</li>
     * </ul>
     *
     * @param wta token to update
     * @param tokenText text covered by the token
     */
    private void setNumPosition(WordToken wta, String tokenText) {
        int length = tokenText.length();
        int code = 0;
        if (length > 0) {
            if (Character.isDigit(tokenText.charAt(0))) {
                code = 1;
            } else if (Character.isDigit(tokenText.charAt(length - 1))) {
                code = 3;
            } else {
                for (char c : tokenText.toCharArray()) {
                    if (Character.isDigit(c)) {
                        code = 2;
                        break;
                    }
                }
            }
        }
        wta.setNumPosition(code);
    }

    /**
     * Stores on {@code wta} a code describing the upper-case characters in {@code tokenText}.
     *
     * <ul>
     *   <li>0 - no upper-case character (including empty text)</li>
     *   <li>3 - every character is upper case</li>
     *   <li>1 - exactly one upper-case character, and it is the first character</li>
     *   <li>2 - any other mix</li>
     * </ul>
     *
     * @param wta token to update
     * @param tokenText text covered by the token
     */
    private void setCapitalization(WordToken wta, String tokenText) {
        int total = tokenText.length();
        int upper = 0;
        for (char c : tokenText.toCharArray()) {
            if (Character.isUpperCase(c)) {
                upper++;
            }
        }
        int code;
        if (upper == 0) {
            code = 0;
        } else if (upper == total) {
            code = 3;
        } else if (upper == 1 && Character.isUpperCase(tokenText.charAt(0))) {
            code = 1;
        } else {
            code = 2;
        }
        wta.setCapitalization(code);
    }

    /**
     * Finds where the next token starts, scanning forward from {@code startPosition}.
     *
     * <p>Whitespace is skipped, except that a newline or carriage return stops the scan and is
     * itself reported.
     *
     * @param s text to scan
     * @param startPosition index to start scanning at
     * @return index of the first non-whitespace or end-of-line character at or after
     *         {@code startPosition}, or -1 if the end of {@code s} is reached first
     */
    public int findFirstCharOfNextToken(String s, int startPosition) {
        int cursor = startPosition;
        while (cursor < s.length()) {
            if (cursor < 0) {
                System.out.println("position = " + cursor);
            }
            char current = s.charAt(cursor);
            if (!Character.isWhitespace(current) || this.isEndOfLine(current)) {
                return cursor;
            }
            ++cursor;
        }
        return -1;
    }

    /**
     * Tells whether {@code c} is a line feed or a carriage return.
     *
     * @param c the character to test
     * @return {@code true} for {@code '\n'} and {@code '\r'}, {@code false} otherwise
     */
    private boolean isEndOfLine(char c) {
        switch (c) {
            case '\n':
            case '\r':
                return true;
            default:
                return false;
        }
    }

    /**
     * Runs the built-in manual checks: first the email-address samples, then the number
     * samples. Both print their results to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        TokenizerPTB.runEmailTests();
        TokenizerPTB.runNumberTests();
    }

    /**
     * Feeds every sample in {@code testsForNumbers} to {@code lenIfIsNumberContainingComma},
     * looking at no more than the first 11 characters of each, and prints the sample and the
     * returned length to standard output.
     */
    static void runNumberTests() {
        TokenizerPTB tokenizer = new TokenizerPTB();
        for (int k = 0; k < testsForNumbers.length; ++k) {
            String sample = testsForNumbers[k];
            int limit = Math.min(sample.length(), 11);
            int result = tokenizer.lenIfIsNumberContainingComma(0, sample, limit);
            System.out.println("========== Test NumberWithComma ========== ");
            System.out.println(sample);
            System.out.println(result);
        }
    }

    /**
     * Feeds every sample in {@code testsForEmailAddress} to {@code lenIfIsEmailAddress} twice,
     * once on its own and once behind the prefix {@code "XYZ"} with a matching start offset,
     * and prints both results to standard output.
     */
    static void runEmailTests() {
        TokenizerPTB tokenizer = new TokenizerPTB();
        String prefix = "XYZ";
        for (int k = 0; k < testsForEmailAddress.length; ++k) {
            String sample = testsForEmailAddress[k];
            int plainLength = tokenizer.lenIfIsEmailAddress(0, sample, sample.length());
            String prefixed = prefix + sample;
            int prefixedLength = tokenizer.lenIfIsEmailAddress(prefix.length(), prefixed, sample.length() + prefix.length());
            System.out.println("========== Test ========== ");
            System.out.println("      0123456789ABCDEF");
            System.out.println("  s = " + sample + "\t  and prepend+s = " + prefix + sample);
            System.out.println("  lenIfIsEmailAddress = " + plainLength + "\t   and if prepend, len = " + prefixedLength);
        }
    }
}

