/*
 * Decompiled with CFR 0.152.
 */
package org.apache.uima.annotator;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.CasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;

/**
 * Annotator that walks the document text of one or more CAS views character
 * by character and creates token and sentence annotations.
 *
 * <p>Every character is mapped to one of the coarse {@code CH_*} classes by
 * {@link #getCharacterType(char)}. Runs of letters and numbers form one token,
 * every punctuation or special character forms a token of its own, and
 * whitespace and newlines only terminate the token that is currently open.
 * A sentence ends after {@code '.'}, {@code '!'} or {@code '?'} when the
 * following character is whitespace or a newline.
 *
 * <p>If the configuration parameter {@code SofaNames} holds a non-empty array,
 * the views returned by {@code getViewIterator} for each of these names are
 * processed; otherwise the CAS handed to {@link #process(CAS)} is processed.
 */
public class WhitespaceTokenizer
extends CasAnnotator_ImplBase {
    /** Character class: anything that matches none of the other classes. */
    private static final int CH_SPECIAL = 0;
    /** Character class: digits and other numeric characters. */
    private static final int CH_NUMBER = 1;
    /** Character class: letters, marks, private-use, surrogates, modifier symbols. */
    private static final int CH_LETTER = 2;
    /** Character class: whitespace other than line breaks. */
    private static final int CH_WHITESPACE = 4;
    /** Character class: dash, start, end and other punctuation. */
    private static final int CH_PUNCTUATION = 5;
    /** Character class: line and paragraph separators, {@code '\n'} and {@code '\r'}. */
    private static final int CH_NEWLINE = 6;
    /** Marker for "no token open" and for "no following character". */
    private static final int UNDEFINED = -1;

    public static final String TOKEN_ANNOTATION_NAME = "org.apache.uima.TokenAnnotation";
    public static final String SENTENCE_ANNOTATION_NAME = "org.apache.uima.SentenceAnnotation";
    public static final String TOKEN_TYPE_FEATURE_NAME = "tokenType";
    public static final String MESSAGE_BUNDLE = "org.apache.uima.annotator.whitespaceTokenizerMessages";

    /** Token annotation type, looked up in {@link #typeSystemInit(TypeSystem)}. */
    private Type tokenType;
    /** Sentence annotation type, looked up in {@link #typeSystemInit(TypeSystem)}. */
    private Type sentenceType;
    /** Logger obtained from the context in {@link #initialize(UimaContext)}. */
    private Logger logger;
    /** Value of the {@code SofaNames} configuration parameter; may be {@code null}. */
    private String[] sofaNames;

    /**
     * Tokenizes the configured views of the given CAS, or the CAS itself when
     * no sofa names are configured.
     *
     * @param aCas the CAS to process
     * @throws AnalysisEngineProcessException declared by the overridden method;
     *         not thrown directly by this implementation
     */
    public void process(CAS aCas) throws AnalysisEngineProcessException {
        this.logger.logrb(Level.INFO, "WhitespaceTokenizer", "process", MESSAGE_BUNDLE, "whitespace_tokenizer_info_start_processing");

        for (CAS view : this.collectViews(aCas)) {
            this.tokenizeView(view);
        }

        this.logger.logrb(Level.INFO, "WhitespaceTokenizer", "process", MESSAGE_BUNDLE, "whitespace_tokenizer_info_stop_processing");
    }

    /**
     * Determines which views have to be tokenized.
     *
     * @param aCas the CAS passed to {@link #process(CAS)}
     * @return the views found for the configured sofa names, or a list holding
     *         only {@code aCas} when no sofa names are configured
     */
    private List<CAS> collectViews(CAS aCas) {
        List<CAS> views = new ArrayList<CAS>();
        if (this.sofaNames == null || this.sofaNames.length == 0) {
            views.add(aCas);
            return views;
        }
        for (String sofaName : this.sofaNames) {
            Iterator<?> viewIterator = aCas.getViewIterator(sofaName);
            while (viewIterator.hasNext()) {
                views.add((CAS) viewIterator.next());
            }
        }
        return views;
    }

    /**
     * Creates token and sentence annotations for the document text of one view.
     * A view whose document text is {@code null} is left untouched.
     *
     * @param view the view to tokenize
     */
    private void tokenizeView(CAS view) {
        String documentText = view.getDocumentText();
        if (documentText == null) {
            // No text set on this view: nothing to tokenize, and calling
            // toCharArray() here would fail with a NullPointerException.
            return;
        }

        char[] text = documentText.toCharArray();
        int tokenStart = UNDEFINED;
        int sentenceStart = 0;

        for (int pos = 0; pos < text.length; ++pos) {
            char currentChar = text[pos];
            switch (WhitespaceTokenizer.getCharacterType(currentChar)) {
                case CH_LETTER:
                case CH_NUMBER: {
                    // Open a token unless one is already running.
                    if (tokenStart == UNDEFINED) {
                        tokenStart = pos;
                    }
                    break;
                }
                case CH_WHITESPACE:
                case CH_NEWLINE: {
                    // Separators close the running token and produce nothing themselves.
                    tokenStart = this.closeToken(view, tokenStart, pos);
                    break;
                }
                case CH_SPECIAL: {
                    tokenStart = this.closeToken(view, tokenStart, pos);
                    this.createAnnotation(view, this.tokenType, pos, pos + 1);
                    break;
                }
                case CH_PUNCTUATION: {
                    tokenStart = this.closeToken(view, tokenStart, pos);
                    // The last character of the text has no successor, so it never
                    // ends a sentence here; the end-of-text handling below covers it.
                    int nextCharType = pos + 1 < text.length
                            ? WhitespaceTokenizer.getCharacterType(text[pos + 1])
                            : UNDEFINED;
                    boolean separatorFollows = nextCharType == CH_WHITESPACE || nextCharType == CH_NEWLINE;
                    if (separatorFollows && WhitespaceTokenizer.isSentenceEnd(currentChar)) {
                        // The sentence annotation is added before the punctuation token.
                        this.createAnnotation(view, this.sentenceType, sentenceStart, pos + 1);
                        sentenceStart = pos + 1;
                    }
                    this.createAnnotation(view, this.tokenType, pos, pos + 1);
                    break;
                }
                default: {
                    break;
                }
            }
        }

        // End of text: terminate a token that is still open.
        this.closeToken(view, tokenStart, text.length);
        // A final sentence annotation is always created from the last sentence
        // start to the end of the text, even when that span is empty.
        this.createAnnotation(view, this.sentenceType, sentenceStart, text.length);
    }

    /**
     * Creates a token annotation for the open token, if there is one.
     *
     * @param view the view to annotate
     * @param tokenStart start offset of the open token, or {@link #UNDEFINED}
     * @param endPos end offset (exclusive) of the token
     * @return always {@link #UNDEFINED}, the new "no token open" state
     */
    private int closeToken(CAS view, int tokenStart, int endPos) {
        if (tokenStart != UNDEFINED) {
            this.createAnnotation(view, this.tokenType, tokenStart, endPos);
        }
        return UNDEFINED;
    }

    /**
     * Creates an annotation and adds it to the indexes of the view.
     *
     * @param view the view to annotate
     * @param annotationType type of the annotation to create
     * @param startPos begin offset of the annotation
     * @param endPos end offset of the annotation
     */
    private void createAnnotation(CAS view, Type annotationType, int startPos, int endPos) {
        AnnotationFS annot = view.createAnnotation(annotationType, startPos, endPos);
        view.addFsToIndexes((FeatureStructure)annot);
    }

    /**
     * Tells whether the character is one of the sentence-ending punctuation marks.
     *
     * @param character the character to check
     * @return {@code true} for {@code '.'}, {@code '!'} and {@code '?'}
     */
    private static boolean isSentenceEnd(char character) {
        return character == '.' || character == '!' || character == '?';
    }

    /**
     * Maps a character to one of the {@code CH_*} classes, based on its
     * general category as reported by {@link Character#getType(char)}.
     *
     * @param character the character to classify
     * @return one of {@link #CH_LETTER}, {@link #CH_NUMBER}, {@link #CH_WHITESPACE},
     *         {@link #CH_PUNCTUATION}, {@link #CH_NEWLINE} or {@link #CH_SPECIAL}
     */
    private static int getCharacterType(char character) {
        switch (Character.getType(character)) {
            case Character.UPPERCASE_LETTER:
            case Character.LOWERCASE_LETTER:
            case Character.TITLECASE_LETTER:
            case Character.MODIFIER_LETTER:
            case Character.OTHER_LETTER:
            case Character.NON_SPACING_MARK:
            case Character.ENCLOSING_MARK:
            case Character.COMBINING_SPACING_MARK:
            case Character.PRIVATE_USE:
            case Character.SURROGATE:
            case Character.MODIFIER_SYMBOL: {
                return CH_LETTER;
            }
            case Character.DECIMAL_DIGIT_NUMBER:
            case Character.LETTER_NUMBER:
            case Character.OTHER_NUMBER: {
                return CH_NUMBER;
            }
            case Character.SPACE_SEPARATOR: {
                return CH_WHITESPACE;
            }
            case Character.DASH_PUNCTUATION:
            case Character.START_PUNCTUATION:
            case Character.END_PUNCTUATION:
            case Character.OTHER_PUNCTUATION: {
                return CH_PUNCTUATION;
            }
            case Character.LINE_SEPARATOR:
            case Character.PARAGRAPH_SEPARATOR: {
                return CH_NEWLINE;
            }
            case Character.CONTROL: {
                // Line feed and carriage return are control characters but act as line breaks.
                if (character == '\n' || character == '\r') {
                    return CH_NEWLINE;
                }
                return Character.isWhitespace(character) ? CH_WHITESPACE : CH_SPECIAL;
            }
            default: {
                // All remaining categories: whitespace if Java says so, special otherwise.
                return Character.isWhitespace(character) ? CH_WHITESPACE : CH_SPECIAL;
            }
        }
    }

    /**
     * Looks up the token and sentence annotation types in the given type system.
     *
     * @param typeSystem the type system to read the types from
     * @throws AnalysisEngineProcessException if the superclass implementation fails
     */
    public void typeSystemInit(TypeSystem typeSystem) throws AnalysisEngineProcessException {
        super.typeSystemInit(typeSystem);
        this.tokenType = typeSystem.getType(TOKEN_ANNOTATION_NAME);
        this.sentenceType = typeSystem.getType(SENTENCE_ANNOTATION_NAME);
        this.logger.logrb(Level.INFO, "WhitespaceTokenizer", "typeSystemInit", MESSAGE_BUNDLE, "whitespace_tokenizer_info_typesystem_initialized");
    }

    /**
     * Reads the {@code SofaNames} configuration parameter and obtains the logger.
     *
     * @param context the context this annotator is initialized with
     * @throws ResourceInitializationException if the superclass initialization fails
     */
    public void initialize(UimaContext context) throws ResourceInitializationException {
        super.initialize(context);
        this.sofaNames = (String[])this.getContext().getConfigParameterValue("SofaNames");
        this.logger = context.getLogger();
        this.logger.logrb(Level.INFO, "WhitespaceTokenizer", "initialize", MESSAGE_BUNDLE, "whitespace_tokenizer_info_initialized");
    }
}

