/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysds.runtime.transform.tokenize;

import java.util.ArrayList;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.transform.tokenize.Tokenizer;
import org.apache.sysds.runtime.transform.tokenize.TokenizerPost;
import org.apache.sysds.runtime.transform.tokenize.TokenizerPostCount;
import org.apache.sysds.runtime.transform.tokenize.TokenizerPostHash;
import org.apache.sysds.runtime.transform.tokenize.TokenizerPostPosition;
import org.apache.sysds.runtime.transform.tokenize.TokenizerPre;
import org.apache.sysds.runtime.transform.tokenize.TokenizerPreNgram;
import org.apache.sysds.runtime.transform.tokenize.TokenizerPreWhitespaceSplit;
import org.apache.wink.json4j.JSONArray;
import org.apache.wink.json4j.JSONObject;

/**
 * Factory that assembles a {@link Tokenizer} from a JSON tokenization specification.
 */
public class TokenizerFactory {
    /**
     * Parses the given JSON specification and builds the matching tokenizer.
     *
     * <p>Keys read from the specification:
     * <ul>
     *   <li>{@code algo} (read unconditionally): {@code "split"} selects
     *       {@link TokenizerPreWhitespaceSplit}, {@code "ngram"} selects {@link TokenizerPreNgram}</li>
     *   <li>{@code algo_params} (read only when present): JSON object handed to the pre-tokenizer,
     *       {@code null} otherwise</li>
     *   <li>{@code out} (read unconditionally): {@code "count"}, {@code "position"} or {@code "hash"}
     *       select {@link TokenizerPostCount}, {@link TokenizerPostPosition} or
     *       {@link TokenizerPostHash} respectively</li>
     *   <li>{@code out_params} (read only when present): JSON object handed to the post-processor,
     *       {@code null} otherwise</li>
     *   <li>{@code tokenize_col} (read unconditionally): int handed to the pre-tokenizer</li>
     *   <li>{@code id_cols} (read unconditionally): JSON array of ints; the list is handed to the
     *       pre-tokenizer and its size to the post-processor</li>
     *   <li>{@code format_wide} (read only when present): boolean, {@code false} when absent</li>
     * </ul>
     *
     * @param spec JSON text describing the tokenization
     * @param maxTokens value forwarded unchanged to the selected post-processor
     * @return a tokenizer combining the selected pre-tokenizer and post-processor
     * @throws DMLRuntimeException wrapping any exception raised while parsing the specification or
     *         constructing the components, including the {@link IllegalArgumentException} raised
     *         for an unsupported {@code algo} or {@code out} value
     */
    public static Tokenizer createTokenizer(String spec, int maxTokens) {
        try {
            final JSONObject json = new JSONObject(spec);

            // Pre-tokenization algorithm and its optional parameters.
            final String algo = json.getString("algo");
            final JSONObject algoParams = json.has("algo_params") ? json.getJSONObject("algo_params") : null;

            // Output representation and its optional parameters.
            final String out = json.getString("out");
            final JSONObject outParams = json.has("out_params") ? json.getJSONObject("out_params") : null;

            final int tokenizeCol = json.getInt("tokenize_col");

            // Copy the id column indexes out of the JSON array.
            final ArrayList<Integer> idCols = new ArrayList<>();
            final JSONArray rawIdCols = json.getJSONArray("id_cols");
            for (int pos = 0; pos < rawIdCols.length(); pos++) {
                idCols.add(rawIdCols.getInt(pos));
            }
            final int numIdCols = idCols.size();

            final boolean wideFormat = json.has("format_wide") && json.getBoolean("format_wide");

            // The pre-tokenizer is constructed before the output representation is validated.
            final TokenizerPre pre;
            switch (algo) {
                case "split":
                    pre = new TokenizerPreWhitespaceSplit(idCols, tokenizeCol, algoParams);
                    break;
                case "ngram":
                    pre = new TokenizerPreNgram(idCols, tokenizeCol, algoParams);
                    break;
                default:
                    throw new IllegalArgumentException("Algorithm {algo=" + algo + "} is not supported.");
            }

            final TokenizerPost post;
            switch (out) {
                case "count":
                    post = new TokenizerPostCount(outParams, numIdCols, maxTokens, wideFormat);
                    break;
                case "position":
                    post = new TokenizerPostPosition(outParams, numIdCols, maxTokens, wideFormat);
                    break;
                case "hash":
                    post = new TokenizerPostHash(outParams, numIdCols, maxTokens, wideFormat);
                    break;
                default:
                    throw new IllegalArgumentException("Output representation {out=" + out + "} is not supported.");
            }

            return new Tokenizer(pre, post);
        } catch (Exception ex) {
            // Every failure, whatever its type, surfaces to callers as a DMLRuntimeException.
            throw new DMLRuntimeException(ex);
        }
    }
}

