/*
 * Decompiled with CFR 0.152.
 */
package org.apache.kylin.engine.spark.job;

import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import org.apache.hadoop.fs.Path;
import org.apache.kylin.engine.spark.application.SparkApplication;
import org.apache.kylin.metadata.TableMetadataManager;
import org.apache.kylin.metadata.model.ColumnDesc;
import org.apache.kylin.metadata.model.TableDesc;
import org.apache.kylin.metadata.project.ProjectManager;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.hive.utils.ResourceDetectUtils;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.utils.SparkTypeUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.collection.Iterator;
import scala.collection.JavaConversions;
import scala.collection.JavaConverters;
import scala.collection.Seq;

/**
 * Spark application that runs ahead of a table-sampling job and records two
 * items for the table named by the {@code table} job parameter:
 * <ul>
 *   <li>the value returned by {@code ResourceDetectUtils.getResourceSize} for the
 *       paths found in the table's Spark plan, and</li>
 *   <li>the value returned by {@code ResourceDetectUtils.getPartitions} for the
 *       table's executed plan.</li>
 * </ul>
 * Both are written under the job's temporary share directory, in files whose
 * names are prefixed with the table name.
 */
public class ResourceDetectBeforeSampling extends SparkApplication {
    private static final Logger logger = LoggerFactory.getLogger(ResourceDetectBeforeSampling.class);

    /**
     * Reads the {@code table}, {@code project} and {@code maxSampleCount} job
     * parameters, builds the source dataset for the table and writes the two
     * detection results.
     *
     * @throws IllegalArgumentException if no table descriptor is found for the
     *         requested table/project
     * @throws NumberFormatException if {@code maxSampleCount} is not a valid long
     * @throws Exception propagated from the Spark / file-system helpers
     */
    @Override
    protected void doExecute() throws Exception {
        String tableName = this.getParam("table");
        String project = this.getParam("project");
        TableDesc tableDesc = TableMetadataManager.getInstance(this.config).getTableDesc(tableName, project);
        if (tableDesc == null) {
            // Fail with a descriptive message instead of a bare NPE inside getRowDataset.
            throw new IllegalArgumentException(
                    "Table '" + tableName + "' was not found in project '" + project + "'");
        }

        // NOTE(review): this map is not read again in this method. The put() is kept
        // because it may mutate the project's override properties and because
        // parseLong validates the parameter - confirm whether either is still needed.
        LinkedHashMap<String, String> params =
                ProjectManager.getInstance(this.config).getProject(project).getOverrideKylinProps();
        long rowCount = Long.parseLong(this.getParam("maxSampleCount"));
        params.put("maxSampleCount", String.valueOf(rowCount));

        Dataset<Row> dataset = this.getRowDataset(tableDesc);

        // The Scala Seq is handed straight to getResourceSize; converting it to a
        // Java list and back to a Seq added nothing.
        Seq<Path> paths = ResourceDetectUtils.getPaths(dataset.queryExecution().sparkPlan());
        HashMap<String, Long> resourceSize = Maps.newHashMap();
        resourceSize.put(String.valueOf(tableName), ResourceDetectUtils.getResourceSize(paths));

        // Value type left as Object: the return type of getPartitions is not visible here.
        HashMap<String, Object> tableLeafTaskNums = Maps.newHashMap();
        tableLeafTaskNums.put(tableName, ResourceDetectUtils.getPartitions(dataset.queryExecution().executedPlan()));

        ResourceDetectUtils.write(
                new Path(this.config.getJobTmpShareDir(project, this.jobId),
                        tableName + "_" + ResourceDetectUtils.fileName()),
                resourceSize);
        ResourceDetectUtils.write(
                new Path(this.config.getJobTmpShareDir(project, this.jobId),
                        tableName + "_" + ResourceDetectUtils.samplingDetectItemFileSuffix()),
                tableLeafTaskNums);
    }

    /**
     * Builds a {@code select <columns> from <table>} query over every
     * non-computed column of the table, runs it through the Spark session and
     * selects the columns produced by {@code SparkTypeUtil.alignDataType} for the
     * Spark schema and the schema derived from the Kylin column descriptors.
     *
     * @param tableDesc descriptor of the table to read
     * @return the dataset after the {@code alignDataType} selection
     */
    private Dataset<Row> getRowDataset(TableDesc tableDesc) {
        ColumnDesc[] columns = tableDesc.getColumns();
        List<String> tblColNames = Lists.newArrayListWithCapacity(columns.length);
        StructType kylinSchema = new StructType();
        for (ColumnDesc columnDesc : columns) {
            if (columnDesc.isComputedColumn()) {
                // Computed columns are left out of both the query and the schema.
                continue;
            }
            // Every field is added as nullable (third argument).
            kylinSchema = kylinSchema.add(columnDesc.getName(),
                    SparkTypeUtil.toSparkType(columnDesc.getType(), false), true);
            // Back-tick quoting for the column name in the generated SQL.
            tblColNames.add("`" + columnDesc.getName() + "`");
        }

        String colString = Joiner.on(",").join(tblColNames);
        String sql = String.format(Locale.ROOT, "select %s from %s", colString, tableDesc.getIdentity());
        Dataset<Row> df = this.ss.sql(sql);
        StructType sparkSchema = df.schema();

        // treeString() builds a multi-line string; skip it when debug logging is off.
        if (logger.isDebugEnabled()) {
            logger.debug("Source data sql is: {}", sql);
            logger.debug("Kylin schema: {}", kylinSchema.treeString());
        }
        return df.select(SparkTypeUtil.alignDataType(sparkSchema, kylinSchema));
    }

    /**
     * Command-line entry point: creates the application and hands the
     * arguments to {@code execute}.
     *
     * @param args arguments passed through unchanged to {@code execute}
     */
    public static void main(String[] args) {
        ResourceDetectBeforeSampling detect = new ResourceDetectBeforeSampling();
        detect.execute(args);
    }
}

