/* Ergo, version 3.3, a program for linear scaling electronic structure
 * calculations.
 * Copyright (C) 2013 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * 
 * Primary academic reference:
 * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
 * with Linearly Scaling Computational Time and Memory Usage,
 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
 * J. Chem. Theory Comput. 7, 340 (2011),
 * <http://dx.doi.org/10.1021/ct100611z>
 * 
 * For further information about Ergo, see <http://www.ergoscf.org>.
 */
#ifdef USE_CHUNKS_AND_TASKS

#include <cstdio>
#include "BasisInfoStructChunk.h"
#include "compute_overlap_task_implementations.h"
#include "integrals_general.h"

// Magnitude cutoff for overlap matrix elements: in
// TaskTypeComputeOnePartOfOverlapMatrix::execute, only elements whose
// absolute value is strictly greater than this threshold are placed in
// the sparse (row, col, value) lists; all others are left out.
const ergo_real MATRIX_ELEMENT_THRESHOLD = 1e-15;

/** Task type that computes one row of the overlap matrix.
 *
 * Inputs, in the order given to CHT_TASK_INPUT (names taken from the
 * execute() definition in this file):
 *  - BasisInfoStructChunk        basisInfo   : basis set information,
 *  - chttl::ChunkVector<double>  extentList  : one extent per basis function,
 *  - chttl::ChunkBasic<int>      blockSize   : passed on as the max leaf size
 *                                              of the resulting matrix,
 *  - chttl::ChunkVector<int>     permutation : index permutation applied to
 *                                              the basis functions,
 *  - chttl::ChunkBasic<int>      rowIndex    : index of the row to compute.
 *
 * The declared output is a CHTMLMatType matrix chunk.
 */
class TaskTypeComputeOnePartOfOverlapMatrix : public cht::Task {
public:
  cht::ID execute(const BasisInfoStructChunk &, const chttl::ChunkVector<double> &, const chttl::ChunkBasic<int> &, const chttl::ChunkVector<int> &, const chttl::ChunkBasic<int> &);
  CHT_TASK_INPUT((BasisInfoStructChunk, chttl::ChunkVector<double>, chttl::ChunkBasic<int>, chttl::ChunkVector<int>, chttl::ChunkBasic<int>));
  CHT_TASK_OUTPUT((CHTMLMatType));
  CHT_TASK_TYPE_DECLARATION;
};

// Task type implementation macro for the task type declared above.
CHT_TASK_TYPE_IMPLEMENTATION((TaskTypeComputeOnePartOfOverlapMatrix));

/** Compute a single element (i,j) of the overlap matrix.
 *
 * The product of basis functions i and j is expanded into a list of
 * simple primitives by get_product_simple_primitives(), and the element
 * is the sum of compute_integral_of_simple_prim() over that list.
 *
 * @param b Basis set information; b.noOfBasisFuncs gives the valid
 *          index range [0, noOfBasisFuncs).
 * @param i Index of the first basis function.
 * @param j Index of the second basis function.
 * @return The sum of the integrals of all product primitives.
 * @throws std::runtime_error if i or j is outside the valid index range,
 *         or if get_product_simple_primitives() reports a primitive
 *         count <= 0.
 */
ergo_real compute_one_element_of_overlap_mat(const BasisInfoStruct & b, int i, int j) {
  const int n = b.noOfBasisFuncs;
  if( i < 0 || i >= n || j < 0 || j >= n ) {
    // An out-of-range index is a caller error. Report it instead of
    // silently returning a zero matrix element, which would hide the
    // problem in the resulting matrix.
    printf("compute_one_element_of_overlap_mat: i = %d, j = %d, n = %d\n", i, j, n);
    throw std::runtime_error("Error in compute_one_element_of_overlap_mat: index out of bounds.");
  }
  // Fixed-capacity scratch buffer that receives the product primitives.
  static const int maxDistrsInTempList = 40000;
  std::vector<DistributionSpecStruct> tempList(maxDistrsInTempList);
  const int nPrimitives = 
    get_product_simple_primitives(b, i,
                                  b, j,
                                  &tempList[0],
                                  maxDistrsInTempList,
                                  0);
  if(nPrimitives <= 0) {
    printf("compute_one_element_of_overlap_mat: i = %d, j = %d, n = %d\n", i, j, n);
    throw std::runtime_error("Error in compute_one_element_of_overlap_mat: (nPrimitives <= 0).");
  }
  // Accumulate the integral of each primitive in the product.
  ergo_real sum = 0;
  for(int k = 0; k < nPrimitives; k++) {
    DistributionSpecStruct* currDistr = &tempList[k];
    sum += compute_integral_of_simple_prim(currDistr);
  }
  return sum;
}

/** Compute one row of the overlap matrix and register a task that turns
 *  it into a sparse matrix.
 *
 * Only columns j >= row are evaluated; every kept off-diagonal element
 * is emitted twice, as (row, j) and (j, row), so that the symmetric
 * counterpart is included. Elements are kept only if their absolute
 * value exceeds MATRIX_ELEMENT_THRESHOLD.
 *
 * @param basisInfo   Basis set information (basisInfo.b is used).
 * @param extentList  Extent of each basis function; a pair of functions
 *                    whose center distance exceeds the sum of their
 *                    extents gets a zero element without any integral
 *                    evaluation.
 * @param blockSize   Passed on as the maximum leaf size of the matrix.
 * @param permutation Maps row/column indices of the matrix to basis
 *                    function indices.
 * @param rowIndex    Index of the row to compute.
 * @return ID of the registered chtml::MatrixAssignFromSparse task.
 */
cht::ID TaskTypeComputeOnePartOfOverlapMatrix::execute(const BasisInfoStructChunk & basisInfo,
                                                       const chttl::ChunkVector<double> & extentList,
                                                       const chttl::ChunkBasic<int> & blockSize,
                                                       const chttl::ChunkVector<int> & permutation,
                                                       const chttl::ChunkBasic<int> & rowIndex) {
  int nBasisFuncs = basisInfo.b.noOfBasisFuncs;
  int row = rowIndex.x;

  // Extent and center of the (permuted) basis function belonging to this row.
  double rowExtent = extentList[permutation[row]];
  double rowX = basisInfo.b.basisFuncList[permutation[row]].centerCoords[0];
  double rowY = basisInfo.b.basisFuncList[permutation[row]].centerCoords[1];
  double rowZ = basisInfo.b.basisFuncList[permutation[row]].centerCoords[2];

  // The overlap matrix is symmetric, so only columns from the diagonal
  // onwards are evaluated. All entries start out as zero.
  std::vector<ergo_real> rowValues(nBasisFuncs);
  for(int col = row; col < nBasisFuncs; col++) {
    double colExtent = extentList[permutation[col]];
    double colX = basisInfo.b.basisFuncList[permutation[col]].centerCoords[0];
    double colY = basisInfo.b.basisFuncList[permutation[col]].centerCoords[1];
    double colZ = basisInfo.b.basisFuncList[permutation[col]].centerCoords[2];
    double deltaX = colX - rowX;
    double deltaY = colY - rowY;
    double deltaZ = colZ - rowZ;
    double centerDistance = std::sqrt(deltaX*deltaX + deltaY*deltaY + deltaZ*deltaZ);
    // Pairs farther apart than the sum of their extents keep the zero value.
    if(centerDistance > (rowExtent + colExtent))
      continue;
    rowValues[col] = compute_one_element_of_overlap_mat(basisInfo.b, permutation[row], permutation[col]);
  }

  // Collect the elements above the threshold as (row, col, value) triplets,
  // adding the mirrored (col, row) entry for every off-diagonal element.
  std::vector<int> rows;
  std::vector<int> cols;
  std::vector<ergo_real> values;
  for(int col = row; col < nBasisFuncs; col++) {
    if(!(fabs(rowValues[col]) > MATRIX_ELEMENT_THRESHOLD))
      continue;
    rows.push_back(row);
    cols.push_back(col);
    values.push_back(rowValues[col]);
    if(row == col)
      continue;
    rows.push_back(col);
    cols.push_back(row);
    values.push_back(rowValues[col]);
  }

  // Wrap the three lists in chunk objects.
  cht::ChunkID cid_rows = registerChunk(new chttl::ChunkVector<int>(rows));
  cht::ChunkID cid_cols = registerChunk(new chttl::ChunkVector<int>(cols));
  cht::ChunkID cid_values = registerChunk(new chttl::ChunkVector<double>(values));

  // Prepare the parameter chunk describing the square matrix to build.
  int nMatrixRows = nBasisFuncs;
  int nMatrixCols = nBasisFuncs;
  int maxLeafSize = blockSize.x;
  typename LeafMatType::Params leaf_params;
  // NOTE: internal blocksize does not exist for basic matrix lib, so we cannot set it then.
#ifdef USE_CHUNKS_AND_TASKS_BSM
  // FIXME: This is not so nice, the internal block size should be set
  //        from the main program.  We do it this easy way since we
  //        expect that the recursive variant of the overlap matrix
  //        construction is the one that will be used.
  leaf_params.blocksize = 1; 
#endif
  cht::ChunkID cid_param = registerChunk(new chtml::MatrixParams<LeafMatType>(nMatrixRows, nMatrixCols, maxLeafSize, 0, 0, leaf_params));

  // Register the task that assembles the matrix from the triplet lists,
  // and hand back its ID as the result of this task.
  return registerTask<chtml::MatrixAssignFromSparse<LeafMatType> >(cid_param, cid_rows, cid_cols, cid_values, cht::persistent);
}


CHT_TASK_TYPE_IMPLEMENTATION((TaskTypeComputeOverlapMatrix));

/** Compute a contiguous range of rows of the overlap matrix by recursive
 *  bisection.
 *
 * A range of exactly one row is delegated to a
 * TaskTypeComputeOnePartOfOverlapMatrix task. A larger range is split
 * into two parts (the first gets nRowsToCompute/2 rows, the second the
 * rest), each handled by a new TaskTypeComputeOverlapMatrix task, and
 * the two results are combined by a chtml::MatrixAdd task.
 *
 * @param cid_basisInfo   Chunk ID of the basis set information.
 * @param cid_extentList  Chunk ID of the basis function extent list.
 * @param cid_blockSize   Chunk ID of the block size.
 * @param cid_permutation Chunk ID of the index permutation.
 * @param startIndex      Index of the first row in the range.
 * @param nRowsToCompute  Number of rows in the range; must be >= 1.
 * @return ID of the task producing the matrix for the requested rows.
 * @throws std::runtime_error if nRowsToCompute.x < 1.
 */
cht::ID TaskTypeComputeOverlapMatrix::execute(const cht::ChunkID & cid_basisInfo,
                                              const cht::ChunkID & cid_extentList,
                                              const cht::ChunkID & cid_blockSize,
                                              const cht::ChunkID & cid_permutation,
                                              const chttl::ChunkBasic<int> & startIndex,
                                              const chttl::ChunkBasic<int> & nRowsToCompute) {
  int nRows = nRowsToCompute.x;
  if(nRows < 1)
    throw std::runtime_error("Error: (nRowsToCompute.x < 1).");

  // Base case: a single row is handed to the per-row task.
  if(nRows == 1) {
    cht::ChunkID cid_rowidx = registerChunk(new chttl::ChunkBasic<int>(startIndex.x));
    return registerTask<TaskTypeComputeOnePartOfOverlapMatrix>(cid_basisInfo, cid_extentList, cid_blockSize, cid_permutation, cid_rowidx, cht::persistent);
  }

  // Recursive case: split the range into a lower and an upper part.
  int nRowsLower = nRows / 2;
  int nRowsUpper = nRows - nRowsLower;
  int firstRowLower = startIndex.x;
  int firstRowUpper = startIndex.x + nRowsLower;

  // Parameter chunks for the two subtasks.
  cht::ChunkID cid_nRowsLower = registerChunk(new chttl::ChunkBasic<int>(nRowsLower));
  cht::ChunkID cid_nRowsUpper = registerChunk(new chttl::ChunkBasic<int>(nRowsUpper));
  cht::ChunkID cid_firstRowLower = registerChunk(new chttl::ChunkBasic<int>(firstRowLower));
  cht::ChunkID cid_firstRowUpper = registerChunk(new chttl::ChunkBasic<int>(firstRowUpper));

  // One subtask per part of the range.
  cht::TaskID tid_lower = registerTask<TaskTypeComputeOverlapMatrix>(cid_basisInfo, cid_extentList, cid_blockSize, cid_permutation, cid_firstRowLower, cid_nRowsLower);
  cht::TaskID tid_upper = registerTask<TaskTypeComputeOverlapMatrix>(cid_basisInfo, cid_extentList, cid_blockSize, cid_permutation, cid_firstRowUpper, cid_nRowsUpper);

  // The result is the sum of the matrices produced by the two subtasks.
  return registerTask<chtml::MatrixAdd<LeafMatType> >(tid_lower, tid_upper, cht::persistent);
} // end execute

#endif
