// All dependencies are loaded with CommonJS `require`: plain Node.js cannot
// load a file that mixes `require` calls with a static `import` statement.
const fs = require("fs");
const csv = require("csv-parser");
const dotenv = require("dotenv");

dotenv.config(); // safe to call even if .env (local environment variables) doesn't exist

/**
 * Reads the dataset ID list from the RAISE_DATASET_ID_LIST environment
 * variable, which is expected to hold a JSON array. An unset or empty
 * variable is treated as "[]".
 *
 * Malformed JSON is reported with a readable message and ends the process
 * with exit code 1, matching the other configuration failures below, instead
 * of surfacing as an uncaught SyntaxError.
 *
 * @returns {*} The parsed JSON value; the caller verifies it is a non-empty array.
 */
function readDatasetIds() {
  const raw = process.env.RAISE_DATASET_ID_LIST || "[]";
  try {
    return JSON.parse(raw);
  } catch (err) {
    console.error(`RAISE_DATASET_ID_LIST is not valid JSON: ${err.message}`);
    process.exit(1);
  }
}

// Load dataset ID list from environment variable (JSON array)
const datasetIds = readDatasetIds();

if (!Array.isArray(datasetIds) || datasetIds.length === 0) {
  console.error("RAISE_DATASET_ID_LIST is missing or empty.");
  process.exit(1);
}

// Only the first dataset ID is used; its directory must contain datafile.csv.
const csvFilePath = `${datasetIds[0]}/datafile.csv`;

if (!fs.existsSync(csvFilePath)) {
  console.error(`CSV file not found: ${csvFilePath}`);
  process.exit(1);
}

console.log(`Loading CSV from: ${csvFilePath}`);

// Parsed CSV records, filled by the stream's "data" handler below.
const rows = [];

// Columns converted from text to numbers while loading; all other columns
// are kept exactly as the CSV parser delivers them.
const numericColumns = [
  "sepal_length",
  "sepal_width",
  "petal_length",
  "petal_width"
];

/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} arr - Values to average.
 * @returns {number} The mean, or NaN when `arr` is empty.
 */
function mean(arr) {
  if (arr.length === 0) return NaN;
  return arr.reduce((a, b) => a + b, 0) / arr.length;
}

/**
 * Smallest value in a list of numbers.
 *
 * @param {number[]} arr - Values to scan.
 * @returns {number} The minimum, or NaN when `arr` is empty (rather than a
 *   misleading Infinity).
 */
function min(arr) {
  if (arr.length === 0) return NaN;
  let m = Infinity;
  for (const v of arr) {
    if (v < m) m = v;
  }
  return m;
}

/**
 * Largest value in a list of numbers.
 *
 * @param {number[]} arr - Values to scan.
 * @returns {number} The maximum, or NaN when `arr` is empty (rather than a
 *   misleading -Infinity).
 */
function max(arr) {
  if (arr.length === 0) return NaN;
  let m = -Infinity;
  for (const v of arr) {
    if (v > m) m = v;
  }
  return m;
}

/**
 * Pearson correlation coefficient of two paired series.
 *
 * Pairs in which either value is NaN are skipped, so a single unparseable
 * cell does not turn the whole result into NaN.
 *
 * @param {number[]} xs - First series.
 * @param {number[]} ys - Second series, paired with `xs` by index.
 * @returns {number} The coefficient, or NaN when fewer than two usable pairs
 *   exist or either series has zero variance.
 */
function correlation(xs, ys) {
  const pairedX = [];
  const pairedY = [];
  const length = Math.min(xs.length, ys.length);

  for (let i = 0; i < length; i++) {
    if (!Number.isNaN(xs[i]) && !Number.isNaN(ys[i])) {
      pairedX.push(xs[i]);
      pairedY.push(ys[i]);
    }
  }

  if (pairedX.length < 2) return NaN;

  const xMean = mean(pairedX);
  const yMean = mean(pairedY);

  let num = 0;
  let sx = 0;
  let sy = 0;

  for (let i = 0; i < pairedX.length; i++) {
    const dx = pairedX[i] - xMean;
    const dy = pairedY[i] - yMean;
    num += dx * dy;
    sx += dx * dx;
    sy += dy * dy;
  }

  const denominator = Math.sqrt(sx * sy);
  return denominator === 0 ? NaN : num / denominator;
}

/**
 * Extracts one column from a set of records, dropping NaN entries
 * (cells that could not be parsed as numbers).
 */
function numericValues(records, col) {
  return records.map((r) => r[col]).filter((v) => !Number.isNaN(v));
}

/** Builds the text of results/summary_stats.txt: row count plus mean/min/max per numeric column. */
function buildSummaryText(records) {
  let summaryText = `Total rows: ${records.length}\n`;

  for (const col of numericColumns) {
    const values = numericValues(records, col);

    summaryText += `\n${col}:\n`;
    summaryText += `  mean: ${mean(values)}\n`;
    summaryText += `  min:  ${min(values)}\n`;
    summaryText += `  max:  ${max(values)}\n`;
  }

  return summaryText;
}

/** Builds the text of results/species_means.txt: per-species mean of every numeric column. */
function buildSpeciesMeansText(records) {
  // A prototype-less object keeps species names such as "constructor" from
  // colliding with properties inherited from Object.prototype.
  const speciesGroups = Object.create(null);
  for (const r of records) {
    const s = r.species;
    if (!speciesGroups[s]) speciesGroups[s] = [];
    speciesGroups[s].push(r);
  }

  let speciesMeansText = "";

  for (const [species, group] of Object.entries(speciesGroups)) {
    // NaN cells are skipped here as well, consistent with the summary stats.
    const columnMean = (col) => mean(numericValues(group, col));

    speciesMeansText += `${species}:\n`;
    speciesMeansText += `  sepal_length: ${columnMean("sepal_length")}\n`;
    speciesMeansText += `  sepal_width:  ${columnMean("sepal_width")}\n`;
    speciesMeansText += `  petal_length: ${columnMean("petal_length")}\n`;
    speciesMeansText += `  petal_width:  ${columnMean("petal_width")}\n\n`;
  }

  return speciesMeansText;
}

/** Logs a stream failure and marks the run as failed without cutting off pending output. */
function reportStreamError(err) {
  console.error("Error reading CSV:", err);
  process.exitCode = 1;
}

// Load CSV
fs.createReadStream(csvFilePath)
  // pipe() does not forward source errors to the destination stream, so the
  // file stream needs its own handler; otherwise a read failure would be an
  // unhandled "error" event.
  .on("error", reportStreamError)
  .pipe(csv())
  .on("data", (row) => {
    for (const col of numericColumns) {
      row[col] = Number.parseFloat(row[col]);
    }
    rows.push(row);
  })
  .on("end", () => {
    console.log("Loaded rows:", rows.length);

    // With `recursive: true` this is a no-op when the directory already exists.
    fs.mkdirSync("results", { recursive: true });

    fs.writeFileSync("results/summary_stats.txt", buildSummaryText(rows));
    fs.writeFileSync("results/species_means.txt", buildSpeciesMeansText(rows));

    // Correlation
    const corrValue = correlation(
      rows.map((r) => r.sepal_length),
      rows.map((r) => r.petal_length)
    );

    fs.writeFileSync(
      "results/correlation_matrix.txt",
      `Correlation (sepal_length vs petal_length): ${corrValue}\n`
    );

    console.log("Results written to results/ directory.");
  })
  .on("error", reportStreamError);
