NodeJS script
How to create a NodeJS script that runs over multiple datasets
This code is an implementation of a NodeJS main script. It demonstrates how to read data from files, process it, and generate results in different formats.
In this example, we will have this directory structure:
|-- main.js
|-- package.json
|-- package-lock.json
- The main.js, package.json and package-lock.json files are mandatory and must have these specific filenames.
- Other files can be added to use different NodeJS modules.
Import Required Libraries
There are no specific required libraries; you can import any library for your own use.
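For instance, the complete script below uses Node's built-in fs module together with the third-party csv-parser package:

// Built-in Node.js module, available without installation
const fs = require("fs");

// Third-party module, which must be declared in package.json
const csv = require("csv-parser");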
Load Datasets Dynamically
This step is only needed when working locally: you will have to download and organize your data files according to the required folder structure (see also Dynamic Dataset(s) Accessing).
Each data file should be stored in a folder named after its ID (the data file’s UUID can be found in its metadata) in the same directory as the main.js script.
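For example, with the two datasets used below, a local working directory would look like this (the UUIDs are placeholders):

|-- main.js
|-- package.json
|-- package-lock.json
|-- 00000000-0000-0000-0000-000000000001
|   |-- datafile.csv
|-- 00000000-0000-0000-0000-000000000002
|   |-- datafile.csv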
Then, create a .env file that defines the variable RAISE_DATASET_ID_LIST. This variable should contain an ordered list of the dataset UUIDs in the following format:
RAISE_DATASET_ID_LIST=["00000000-0000-0000-0000-000000000001","00000000-0000-0000-0000-000000000002"]
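Note that Node does not read .env files by default. When running locally, one option (an assumption about your local setup, not something the platform requires) is the dotenv package:

// Local runs only: copies the variables defined in .env into process.env.
// Assumes dotenv has been installed locally (npm install dotenv);
// on the platform, RAISE_DATASET_ID_LIST is expected to already be present
// in the environment.
require("dotenv").config();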
Read Data Files
Once your experimental environment is ready, you can read the files:
const fs = require("fs");

// Load dataset ID list from environment variable (JSON array)
const datasetIds = JSON.parse(process.env.RAISE_DATASET_ID_LIST || "[]");

if (!Array.isArray(datasetIds) || datasetIds.length === 0) {
  console.error("RAISE_DATASET_ID_LIST is missing or empty.");
  process.exit(1);
}

const csvFilePath = `${datasetIds[0]}/datafile.csv`;

if (!fs.existsSync(csvFilePath)) {
  console.error(`CSV file not found: ${csvFilePath}`);
  process.exit(1);
}

console.log(`Loading CSV from: ${csvFilePath}`);

The data file must always be named “datafile” with the appropriate extension: e.g., .csv, .txt, .edf, .json…
There is no limit to the number of datasets that can be loaded.
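The complete example below only reads the first dataset; as a minimal sketch, all entries of RAISE_DATASET_ID_LIST could be iterated like this (the processing itself is left as a placeholder):

const fs = require("fs");

const datasetIds = JSON.parse(process.env.RAISE_DATASET_ID_LIST || "[]");

// Process each dataset in the order given by RAISE_DATASET_ID_LIST
for (const id of datasetIds) {
  const filePath = `${id}/datafile.csv`; // adjust the extension to your data
  if (!fs.existsSync(filePath)) {
    console.error(`Data file not found: ${filePath}`);
    continue;
  }
  console.log(`Processing dataset ${id} from ${filePath}`);
  // ... read and process the file here ...
}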
Gather Results
Any type of file can be produced as a result: images, CSV, text… Moreover, the number of results is not limited. It is worth noting that results must now be stored under the “results” directory, which is at the same level as the main script.
if (!fs.existsSync("results")) {
  fs.mkdirSync("results", { recursive: true });
}

fs.writeFileSync("results/summary_stats.txt", summaryText);
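Results are not limited to text files; for instance, a CSV result can be written the same way (the filename and contents here are purely illustrative):

// Any file written under results/ is collected as a result
const csvLines = ["column,value", "rows,150"];
fs.writeFileSync("results/example_table.csv", csvLines.join("\n") + "\n");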
Complete main.js script

const fs = require("fs");
const csv = require("csv-parser");

// Load dataset ID list from environment variable (JSON array)
const datasetIds = JSON.parse(process.env.RAISE_DATASET_ID_LIST || "[]");

if (!Array.isArray(datasetIds) || datasetIds.length === 0) {
  console.error("RAISE_DATASET_ID_LIST is missing or empty.");
  process.exit(1);
}

const csvFilePath = `${datasetIds[0]}/datafile.csv`;

if (!fs.existsSync(csvFilePath)) {
  console.error(`CSV file not found: ${csvFilePath}`);
  process.exit(1);
}

console.log(`Loading CSV from: ${csvFilePath}`);

const rows = [];

// Load CSV
fs.createReadStream(csvFilePath)
  .pipe(csv())
  .on("data", (row) => {
    row.sepal_length = parseFloat(row.sepal_length);
    row.sepal_width = parseFloat(row.sepal_width);
    row.petal_length = parseFloat(row.petal_length);
    row.petal_width = parseFloat(row.petal_width);
    rows.push(row);
  })
  .on("end", () => {
    console.log("Loaded rows:", rows.length);

    if (!fs.existsSync("results")) {
      fs.mkdirSync("results", { recursive: true });
    }

    // Summary helpers
    function mean(arr) {
      return arr.reduce((a, b) => a + b, 0) / arr.length;
    }

    function min(arr) {
      let m = Infinity;
      for (const v of arr) if (v < m) m = v;
      return m;
    }

    function max(arr) {
      let m = -Infinity;
      for (const v of arr) if (v > m) m = v;
      return m;
    }

    let summaryText = `Total rows: ${rows.length}\n`;

    const numericColumns = [
      "sepal_length",
      "sepal_width",
      "petal_length",
      "petal_width"
    ];

    numericColumns.forEach(col => {
      const values = rows.map(r => r[col]).filter(v => !isNaN(v));

      summaryText += `\n${col}:\n`;
      summaryText += `  mean: ${mean(values)}\n`;
      summaryText += `  min: ${min(values)}\n`;
      summaryText += `  max: ${max(values)}\n`;
    });

    fs.writeFileSync("results/summary_stats.txt", summaryText);

    // Species group means
    const speciesGroups = {};
    rows.forEach(r => {
      const s = r.species;
      if (!speciesGroups[s]) speciesGroups[s] = [];
      speciesGroups[s].push(r);
    });

    let speciesMeansText = "";

    for (const species in speciesGroups) {
      const group = speciesGroups[species];

      speciesMeansText += `${species}:\n`;
      speciesMeansText += `  sepal_length: ${mean(group.map(r => r.sepal_length))}\n`;
      speciesMeansText += `  sepal_width: ${mean(group.map(r => r.sepal_width))}\n`;
      speciesMeansText += `  petal_length: ${mean(group.map(r => r.petal_length))}\n`;
      speciesMeansText += `  petal_width: ${mean(group.map(r => r.petal_width))}\n\n`;
    }

    fs.writeFileSync("results/species_means.txt", speciesMeansText);

    // Correlation
    function correlation(xs, ys) {
      const xMean = mean(xs), yMean = mean(ys);

      let num = 0;
      let sx = 0;
      let sy = 0;

      for (let i = 0; i < xs.length; i++) {
        const dx = xs[i] - xMean;
        const dy = ys[i] - yMean;
        num += dx * dy;
        sx += dx * dx;
        sy += dy * dy;
      }

      return num / Math.sqrt(sx * sy);
    }

    const corrValue = correlation(
      rows.map(r => r.sepal_length),
      rows.map(r => r.petal_length)
    );

    fs.writeFileSync(
      "results/correlation_matrix.txt",
      `Correlation (sepal_length vs petal_length): ${corrValue}\n`
    );

    console.log("Results written to results/ directory.");
  })
  .on("error", (err) => {
    console.error("Error reading CSV:", err);
  });

package.json file
{ "name": "iris-analysis", "version": "1.0.0", "type": "commonjs", "dependencies": { "csv-parser": "^3.0.0" }}package-lock.json file
{ "name": "iris-analysis", "version": "1.0.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "iris-analysis", "version": "1.0.0", "dependencies": { "csv-parser": "^3.0.0" } }, "node_modules/csv-parser": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.2.0.tgz", "integrity": "sha512-fgKbp+AJbn1h2dcAHKIdKNSSjfp43BZZykXsCjzALjKy80VXQNHPFJ6T9Afwdzoj24aMkq8GwDS7KGcDPpejrA==", "license": "MIT", "bin": { "csv-parser": "bin/csv-parser" }, "engines": { "node": ">= 10" } } }}Logs
Finally, the log system has been improved. If the experiment does not fail, the user’s “console.log()” calls in the script are logged. In the case of an error, the logs record the exact error that caused the execution to fail.
If the main.js execution fails, you will be able to see the exact reason for the failure (wrongly defined variables, syntax errors…).
If the creation of the child container fails, the logs will contain the reason for the failure (incompatible versions in package.json, non-existent package versions…).
You can find some examples in the templates section.