lab-intent-classifier/utils/convert_to_csv.js

34 lines
996 B
JavaScript
Raw Normal View History

2025-01-07 21:59:48 -08:00
const fs = require("fs");
const path = require("path");
// Default locations of the labeled source text and the generated CSV.
// Both are resolved against the directory that contains this script,
// so they do not depend on the current working directory.
const inputFilePath = path.resolve(__dirname, "data/gpt-dataset.txt");
const outputFilePath = path.resolve(__dirname, "data/dataset.csv");
/**
 * Wrap a value in double quotes for use as a CSV field, doubling any
 * double quotes it already contains.
 *
 * @param {string} value - Raw field value.
 * @returns {string} The quoted field.
 */
function quoteCsvField(value) {
  return `"${value.replace(/"/g, '""')}"`;
}

/**
 * Convert a labeled text file into a two-column CSV file.
 *
 * Every occurrence of `__label__<name> <text>` in the input becomes one CSV
 * row under the header `Label,Text`. The label is written without its
 * leading double underscore (i.e. as `label__<name>`), and the text runs to
 * the end of the line. Lines that do not contain that pattern are skipped.
 *
 * Errors (unreadable input, unwritable output, ...) are not thrown: they are
 * logged to stderr and the process exit code is set to 1 so that calling
 * shell scripts can detect the failure.
 *
 * @param {string} inputFile - Path of the labeled text file to read (UTF-8).
 * @param {string} outputFile - Path of the CSV file to write.
 * @returns {void}
 */
function convertToCSV(inputFile, outputFile) {
  try {
    const data = fs.readFileSync(inputFile, "utf8");

    // `\S+` keeps the label on a single line. A pattern that only excludes
    // the space character would let a label-only line absorb the newline
    // and the whole next record into the label.
    const rows = Array.from(
      data.matchAll(/__(label__\S+) (.+)/g),
      ([, label, text]) => {
        // A label cannot contain whitespace, so only commas and quotes can
        // break the column layout; quote the label only in that case so
        // ordinary labels are emitted exactly as before.
        const labelField = /[",]/.test(label) ? quoteCsvField(label) : label;
        return `${labelField},${quoteCsvField(text)}\n`;
      }
    );

    const csvContent = `Label,Text\n${rows.join("")}`;

    fs.writeFileSync(outputFile, csvContent);
    console.log(`CSV file has been saved at ${outputFile}`);
  } catch (error) {
    console.error("Error:", error);
    // Report the failure to the caller's shell without throwing.
    process.exitCode = 1;
  }
}
// Script entry point: run the conversion as soon as the file is loaded,
// using the default input and output paths defined above.
convertToCSV(inputFilePath, outputFilePath);