-
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
98 changed files
with
2,917 additions
and
839 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
const { v4 } = require("uuid"); | ||
const xlsx = require("node-xlsx").default; | ||
const path = require("path"); | ||
const fs = require("fs"); | ||
const { | ||
createdDate, | ||
trashFile, | ||
writeToServerDocuments, | ||
} = require("../../utils/files"); | ||
const { tokenizeString } = require("../../utils/tokenizer"); | ||
const { default: slugify } = require("slugify"); | ||
|
||
/**
 * Serializes a two-dimensional array of cell values into CSV text.
 *
 * Rows are joined with "\n" and cells with ",". `null`/`undefined` cells
 * become empty fields. A cell whose text contains a comma, a double quote or
 * a line break is wrapped in double quotes, and embedded double quotes are
 * doubled, so the field survives a round trip through a CSV parser.
 *
 * @param {Array<Array<*>>} data - Rows of cell values.
 * @returns {string} The CSV text; an empty string when `data` is not an array
 *   or has no rows.
 */
function convertToCSV(data) {
  if (!Array.isArray(data)) return "";

  // Characters that force a field to be quoted.
  const needsQuoting = /[",\r\n]/;

  return data
    .map((row) =>
      (Array.isArray(row) ? row : [])
        .map((cell) => {
          if (cell === null || cell === undefined) return "";
          const text = String(cell);
          if (!needsQuoting.test(text)) return text;
          // Escape embedded quotes by doubling them before wrapping.
          return `"${text.replace(/"/g, '""')}"`;
        })
        .join(",")
    )
    .join("\n");
}
|
||
/**
 * Converts a spreadsheet file into one document per non-empty sheet.
 *
 * The workbook is parsed with `xlsx.parse`, each sheet is serialized to CSV
 * text via `convertToCSV`, and the resulting document record is handed to
 * `writeToServerDocuments` with a per-upload output folder. A sheet that fails
 * to convert is logged and skipped so the remaining sheets are still
 * processed. The source file is always passed to `trashFile` once parsing and
 * conversion have been attempted, whether or not they succeeded.
 *
 * @param {Object} params
 * @param {string} [params.fullFilePath=""] - Path of the spreadsheet to parse.
 * @param {string} [params.filename=""] - Display name used in titles, the
 *   output folder name and log/error messages.
 * @returns {Promise<{success: boolean, reason: (string|null), documents: Array}>}
 *   `success` is false (with a `reason`) when the workbook could not be
 *   processed or produced no documents.
 */
async function asXlsx({ fullFilePath = "", filename = "" }) {
  const documents = [];
  // Short random suffix keeps folders for same-named uploads apart.
  const folderName = slugify(`${path.basename(filename)}-${v4().slice(0, 4)}`, {
    lower: true,
    trim: true,
  });

  const outFolderPath =
    process.env.NODE_ENV === "development"
      ? path.resolve(
          __dirname,
          `../../../server/storage/documents/${folderName}`
        )
      : path.resolve(process.env.STORAGE_DIR, `documents/${folderName}`);

  try {
    const workSheetsFromFile = xlsx.parse(fullFilePath);
    if (!fs.existsSync(outFolderPath))
      fs.mkdirSync(outFolderPath, { recursive: true });

    for (const sheet of workSheetsFromFile) {
      // Read the name outside the inner try so the catch handler below can
      // reference it; otherwise a failing sheet would raise a ReferenceError
      // in the handler and abort the whole workbook.
      const name = sheet?.name;
      try {
        const content = convertToCSV(sheet.data);

        if (!content?.length) {
          console.warn(`Sheet "${name}" is empty. Skipping.`);
          continue;
        }

        console.log(`-- Processing sheet: ${name} --`);
        const sheetData = {
          id: v4(),
          url: `file://${path.join(outFolderPath, `${slugify(name)}.csv`)}`,
          title: `${filename} - Sheet:${name}`,
          docAuthor: "Unknown",
          description: `Spreadsheet data from sheet: ${name}`,
          docSource: "an xlsx file uploaded by the user.",
          chunkSource: "",
          published: createdDate(fullFilePath),
          wordCount: content.split(/\s+/).length,
          pageContent: content,
          token_count_estimate: tokenizeString(content).length,
        };

        const document = writeToServerDocuments(
          sheetData,
          `sheet-${slugify(name)}`,
          outFolderPath
        );
        documents.push(document);
        console.log(
          `[SUCCESS]: Sheet "${name}" converted & ready for embedding.`
        );
      } catch (err) {
        // Best effort: report the failing sheet and move on to the next one.
        console.error(`Error processing sheet "${name}":`, err);
      }
    }
  } catch (err) {
    console.error("Could not process xlsx file!", err);
    return {
      success: false,
      reason: `Error processing ${filename}: ${err.message}`,
      documents: [],
    };
  } finally {
    trashFile(fullFilePath);
  }

  if (documents.length === 0) {
    console.error(`No valid sheets found in ${filename}.`);
    return {
      success: false,
      reason: `No valid sheets found in ${filename}.`,
      documents: [],
    };
  }

  console.log(
    `[SUCCESS]: ${filename} fully processed. Created ${documents.length} document(s).\n`
  );
  return { success: true, reason: null, documents };
}
|
||
module.exports = asXlsx; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2326,6 +2326,13 @@ node-html-parser@^6.1.13: | |
css-select "^5.1.0" | ||
he "1.2.0" | ||
|
||
node-xlsx@^0.24.0: | ||
version "0.24.0" | ||
resolved "https://registry.yarnpkg.com/node-xlsx/-/node-xlsx-0.24.0.tgz#a6a365acb18ad37c66c2b254b6ebe0c22dc9dc6f" | ||
integrity sha512-1olwK48XK9nXZsyH/FCltvGrQYvXXZuxVitxXXv2GIuRm51aBi1+5KwR4rWM4KeO61sFU+00913WLZTD+AcXEg== | ||
dependencies: | ||
xlsx "https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz" | ||
|
||
nodemailer@6.9.13: | ||
version "6.9.13" | ||
resolved "https://registry.yarnpkg.com/nodemailer/-/nodemailer-6.9.13.tgz#5b292bf1e92645f4852ca872c56a6ba6c4a3d3d6" | ||
|
@@ -3528,6 +3535,10 @@ ws@8.14.2: | |
resolved "https://registry.yarnpkg.com/ws/-/ws-8.14.2.tgz#6c249a806eb2db7a20d26d51e7709eab7b2e6c7f" | ||
integrity sha512-wEBG1ftX4jcglPxgFCMJmZ2PLtSbJ2Peg6TmpJFTbe9GZYOQCDPdMYu/Tm0/bGZkw8paZnJY45J4K2PZrLYq8g== | ||
|
||
"xlsx@https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz": | ||
version "0.20.2" | ||
resolved "https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz#0f64eeed3f1a46e64724620c3553f2dbd3cd2d7d" | ||
|
||
xml2js@^0.6.2: | ||
version "0.6.2" | ||
resolved "https://registry.yarnpkg.com/xml2js/-/xml2js-0.6.2.tgz#dd0b630083aa09c161e25a4d0901e2b2a929b499" | ||
|
Oops, something went wrong.