Skip to content

Commit 2c3b888

Browse files
authored
perf: csv import (#746)
* feat: using worker parse csv
* fix: import multiple column error
* feat: update webpack config for import worker
* fix: vitest worker file path error
* fix: excel import missing key
* feat: using `convertCellValue2DBValue` transfer cellvalue
* feat: add workerId escape conflict
* fix: sqlite e2e error
* feat: compact filter input
1 parent 1dc5216 commit 2c3b888

File tree

12 files changed

+482
-98
lines changed

12 files changed

+482
-98
lines changed

apps/nestjs-backend/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,12 @@
7474
"@types/oauth2orize": "1.11.5",
7575
"@types/papaparse": "5.3.14",
7676
"@types/passport": "1.0.16",
77-
"@types/passport-openidconnect": "0.1.3",
7877
"@types/passport-github2": "1.2.9",
7978
"@types/passport-google-oauth20": "2.0.14",
8079
"@types/passport-jwt": "4.0.1",
8180
"@types/passport-local": "1.0.38",
8281
"@types/passport-oauth2-client-password": "0.1.5",
82+
"@types/passport-openidconnect": "0.1.3",
8383
"@types/pause": "0.1.3",
8484
"@types/sharedb": "3.3.10",
8585
"@types/ws": "8.5.10",
@@ -152,6 +152,7 @@
152152
"cookie-parser": "1.4.6",
153153
"cors": "2.8.5",
154154
"dayjs": "1.11.10",
155+
"esbuild": "0.23.0",
155156
"express": "4.19.1",
156157
"express-session": "1.18.0",
157158
"fs-extra": "11.2.0",

apps/nestjs-backend/src/features/import/open-api/import-open-api.service.ts

Lines changed: 96 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
import { join } from 'path';
2+
import { Worker } from 'worker_threads';
13
import { Injectable, Logger, BadRequestException } from '@nestjs/common';
24
import type { IFieldRo } from '@teable/core';
3-
import { FieldType, FieldKeyType } from '@teable/core';
5+
import { FieldType, FieldKeyType, getRandomString } from '@teable/core';
46
import { PrismaService } from '@teable/db-main-prisma';
57
import type {
68
IAnalyzeRo,
@@ -172,77 +174,106 @@ export class ImportOpenApiService {
172174
sourceColumnMap?: Record<string, number | null>;
173175
}
174176
) {
175-
const { skipFirstNLines, sheetKey, notification } = options;
177+
const { sheetKey, notification } = options;
176178
const { columnInfo, fields, sourceColumnMap } = recordsCal;
177179

178-
importer.parse(
179-
{
180-
skipFirstNLines,
181-
key: sheetKey,
180+
const workerId = `worker_${getRandomString(8)}`;
181+
const worker = new Worker(join(process.cwd(), 'dist', 'worker', 'parse.js'), {
182+
workerData: {
183+
config: importer.getConfig(),
184+
options: {
185+
key: options.sheetKey,
186+
notification: options.notification,
187+
skipFirstNLines: options.skipFirstNLines,
188+
},
189+
id: workerId,
182190
},
183-
async (result) => {
184-
const currentResult = result[sheetKey];
185-
// fill data
186-
const records = currentResult.map((row) => {
187-
const res: { fields: Record<string, unknown> } = {
188-
fields: {},
189-
};
190-
// import new table
191-
if (columnInfo) {
192-
columnInfo.forEach((col, index) => {
193-
const { sourceColumnIndex } = col;
194-
// empty row will be return void row value
195-
const value = Array.isArray(row) ? row[sourceColumnIndex] : null;
196-
res.fields[fields[index].id] = value?.toString();
197-
});
198-
}
199-
// inplace records
200-
if (sourceColumnMap) {
201-
for (const [key, value] of Object.entries(sourceColumnMap)) {
202-
if (value !== null) {
203-
const { type } = fields.find((f) => f.id === key) || {};
204-
// link value should be string
205-
res.fields[key] = type === FieldType.Link ? toString(row[value]) : row[value];
191+
});
192+
193+
worker.on('message', async (result) => {
194+
const { type, data, chunkId, id } = result;
195+
switch (type) {
196+
case 'chunk': {
197+
const currentResult = (data as Record<string, unknown[][]>)[sheetKey];
198+
// fill data
199+
const records = currentResult.map((row) => {
200+
const res: { fields: Record<string, unknown> } = {
201+
fields: {},
202+
};
203+
// import new table
204+
if (columnInfo) {
205+
columnInfo.forEach((col, index) => {
206+
const { sourceColumnIndex } = col;
207+
// empty row will be return void row value
208+
const value = Array.isArray(row) ? row[sourceColumnIndex] : null;
209+
res.fields[fields[index].id] = value?.toString();
210+
});
211+
}
212+
// inplace records
213+
if (sourceColumnMap) {
214+
for (const [key, value] of Object.entries(sourceColumnMap)) {
215+
if (value !== null) {
216+
const { type } = fields.find((f) => f.id === key) || {};
217+
// link value should be string
218+
res.fields[key] = type === FieldType.Link ? toString(row[value]) : row[value];
219+
}
206220
}
207221
}
208-
}
209-
return res;
210-
});
211-
if (records.length === 0) {
212-
return;
213-
}
214-
try {
215-
const createFn = columnInfo
216-
? this.recordOpenApiService.createRecordsOnlySql.bind(this.recordOpenApiService)
217-
: this.recordOpenApiService.multipleCreateRecords.bind(this.recordOpenApiService);
218-
await createFn(table.id, {
219-
fieldKeyType: FieldKeyType.Id,
220-
typecast: true,
221-
records,
222+
return res;
222223
});
223-
} catch (e) {
224-
this.logger.error((e as Error)?.message, (e as Error)?.stack);
225-
throw e;
224+
if (records.length === 0) {
225+
return;
226+
}
227+
try {
228+
const createFn = columnInfo
229+
? this.recordOpenApiService.createRecordsOnlySql.bind(this.recordOpenApiService)
230+
: this.recordOpenApiService.multipleCreateRecords.bind(this.recordOpenApiService);
231+
workerId === id &&
232+
(await createFn(table.id, {
233+
fieldKeyType: FieldKeyType.Id,
234+
typecast: true,
235+
records,
236+
}));
237+
worker.postMessage({ type: 'done', chunkId });
238+
} catch (e) {
239+
this.logger.error((e as Error)?.message, (e as Error)?.stack);
240+
throw e;
241+
}
242+
break;
226243
}
227-
},
228-
() => {
229-
notification &&
230-
this.notificationService.sendImportResultNotify({
231-
baseId,
232-
tableId: table.id,
233-
toUserId: userId,
234-
message: `🎉 ${table.name} ${sourceColumnMap ? 'inplace' : ''} imported successfully`,
235-
});
236-
},
237-
(error) => {
238-
notification &&
239-
this.notificationService.sendImportResultNotify({
240-
baseId,
241-
tableId: table.id,
242-
toUserId: userId,
243-
message: `❌ ${table.name} import failed: ${error}`,
244-
});
244+
case 'finished':
245+
workerId === id &&
246+
notification &&
247+
this.notificationService.sendImportResultNotify({
248+
baseId,
249+
tableId: table.id,
250+
toUserId: userId,
251+
message: `🎉 ${table.name} ${sourceColumnMap ? 'inplace' : ''} imported successfully`,
252+
});
253+
break;
254+
case 'error':
255+
workerId === id &&
256+
notification &&
257+
this.notificationService.sendImportResultNotify({
258+
baseId,
259+
tableId: table.id,
260+
toUserId: userId,
261+
message: `❌ ${table.name} import failed: ${data}`,
262+
});
263+
break;
245264
}
246-
);
265+
});
266+
worker.on('error', (e) => {
267+
notification &&
268+
this.notificationService.sendImportResultNotify({
269+
baseId,
270+
tableId: table.id,
271+
toUserId: userId,
272+
message: `❌ ${table.name} import failed: ${e.message}`,
273+
});
274+
});
275+
worker.on('exit', (code) => {
276+
this.logger.log(`Worker stopped with exit code ${code}`);
277+
});
247278
}
248279
}

apps/nestjs-backend/src/features/import/open-api/import.class.ts

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,11 @@ const validateZodSchemaMap: Record<IValidateTypes, ZodType> = {
3333
[FieldType.SingleLineText]: z.string(),
3434
};
3535

36-
interface IImportConstructorParams {
36+
export interface IImportConstructorParams {
3737
url: string;
3838
type: SUPPORTEDTYPE;
3939
maxRowCount?: number;
40+
fileName?: string;
4041
}
4142

4243
interface IParseResult {
@@ -74,6 +75,14 @@ export abstract class Importer {
7475
]
7576
): Promise<IParseResult>;
7677

78+
private setFileNameFromHeader(fileName: string) {
79+
this.config.fileName = fileName;
80+
}
81+
82+
getConfig() {
83+
return this.config;
84+
}
85+
7786
async getFile() {
7887
const { url, type } = this.config;
7988
const { body: stream, headers } = await fetch(url);
@@ -97,12 +106,29 @@ export abstract class Importer {
97106
);
98107
}
99108

100-
return stream;
109+
const contentDisposition = headers.get('content-disposition');
110+
let fileName = 'Import Table.csv';
111+
112+
if (contentDisposition) {
113+
const fileNameMatch =
114+
contentDisposition.match(/filename\*=UTF-8''([^;]+)/) ||
115+
contentDisposition.match(/filename="?([^"]+)"?/);
116+
if (fileNameMatch) {
117+
fileName = fileNameMatch[1];
118+
}
119+
}
120+
121+
const finalFileName = fileName.split('.').shift() as string;
122+
123+
this.setFileNameFromHeader(decodeURIComponent(finalFileName));
124+
125+
return { stream, fileName: finalFileName };
101126
}
102127

103128
async genColumns() {
104129
const supportTypes = Importer.SUPPORTEDTYPE;
105130
const parseResult = await this.parse();
131+
const { fileName, type } = this.config;
106132
const result: IAnalyzeVo['worksheets'] = {};
107133

108134
for (const [sheetName, cols] of Object.entries(parseResult)) {
@@ -154,7 +180,7 @@ export abstract class Importer {
154180
});
155181

156182
result[sheetName] = {
157-
name: sheetName,
183+
name: type === SUPPORTEDTYPE.EXCEL ? sheetName : fileName ? fileName : sheetName,
158184
columns: calculatedColumnHeaders,
159185
};
160186
}
@@ -185,7 +211,7 @@ export class CsvImporter extends Importer {
185211
]
186212
): Promise<unknown> {
187213
const [options, chunkCb, onFinished, onError] = args;
188-
const stream = await this.getFile();
214+
const { stream } = await this.getFile();
189215

190216
// chunk parse
191217
if (options && chunkCb) {
@@ -299,7 +325,7 @@ export class ExcelImporter extends Importer {
299325
onFinished?: () => void,
300326
onError?: (errorMsg: string) => void
301327
): Promise<unknown> {
302-
const fileSteam = await this.getFile();
328+
const { stream: fileSteam } = await this.getFile();
303329

304330
const asyncRs = async (stream: NodeJS.ReadableStream): Promise<IParseResult> =>
305331
new Promise((res, rej) => {

apps/nestjs-backend/src/features/record/record.service.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -676,18 +676,19 @@ export class RecordService {
676676
await this.creditCheck(tableId);
677677
const dbTableName = await this.getDbTableName(tableId);
678678
const fields = await this.getFieldsByProjection(tableId);
679-
const fieldsMap = fields.reduce(
679+
const fieldInstanceMap = fields.reduce(
680680
(map, curField) => {
681-
map[curField.id] = curField.dbFieldName;
681+
map[curField.id] = curField;
682682
return map;
683683
},
684-
{} as Record<string, string>
684+
{} as Record<string, IFieldInstance>
685685
);
686686

687687
const newRecords = records.map((record) => {
688688
const fieldsValues: Record<string, unknown> = {};
689689
Object.entries(record.fields).forEach(([fieldId, value]) => {
690-
fieldsValues[fieldsMap[fieldId]] = value;
690+
const fieldInstance = fieldInstanceMap[fieldId];
691+
fieldsValues[fieldInstance.dbFieldName] = fieldInstance.convertCellValue2DBValue(value);
691692
});
692693
return {
693694
__id: generateRecordId(),
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import { parentPort, workerData } from 'worker_threads';
2+
import { getRandomString } from '@teable/core';
3+
import type { IImportConstructorParams } from '../features/import/open-api/import.class';
4+
import { importerFactory } from '../features/import/open-api/import.class';
5+
6+
const parse = () => {
7+
const { config, options, id } = { ...workerData } as {
8+
config: IImportConstructorParams;
9+
options: {
10+
skipFirstNLines: number;
11+
key: string;
12+
};
13+
id: string;
14+
};
15+
const importer = importerFactory(config.type, config);
16+
importer.parse(
17+
{ ...options },
18+
async (chunk) => {
19+
return await new Promise((resolve) => {
20+
const chunkId = `chunk_${getRandomString(8)}`;
21+
parentPort?.postMessage({ type: 'chunk', data: chunk, chunkId, id });
22+
parentPort?.on('message', (result) => {
23+
const { type, chunkId: tunnelChunkId } = result;
24+
if (type === 'done' && tunnelChunkId === chunkId) {
25+
resolve();
26+
}
27+
});
28+
});
29+
},
30+
() => {
31+
parentPort?.postMessage({ type: 'finished', id });
32+
parentPort?.close();
33+
},
34+
(error) => {
35+
parentPort?.postMessage({ type: 'error', data: error, id });
36+
parentPort?.close();
37+
}
38+
);
39+
};
40+
41+
parse();

apps/nestjs-backend/test/table-import.e2e-spec.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ const genTestFiles = async () => {
124124

125125
const {
126126
data: { presignedUrl },
127-
} = await apiNotify(token);
127+
} = await apiNotify(token, undefined, 'Import Table.csv');
128128

129129
result[format] = {
130130
path: tmpPath,
@@ -222,6 +222,8 @@ describe('OpenAPI ImportController (e2e)', () => {
222222
});
223223

224224
describe('/import/{baseId} OpenAPI ImportController (e2e) (Post)', () => {
225+
const delay = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
226+
225227
it.each(testFileFormats.filter((format) => format !== TestFileFormat.TXT))(
226228
'should create a new Table from %s file',
227229
async (format) => {
@@ -266,10 +268,15 @@ describe('OpenAPI ImportController (e2e)', () => {
266268
name: field.name,
267269
}));
268270

269-
await apiGetTableById(baseId, table.data[0].id);
271+
await delay(1000);
272+
273+
const { records } = await apiGetTableById(baseId, table.data[0].id, {
274+
includeContent: true,
275+
});
270276

271277
bases.push([baseId, id]);
272278

279+
expect(records?.length).toBe(2);
273280
expect(createdFields).toEqual(assertHeaders);
274281
}
275282
);

0 commit comments

Comments
 (0)