Description
Hi, I'm working on a tool that extracts info from an xlsx file (3 columns: filename, character and time constraint, for use in game audio localisation; the columns come from the script) and uses it to check a delivery of audio files or to prepare batches of audio files based on those 3 values. With files of about 2500 rows the app runs fine, but as the files get bigger the speed decreases rapidly: it takes a full minute to load the data from a file with 47,000 rows, for example. Any thoughts on how this process can be sped up? Right now I'm parsing the data into a Core Data model in SwiftUI/Xcode for a macOS app, using the following code:
func readXLSXFile(fileURL: URL, filenameColumn: String, characterColumn: String, timeConstraintColumn: String, completion: @escaping () -> Void) {
    isParsing = true
    DispatchQueue.global(qos: .userInitiated).async { [unowned self] in
        do {
            guard let file = XLSXFile(filepath: fileURL.path),
                  let sharedStrings = try file.parseSharedStrings(),
                  let firstWorksheet = try file.parseWorksheetPaths().first else {
                DispatchQueue.main.async {
                    print("Error: File could not be opened or worksheet not found.")
                    self.isParsing = false
                    completion()
                }
                return
            }

            let worksheet = try file.parseWorksheet(at: firstWorksheet)
            let rows = Array(worksheet.data?.rows.dropFirst(headerRows) ?? [])
            let rowCount = rows.count
            let chunkSize = 2500 // Adjust chunk size based on your needs

            DispatchQueue.concurrentPerform(iterations: (rowCount + chunkSize - 1) / chunkSize) { index in
                let start = index * chunkSize
                let end = min(start + chunkSize, rowCount)
                if start < rowCount { // Ensure the start index is within the row count
                    let range = start..<end
                    let chunkRows = Array(rows[range])
                    let context = PersistenceController.shared.newBackgroundContext()
                    context.performAndWait {
                        for row in chunkRows {
                            self.processRow(row, in: context, with: sharedStrings, filenameColumn: filenameColumn, characterColumn: characterColumn, timeConstraintColumn: timeConstraintColumn)
                        }
                        try? context.save()
                        context.reset()
                    }
                }
            }

            DispatchQueue.main.async {
                self.isParsing = false
                completion() // Signal that the operation has completed
            }
        } catch {
            DispatchQueue.main.async {
                print("Error parsing file: \(error)")
                self.isParsing = false
                completion()
            }
        }
    }
}
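What I'm not sure about is how the minute actually splits between the CoreXLSX parsing and the Core Data inserts. To narrow that down I'm planning to time the two phases separately, roughly like this (just a sketch; timeParsePhases is my own name and the timings are plain CFAbsoluteTimeGetCurrent):

import Foundation
import CoreXLSX

// Rough sketch: time the XLSX parsing on its own to see whether it or the
// Core Data work dominates for the 47,000-row file.
func timeParsePhases(fileURL: URL) throws {
    let parseStart = CFAbsoluteTimeGetCurrent()

    guard let file = XLSXFile(filepath: fileURL.path),
          let firstWorksheet = try file.parseWorksheetPaths().first else { return }
    _ = try file.parseSharedStrings() // shared strings are part of the parse cost
    let worksheet = try file.parseWorksheet(at: firstWorksheet)
    let rows = worksheet.data?.rows ?? []

    print("CoreXLSX parse: \(CFAbsoluteTimeGetCurrent() - parseStart)s for \(rows.count) rows")

    let insertStart = CFAbsoluteTimeGetCurrent()
    // ... run the same chunked Core Data insert loop as in readXLSXFile ...
    print("Core Data inserts: \(CFAbsoluteTimeGetCurrent() - insertStart)s")
}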
private func processRow(_ row: Row, in context: NSManagedObjectContext, with sharedStrings: SharedStrings, filenameColumn: String, characterColumn: String, timeConstraintColumn: String) {
    let filenameCell = row.cells.first(where: { $0.reference.column == ColumnReference(filenameColumn) })
    let characterCell = row.cells.first(where: { $0.reference.column == ColumnReference(characterColumn) })
    let timeConstraintCell = row.cells.first(where: { $0.reference.column == ColumnReference(timeConstraintColumn) })

    let filename = filenameCell?.stringValue(sharedStrings) ?? ""
    let character = characterCell?.stringValue(sharedStrings) ?? ""
    let timeConstraint = timeConstraintCell?.stringValue(sharedStrings) ?? ""

    let record = Record(context: context) // Create a new Record instance
    record.filename = filename
    record.character = character
    record.timeConstraint = timeConstraint
}
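One thing I suspect (but would like confirmation on): processRow rebuilds ColumnReference(...) three times for every row and scans row.cells three times with first(where:), which adds up over 47,000 rows. A variant I'm considering resolves the references once in readXLSXFile and makes a single pass over the cells per row; this is just a sketch, and the filenameRef/characterRef/timeConstraintRef parameter names are mine:

// Sketch: the ColumnReference values are built once by the caller and passed in,
// and the cells are scanned in a single pass instead of three first(where:) searches.
private func processRow(_ row: Row,
                        in context: NSManagedObjectContext,
                        with sharedStrings: SharedStrings,
                        filenameRef: ColumnReference?,
                        characterRef: ColumnReference?,
                        timeConstraintRef: ColumnReference?) {
    var filename = "", character = "", timeConstraint = ""
    for cell in row.cells {
        let column = cell.reference.column
        if column == filenameRef {
            filename = cell.stringValue(sharedStrings) ?? ""
        } else if column == characterRef {
            character = cell.stringValue(sharedStrings) ?? ""
        } else if column == timeConstraintRef {
            timeConstraint = cell.stringValue(sharedStrings) ?? ""
        }
    }

    let record = Record(context: context)
    record.filename = filename
    record.character = character
    record.timeConstraint = timeConstraint
}

// Caller side, once before the chunk loop in readXLSXFile:
// let filenameRef = ColumnReference(filenameColumn)
// let characterRef = ColumnReference(characterColumn)
// let timeConstraintRef = ColumnReference(timeConstraintColumn)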
private func batchInsertRecords(records: [Record], into context: NSManagedObjectContext) {
    for record in records {
        context.insert(record)
    }
    do {
        try context.save()
        context.reset() // Clear the context to free memory
    } catch {
        print("Failed to batch insert records: \(error)")
    }
}
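I've also been wondering whether creating a managed object per row is part of the slowdown, and whether NSBatchInsertRequest would be faster, since it writes dictionaries straight to the store without instantiating Record objects. A rough sketch of what I have in mind (assuming the entity is named "Record" with string attributes filename, character and timeConstraint, which is what my model uses):

import CoreData

// Sketch: insert a whole chunk of rows with one NSBatchInsertRequest instead of
// creating a Record managed object per row.
func batchInsert(rowValues: [[String: Any]], into context: NSManagedObjectContext) throws {
    let request = NSBatchInsertRequest(entityName: "Record", objects: rowValues)
    request.resultType = .statusOnly
    _ = try context.execute(request)
}

// Caller side: collect dictionaries while walking the rows, then hand them off in one go.
// var rowValues: [[String: Any]] = []
// rowValues.append(["filename": filename, "character": character, "timeConstraint": timeConstraint])
// try batchInsert(rowValues: rowValues, into: context)

The catch, as far as I understand it, is that a batch insert bypasses the context, so anything observing it (a @FetchRequest in the SwiftUI views, for example) only sees the new rows after a refetch or a merge.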