Skip to content

Commit f7a7055

Browse files
authored
Merge pull request #46 from datasets/scripts
[fix][m] Fixing newline behaviour \r\n to \n
2 parents edc2dcd + 34755c5 commit f7a7055

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

scripts/prepare.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,14 @@
99

1010
data_file_path = os.path.join('data', 'subdivision-codes.csv')
1111

12+
def fix_crlf(csv_file, output_file):
    """Write a copy of ``csv_file`` to ``output_file`` with CRLF turned into LF.

    The file is handled as raw bytes, so no text decoding takes place and
    only the two-byte sequence ``\\r\\n`` is rewritten; a lone ``\\r`` is
    left as it is.

    Args:
        csv_file: Path of the file to read.
        output_file: Path of the file to write. May be the same path as
            ``csv_file`` to convert the file in place.
    """
    with open(csv_file, "rb") as f:
        original = f.read()

    # Opening the output in "wb" mode truncates it. When converting in place
    # and there is nothing to convert, skip the rewrite so an already-clean
    # file is never truncated and rewritten for no reason.
    if (
        b"\r\n" not in original
        and os.path.exists(output_file)
        and os.path.samefile(csv_file, output_file)
    ):
        return

    content = original.replace(b"\r\n", b"\n")

    with open(output_file, "wb") as f:
        f.write(content)
19+
1220
def fix_multiline_csv(file_path):
1321
with open(file_path, 'r', encoding='utf-8') as infile:
1422
lines = infile.readlines()
@@ -168,7 +176,7 @@ def process(extracted_files):
168176
# Loop over the file paths and call remove_double_quotes for each
169177
for file_path in file_paths:
170178
remove_double_quotes(file_path)
171-
179+
fix_crlf(file_path, file_path)
172180
fix_multiline_csv(data_file_path)
173181

174182
for file_path in cleaned_files:

0 commit comments

Comments
 (0)