Skip to content

Commit 4020ce4

Browse files
committed Feb 16, 2023
skip rows
1 parent 3fed19e commit 4020ce4

File tree

5 files changed

+26
-3
lines changed

5 files changed

+26
-3
lines changed
 

‎tap_sftp/singer_encodings/csv_handler.py

+3
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,9 @@ def get_row_iterator(iterable, options=None):
3131
which can be used to yield CSV rows."""
3232
options = options or {}
3333

34+
for i in range(options.get('skip_rows', 0)):
35+
iterable.__next__()
36+
3437
# Replace any NULL bytes in the line given to the DictReader
3538
reader = csv.DictReader(
3639
io.TextIOWrapper(iterable, encoding=options.get('encoding', 'utf-8')),

‎tap_sftp/singer_encodings/json_schema.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,8 @@ def sample_file(conn, table_spec, f, sample_rate, max_records, config):
4242
'delimiter': table_spec.get('delimiter', ','),
4343
'file_name': f['filepath'],
4444
'encoding': table_spec.get('encoding', 'utf-8'),
45-
'sanitize_header': table_spec.get('sanitize_header', False)}
45+
'sanitize_header': table_spec.get('sanitize_header', False),
46+
'skip_rows': table_spec.get('skip_rows', 0)}
4647

4748
readers = csv_handler.get_row_iterators(file_handle, options=opts, infer_compression=True)
4849

‎tap_sftp/sync.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,8 @@ def sync_file(sftp_file_spec, stream, table_spec, config, sftp_client):
6464
'delimiter': table_spec.get('delimiter', ','),
6565
'file_name': sftp_file_spec['filepath'],
6666
'encoding': table_spec.get('encoding', 'utf-8'),
67-
'sanitize_header': table_spec.get('sanitize_header', False)}
67+
'sanitize_header': table_spec.get('sanitize_header', False),
68+
'clean_colnames': table_spec.get('clean_colnames', False)}
6869

6970
readers = csv_handler.get_row_iterators(file_handle, options=opts, infer_compression=True)
7071

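(New file; its name is missing from this capture — presumably the skip_rows_file.csv sample fixture that test_skip_header loads.)
+3
Original file line number | Diff line number | Diff line change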
@@ -0,0 +1,3 @@
1+
# Skip reading this row
2+
id,col1
3+
data1,data2

‎tests/tox_tests/test_med_00_parse_csv.py

+16-1
Original file line number | Diff line number | Diff line change
@@ -29,4 +29,19 @@ def test_no_sanitized_headers():
2929

3030
parser = csv_handler.get_row_iterator(file, options=options)
3131

32-
assert parser.fieldnames == ['id', 'Col($2)']
32+
assert parser.fieldnames == ['id', 'Col($2)']
33+
34+
35+
def test_skip_header():
36+
"""Test the parser."""
37+
options = {
38+
'delimiter': ',',
39+
'key_properties': ['id'],
40+
'skip_rows': 1,
41+
}
42+
43+
with open(get_sample_file_path('skip_rows_file.csv'), 'rb') as file:
44+
45+
parser = csv_handler.get_row_iterator(file, options=options)
46+
47+
assert parser.fieldnames == ['id', 'col1']

0 commit comments

Comments
 (0)
Please sign in to comment.