Skip to content

Commit 9d837f9

Browse files
committed
add: skip flag on commands
1 parent 24dbf20 commit 9d837f9

File tree

6 files changed

+144
-107
lines changed

6 files changed

+144
-107
lines changed

openvariant/commands/openvar.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,10 @@ def openvar():
2424
help="Annotation path. eg: /path/annotation_vcf.yaml")
2525
@click.option('--header', is_flag=True, help="Show the result header.")
2626
@click.option('--output', '-o', default=None, help="File to write the output.")
27-
def cat(input_path: str, where: str or None, annotations: str or None, header: bool, output: str or None):
27+
@click.option('--skip', is_flag=True, help="Skip files and directories that are unreadable.")
28+
def cat(input_path: str, where: str or None, annotations: str or None, header: bool, output: str or None, skip: bool):
2829
"""Print the parsed files on the stdout/"output"."""
29-
cat_task(input_path, annotations, where, header, output)
30+
cat_task(input_path, annotations, where, header, output, skip)
3031

3132

3233
@openvar.command(name="count", short_help='Number of rows that matches a specified criterion.')
@@ -38,10 +39,11 @@ def cat(input_path: str, where: str or None, annotations: str or None, header: b
3839
@click.option('--cores', '-c', type=click.INT, default=cpu_count(), help='Maximum processes to run in parallel.')
3940
@click.option('--quite', '-q', is_flag=True, help="Don't show the progress.")
4041
@click.option('--output', '-o', default=None, help="File to write the output.")
42+
@click.option('--skip', is_flag=True, help="Skip files and directories that are unreadable.")
4143
def count(input_path: str, where: str, group_by: str, cores: int, quite: bool, annotations: str or None,
42-
output: str or None) -> None:
44+
output: str or None, skip: bool) -> None:
4345
"""Print on the stdout/"output" the number of rows that meets the criteria."""
44-
result = count_task(input_path, annotations, group_by=group_by, where=where, cores=cores, quite=quite)
46+
result = count_task(input_path, annotations, group_by=group_by, where=where, cores=cores, quite=quite, skip_files=skip)
4547
out_file = None
4648
if output:
4749
out_file = open(output, "w")
@@ -74,14 +76,15 @@ def count(input_path: str, where: str, group_by: str, cores: int, quite: bool, a
7476
@click.option('--cores', '-c', type=click.INT, default=cpu_count(), help='Maximum processes to run in parallel.')
7577
@click.option('--quite', '-q', is_flag=True, help="Don't show the progress.")
7678
@click.option('--output', '-o', help="File to write the output.", default=None)
79+
@click.option('--skip', is_flag=True, help="Skip files and directories that are unreadable.")
7780
def groupby(input_path: str, script: str, where: str, group_by: str, cores: int, quite: bool, annotations: str or None,
78-
header: bool, show: bool, output: str or None):
81+
header: bool, show: bool, output: str or None, skip: bool):
7982
"""Print on the stdout/"output" the parsed files group by a specified field."""
8083
out_file = None
8184
if output:
8285
out_file = open(output, 'w')
8386
for group_key, group_result, command in group_by_task(input_path, annotations, script, key_by=group_by, where=where,
84-
cores=cores, quite=quite, header=header):
87+
cores=cores, quite=quite, header=header, skip_files=skip):
8588
for r in group_result:
8689
if command:
8790
if output:

openvariant/find_files/find_files.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from os import listdir
88
from os.path import isfile, join, isdir, dirname
99
from typing import Generator
10+
import warnings
1011

1112
from openvariant.annotation.annotation import Annotation
1213
from openvariant.annotation.config_annotation import ANNOTATION_EXTENSION
@@ -23,7 +24,7 @@ def _get_annotation(file_path, annotation):
2324
raise AttributeError("Unable to parse annotation file, check its location.")
2425

2526

26-
def _scan_files(base_path: str, annotation: Annotation, fix: bool):
27+
def _scan_files(base_path: str, annotation: Annotation, fix: bool, skip_files: bool):
2728
"""Recursive exploration from a base path"""
2829
if isdir(base_path):
2930
if not fix:
@@ -34,22 +35,32 @@ def _scan_files(base_path: str, annotation: Annotation, fix: bool):
3435
for file_name in list_files:
3536
file_path = join(base_path, file_name)
3637
try:
37-
for f, a in _scan_files(file_path, annotation, fix):
38+
for f, a in _scan_files(file_path, annotation, fix, skip_files):
3839
yield f, a
39-
except PermissionError as e:
40-
raise PermissionError(f"Unable to open {file_name}: {e}")
40+
except PermissionError:
41+
if skip_files:
42+
warnings.warn(f"Permission denied on {file_path}", UserWarning)
43+
else:
44+
raise PermissionError(f"Permission denied on {file_path}")
4145
elif isfile(base_path):
4246
file_path = base_path
4347
try:
4448
for f, a in _get_annotation(file_path, annotation):
4549
yield f, a
46-
except PermissionError as e:
47-
raise PermissionError(f"Unable to open {base_path}: {e}")
50+
except PermissionError:
51+
if skip_files:
52+
warnings.warn(f"Permission denied on {file_path}", UserWarning)
53+
else:
54+
raise PermissionError(f"Permission denied on {file_path}")
4855
else:
49-
raise FileNotFoundError(f"Unable to open {base_path}, it's not a file nor a directory.")
56+
if skip_files:
57+
warnings.warn(f"Unable to open {base_path}, it's not a file nor a directory.", UserWarning)
58+
else:
59+
raise PermissionError(f"Unable to open {base_path}, it's not a file nor a directory.")
60+
5061

5162

52-
def _find_files(base_path: str, annotation: Annotation or None, fix: bool) -> Generator[str, Annotation, None]:
63+
def _find_files(base_path: str, annotation: Annotation or None, fix: bool, skip_files: bool) -> Generator[str, Annotation, None]:
5364
"""Recursive exploration from a base path distinct if there's a fix annotation or no"""
5465
if not fix:
5566
if isfile(base_path):
@@ -59,11 +70,11 @@ def _find_files(base_path: str, annotation: Annotation or None, fix: bool) -> Ge
5970
for annotation_file in glob.iglob(join(annotation_path, "*.{}".format(ANNOTATION_EXTENSION))):
6071
annotation = Annotation(annotation_file)
6172

62-
for f, a in _scan_files(base_path, annotation, fix):
73+
for f, a in _scan_files(base_path, annotation, fix, skip_files):
6374
yield f, a
6475

6576

66-
def findfiles(base_path: str, annotation_path: str or None = None) -> Generator[str, Annotation, None]:
77+
def findfiles(base_path: str, annotation_path: str or None = None, skip_files: bool = False) -> Generator[str, Annotation, None]:
6778
"""Get each file and its proper annotation object.
6879
6980
Parameters
@@ -72,6 +83,8 @@ def findfiles(base_path: str, annotation_path: str or None = None) -> Generator[
7283
Base path of input folder/file.
7384
annotation_path : str or None
7485
Path of annotation file.
86+
skip_files : bool
87+
Skip unreadable files and directories.
7588
7689
Yields
7790
-------
@@ -81,5 +94,5 @@ def findfiles(base_path: str, annotation_path: str or None = None) -> Generator[
8194
The proper schema of each input file.
8295
"""
8396
annotation, fix = (Annotation(annotation_path), True) if annotation_path is not None else (None, False)
84-
for f, a in _find_files(base_path, annotation, fix):
97+
for f, a in _find_files(base_path, annotation, fix, skip_files):
8598
yield f, a

openvariant/tasks/cat.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def _format_line(line: List[str], out_format: str) -> str:
1616

1717

1818
def cat(base_path: str, annotation_path: str or None = None, where: str = None, header_show: bool = True,
19-
output: str or None = None) -> None:
19+
output: str or None = None, skip_files: bool = False) -> None:
2020
"""Print on the stdout/"output" the parsed files.
2121
2222
It will parse each input file with its proper annotation schema, and it'll show the result on the stdout.
@@ -34,12 +34,14 @@ def cat(base_path: str, annotation_path: str or None = None, where: str = None,
3434
Shows header on the output.
3535
output : str or None
3636
Save output on a file.
37+
skip_files : bool
38+
Skip unreadable files and directories.
3739
"""
3840
out_file = None
3941
if output:
4042
out_file = open(output, "w")
41-
for file, annotation in findfiles(base_path, annotation_path):
42-
result = Variant(file, annotation)
43+
for file, annotation in findfiles(base_path, annotation_path, skip_files):
44+
result = Variant(file, annotation, skip_files)
4345
header = result.header
4446
if header_show:
4547
if output:

openvariant/tasks/count.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@
1616
from openvariant.variant.variant import Variant
1717

1818

19-
def _count_task(selection: [str, str], group_by: str, where: str) -> Tuple[int, Union[dict, None]]:
19+
def _count_task(selection: [str, str], group_by: str, where: str, skip_files: bool) -> Tuple[int, Union[dict, None]]:
2020
"""Main functionality for count task"""
2121

2222
i = 0
2323
input_file, input_annotations = selection
2424
annotation = Annotation(input_annotations)
25-
result = Variant(input_file, annotation)
25+
result = Variant(input_file, annotation, skip_files)
2626

2727
if group_by is None:
2828
for _ in result.read(where=where):
@@ -42,7 +42,7 @@ def _count_task(selection: [str, str], group_by: str, where: str) -> Tuple[int,
4242

4343

4444
def count(base_path: str, annotation_path: str or None, group_by: str = None, where: str = None,
45-
cores: int = cpu_count(), quite: bool = False) -> Tuple[int, Union[None, dict]]:
45+
cores: int = cpu_count(), quite: bool = False, skip_files: bool = False) -> Tuple[int, Union[None, dict]]:
4646
"""Print on the stdout the count result.
4747
4848
It'll parse each input file with its proper annotation schema, and it'll show the count result on the stdout.
@@ -62,6 +62,8 @@ def count(base_path: str, annotation_path: str or None, group_by: str = None, wh
6262
Discard progress bar.
6363
cores : int
6464
Number of cores to parallelize the task.
65+
skip_files : bool
66+
Skip unreadable files and directories.
6567
6668
Returns
6769
----------
@@ -76,7 +78,7 @@ def count(base_path: str, annotation_path: str or None, group_by: str = None, wh
7678

7779
with Pool(cores) as pool:
7880
groups = {}
79-
task = partial(_count_task, group_by=group_by, where=where)
81+
task = partial(_count_task, group_by=group_by, where=where, skip_files=skip_files)
8082
map_method = pool.imap_unordered if len(selection) > 1 else map
8183
total = 0
8284
for c, g in tqdm(map_method(task, selection),

openvariant/tasks/groupby.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def _group(base_path: str, annotation_path: str or None, key_by: str) -> List[Tu
4949
return results_by_groups
5050

5151

52-
def _group_by_task(selection, where=None, key_by=None, script='', header=False) -> Tuple[str, List, bool]:
52+
def _group_by_task(selection, where=None, key_by=None, script='', header=False, skip_files=False) -> Tuple[str, List, bool]:
5353
"""Main functionality for group by task"""
5454
group_key, group_values = selection
5555

@@ -59,7 +59,7 @@ def _group_by_task(selection, where=None, key_by=None, script='', header=False)
5959
for value in group_values:
6060
input_file = value[0]
6161
annotation = Annotation(value[1])
62-
result = Variant(input_file, annotation)
62+
result = Variant(input_file, annotation, skip_files)
6363
columns = result.annotation.columns if len(result.annotation.columns) != 0 else result.header
6464

6565
if header:
@@ -83,7 +83,7 @@ def _group_by_task(selection, where=None, key_by=None, script='', header=False)
8383
for value in group_values:
8484
input_file = value[0]
8585
annotation = Annotation(value[1])
86-
result = Variant(input_file, annotation)
86+
result = Variant(input_file, annotation, skip_files)
8787
columns = result.annotation.columns if len(result.annotation.columns) != 0 else result.header
8888

8989
if header:
@@ -116,7 +116,7 @@ def _group_by_task(selection, where=None, key_by=None, script='', header=False)
116116

117117

118118
def group_by(base_path: str, annotation_path: str or None, script: str or None, key_by: str, where: str or None = None,
119-
cores=cpu_count(), quite=False, header: bool = False) -> Generator[Tuple[str, List, bool], None, None]:
119+
cores=cpu_count(), quite=False, header: bool = False, skip_files: bool = False) -> Generator[Tuple[str, List, bool], None, None]:
120120
"""Print on the stdout the group by result.
121121
122122
It'll parse each input file with its proper annotation schema, and it'll show the parsed result separated for each
@@ -141,6 +141,9 @@ def group_by(base_path: str, annotation_path: str or None, script: str or None,
141141
Number of cores to parallelize the task.
142142
header : bool
143143
Show the header on the output.
144+
skip_files : bool
145+
Skip unreadable files and directories.
146+
144147
Returns
145148
----------
146149
int
@@ -150,7 +153,7 @@ def group_by(base_path: str, annotation_path: str or None, script: str or None,
150153
"""
151154
selection = _group(base_path, annotation_path, key_by)
152155
with Pool(cores) as pool:
153-
task = partial(_group_by_task, where=where, key_by=key_by, script=script, header=header)
156+
task = partial(_group_by_task, where=where, key_by=key_by, script=script, header=header, skip_files=skip_files)
154157
map_method = map if cores == 1 or len(selection) <= 1 else pool.imap_unordered
155158

156159
for group_key, group_result, command in tqdm(

0 commit comments

Comments
 (0)