Skip to content

Commit 0358f81

Browse files
authored
Refactor and Improve CLI Functionality (#13)
* remove --show-ui * Update test_cli.py * Exclude source models from expanded models in DbtColumnLineageExtractor * lint * logger * Update utils.py * remove verbose * Enhance model matching logic to prioritize exact name matches and improve logging for multiple potential matches * lint * Improve logging for lineage extraction by adding warnings for skipped output generation when no matches or lineage information is found. * self ref * self ref * bump to 0.1.7b2
1 parent b45aca0 commit 0358f81

File tree

9 files changed

+448
-255
lines changed

9 files changed

+448
-255
lines changed

py_package/dbt_column_lineage_extractor/__init__.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
read_json,
55
pretty_print_dict,
66
write_dict_to_file,
7-
read_dict_from_file
7+
read_dict_from_file,
8+
setup_logging
89
)
910

1011
__all__ = [
@@ -15,4 +16,5 @@
1516
"pretty_print_dict",
1617
"write_dict_to_file",
1718
"read_dict_from_file",
19+
"setup_logging",
1820
]

py_package/dbt_column_lineage_extractor/cli_direct.py

+56-31
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,29 @@
22
import dbt_column_lineage_extractor.utils as utils
33
from dbt_column_lineage_extractor import DbtColumnLineageExtractor
44

5+
56
def main():
67
parser = argparse.ArgumentParser(description="DBT Column Lineage Extractor CLI")
7-
parser.add_argument('--manifest', default='./inputs/manifest.json', help='Path to the manifest.json file, default to ./inputs/manifest.json')
8-
parser.add_argument('--catalog', default='./inputs/catalog.json', help='Path to the catalog.json file, default to ./inputs/catalog.json')
9-
parser.add_argument('--dialect', default='snowflake', help='SQL dialect to use, default is snowflake, more dialects at https://github.com/tobymao/sqlglot/tree/v25.24.5/sqlglot/dialects')
108
parser.add_argument(
11-
'--model',
12-
nargs='*',
13-
default=[],
14-
help='''List of models to extract lineage for using dbt-style selectors:
9+
"--manifest",
10+
default="./inputs/manifest.json",
11+
help="Path to the manifest.json file, default to ./inputs/manifest.json",
12+
)
13+
parser.add_argument(
14+
"--catalog",
15+
default="./inputs/catalog.json",
16+
help="Path to the catalog.json file, default to ./inputs/catalog.json",
17+
)
18+
parser.add_argument(
19+
"--dialect",
20+
default="snowflake",
21+
help="SQL dialect to use, default is snowflake, more dialects at https://github.com/tobymao/sqlglot/tree/v25.24.5/sqlglot/dialects",
22+
)
23+
parser.add_argument(
24+
"--model",
25+
nargs="*",
26+
default=[],
27+
help="""List of models to extract lineage for using dbt-style selectors:
1528
- Simple model names: model_name
1629
- Include ancestors: +model_name (include upstream/parent models)
1730
- Include descendants: model_name+ (include downstream/child models)
@@ -20,15 +33,28 @@ def main():
2033
- Tag filtering: tag:my_tag (models with specific tag)
2134
- Path filtering: path:models/finance (models in specific path)
2235
- Package filtering: package:my_package (models in specific package)
23-
Default behavior extracts lineage for all models.'''
36+
Default behavior extracts lineage for all models.""",
37+
)
38+
parser.add_argument(
39+
"--model-list-json",
40+
help="Path to a JSON file containing a list of models to extract lineage for. If specified, this takes precedence over --model",
41+
)
42+
parser.add_argument(
43+
"--output-dir",
44+
default="./outputs",
45+
help="Directory to write output json files, default to ./outputs",
46+
)
47+
parser.add_argument(
48+
"--continue-on-error",
49+
action="store_true",
50+
help="Continue processing even if some models fail",
2451
)
25-
parser.add_argument('--model-list-json', help='Path to a JSON file containing a list of models to extract lineage for. If specified, this takes precedence over --model')
26-
parser.add_argument('--output-dir', default='./outputs', help='Directory to write output json files, default to ./outputs')
27-
parser.add_argument('--show-ui', action='store_true', help='Flag to show lineage outputs in the console')
28-
parser.add_argument('--continue-on-error', action='store_true', help='Continue processing even if some models fail')
2952

3053
args = parser.parse_args()
3154

55+
# Set up logging
56+
logger = utils.setup_logging()
57+
3258
try:
3359
selected_models = args.model
3460
if args.model_list_json:
@@ -37,7 +63,7 @@ def main():
3763
if not isinstance(selected_models, list):
3864
raise ValueError("The JSON file must contain a list of model names")
3965
except Exception as e:
40-
print(f"Error reading model list from JSON file: {e}")
66+
logger.error(f"Error reading model list from JSON file: {e}")
4167
return 1
4268

4369
extractor = DbtColumnLineageExtractor(
@@ -47,17 +73,19 @@ def main():
4773
dialect=args.dialect,
4874
)
4975

50-
print(f"Processing {len(extractor.selected_models)} models after selector expansion")
51-
76+
logger.info(f"Processing {len(extractor.selected_models)} models after selector expansion")
77+
5278
try:
5379
lineage_map = extractor.build_lineage_map()
54-
80+
5581
if not lineage_map:
56-
print("Warning: No valid lineage was generated. Check for errors above.")
82+
logger.warning("Warning: No valid lineage was generated. Check for errors above.")
5783
if not args.continue_on_error:
5884
return 1
59-
60-
lineage_to_direct_parents = extractor.get_columns_lineage_from_sqlglot_lineage_map(lineage_map)
85+
86+
lineage_to_direct_parents = extractor.get_columns_lineage_from_sqlglot_lineage_map(
87+
lineage_map
88+
)
6189
lineage_to_direct_children = (
6290
extractor.get_lineage_to_direct_children_from_lineage_to_direct_parents(
6391
lineage_to_direct_parents
@@ -72,27 +100,24 @@ def main():
72100
lineage_to_direct_children, f"{args.output_dir}/lineage_to_direct_children.json"
73101
)
74102

75-
if args.show_ui:
76-
print("===== Lineage to Direct Parents =====")
77-
utils.pretty_print_dict(lineage_to_direct_parents)
78-
print("===== Lineage to Direct Children =====")
79-
utils.pretty_print_dict(lineage_to_direct_children)
80-
81-
print("Lineage extraction complete. Output files written to output directory.")
103+
logger.info("Lineage extraction complete. Output files written to output directory.")
82104
return 0
83-
105+
84106
except Exception as e:
85-
print(f"Error during lineage extraction: {str(e)}")
107+
logger.error(f"Error during lineage extraction: {str(e)}")
86108
if not args.continue_on_error:
87109
raise
88110
return 1
89-
111+
90112
except Exception as e:
91-
print(f"Error: {str(e)}")
113+
logger.error(f"Error: {str(e)}")
92114
import traceback
115+
93116
traceback.print_exc()
94117
return 1
95118

96-
if __name__ == '__main__':
119+
120+
if __name__ == "__main__":
97121
import sys
122+
98123
sys.exit(main())

0 commit comments

Comments
 (0)