2
2
from bdikit .mapping_recommendation .scope_reducing_manager import ScopeReducingManager
3
3
from bdikit .mapping_recommendation .value_mapping_manager import ValueMappingManager
4
4
from bdikit .mapping_recommendation .column_mapping_manager import ColumnMappingManager
5
- from bdikit .visualization .mappings import plot_reduce_scope , plot_column_mappings , plot_value_mappings
5
+ from bdikit .visualization .mappings import (
6
+ plot_reduce_scope ,
7
+ plot_column_mappings ,
8
+ plot_value_mappings ,
9
+ )
6
10
from bdikit .utils import get_gdc_data
7
11
from os .path import join , dirname
8
12
import os
9
13
10
# Turn off huggingface tokenizers parallelism, which disables the
# huggingface messages that would otherwise be printed.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Location of the bundled GDC table, resolved relative to this module.
# The relative part must not start with a space: a leading space would make
# `join` produce a path component named " ." that does not exist on disk.
GDC_DATA_PATH = join(dirname(__file__), "./resource/gdc_table.csv")
13
17
14
18
15
- class APIManager ():
16
-
17
- def __init__ (self ,):
19
+ class APIManager :
20
+ def __init__ (
21
+ self ,
22
+ ):
18
23
# TODO: move into database object (in data_ingestion folder)
19
24
self .dataset = None
20
25
# TODO: move into database object (in data_ingestion folder)
@@ -23,8 +28,8 @@ def __init__(self,):
23
28
self .reduced_scope = None
24
29
self .column_manager = None
25
30
self .value_manager = None
26
- self .column_mappings = None # TODO move this to a property in column_manager
27
- self .value_mappings = None # TODO move this to a property in value_manager
31
+ self .column_mappings = None # TODO move this to a property in column_manager
32
+ self .value_mappings = None # TODO move this to a property in value_manager
28
33
29
34
def load_global_table (self , global_table_path = None ):
30
35
if global_table_path is None :
@@ -45,41 +50,58 @@ def reduce_scope(self):
45
50
self .reduced_scope = self .scope_manager .reduce ()
46
51
return plot_reduce_scope (self .reduced_scope , self .dataset )
47
52
48
def map_columns(self, algorithm="SimFloodAlgorithm"):
    """Run column mapping for the loaded dataset and plot the result.

    A fresh ``ColumnMappingManager`` is built from ``self.dataset`` and
    ``self.global_table``, given the current ``self.reduced_scope``, and
    asked to produce the mappings.

    Args:
        algorithm: Algorithm identifier forwarded unchanged to
            ``ColumnMappingManager``.

    Returns:
        The object returned by ``ColumnMappingManager.map()``; it is also
        stored on ``self.column_mappings``.
    """
    # Keep the manager on the instance before configuring it, so it is
    # reachable even if a later step raises.
    self.column_manager = manager = ColumnMappingManager(
        self.dataset, self.global_table, algorithm
    )
    manager.reduced_scope = self.reduced_scope

    mappings = manager.map()
    self.column_mappings = mappings
    plot_column_mappings(mappings)

    return mappings
55
62
56
def map_values(self, algorithm="EditAlgorithm"):
    """Run value mapping for the already-mapped columns and plot the result.

    The target columns chosen by ``map_columns`` (the values of
    ``self.column_mappings``) are passed to ``get_gdc_data``; the result is
    kept on ``self.global_table_all`` and handed to a ``ValueMappingManager``
    together with the dataset and the column mappings.

    ``self.column_mappings`` must have been populated first (it is ``None``
    after ``__init__``), otherwise the ``.values()`` call fails.

    Args:
        algorithm: Algorithm identifier forwarded unchanged to
            ``ValueMappingManager``. The default carries no surrounding
            whitespace, matching the style of ``map_columns``' default.

    Returns:
        The object returned by ``ValueMappingManager.map()``; it is also
        stored on ``self.value_mappings``.
    """
    self.global_table_all = get_gdc_data(self.column_mappings.values())
    self.value_manager = ValueMappingManager(
        self.dataset, self.column_mappings, self.global_table_all, algorithm
    )
    self.value_mappings = self.value_manager.map()
    plot_value_mappings(self.value_mappings)

    return self.value_mappings
63
72
64
def update_reduced_scope(
    self, original_column, new_candidate_name, new_candidate_sim=1.0
):
    """Append a candidate to the reduced-scope entry of one source column.

    Only the first entry whose ``"Candidate column"`` equals
    ``original_column`` is changed; when no entry matches, nothing is
    modified, printed or plotted.

    Args:
        original_column: Value to look for under ``"Candidate column"``.
        new_candidate_name: Name of the candidate to add.
        new_candidate_sim: Similarity stored alongside the candidate name.
    """
    for entry in self.reduced_scope:
        if entry["Candidate column"] == original_column:
            entry["Top k columns"].append((new_candidate_name, new_candidate_sim))
            print("Reduced scope updated!")
            # Pass the dataset as well, the same way reduce_scope() calls
            # this plot helper, so both code paths render consistently.
            plot_reduce_scope(self.reduced_scope, self.dataset)
            break
71
84
72
85
def update_column_mappings(self, new_mappings):
    """Override entries of ``self.column_mappings`` and re-plot them.

    Args:
        new_mappings: Either an iterable of
            ``(original_column, new_target_column)`` pairs or a dict mapping
            original column to new target column. Iterating a dict directly
            would yield only its keys, so dicts are read through ``items()``.
    """
    if isinstance(new_mappings, dict):
        pairs = new_mappings.items()
    else:
        pairs = new_mappings

    for original_column, new_target_column in pairs:
        self.column_mappings[original_column] = new_target_column

    # No leading space, consistent with the other "... updated!" messages.
    print("Column mapping updated!")
    plot_column_mappings(self.column_mappings)
78
91
79
def update_value_mappings(
    self, original_column, original_value, new_target_value, new_similarity=1.0
):
    """Replace the stored match for one source value of a column.

    Looks through ``self.value_mappings[original_column]["matches"]`` for the
    first entry whose first element equals ``original_value`` and overwrites
    it with ``(original_value, new_target_value, new_similarity)``. When no
    entry matches, nothing is modified, printed or plotted.

    Args:
        original_column: Key into ``self.value_mappings``.
        original_value: Source value whose match should be replaced.
        new_target_value: Target value to store for it.
        new_similarity: Similarity stored with the new match.
    """
    # Resolve the list once instead of repeating the lookup per iteration.
    matches = self.value_mappings[original_column]["matches"]
    for index, match in enumerate(matches):
        if match[0] == original_value:
            matches[index] = (original_value, new_target_value, new_similarity)
            print("Value mapping updated!")
            plot_value_mappings(self.value_mappings)
            break
0 commit comments