Merge branch 'master' into spark2

Ben-Epstein · Ben-Epstein · commit aef15c38c55c · 2021-02-23T20:06:16.000-05:00
diff --git a/splicemachine/features/feature_store.py b/splicemachine/features/feature_store.py
@@ -298,7 +298,8 @@ def get_training_set_from_view(self, training_view: str, features: Union[List[Fe
         r = make_request(self._FS_URL, Endpoints.TRAINING_SET_FROM_VIEW, RequestType.POST, self._basic_auth, { "view": training_view }, 
                         { "features": features, "start_time": start_time, "end_time": end_time })
         sql = r["sql"]
-        tvw = r["training_view"]
+        tvw = TrainingView(**r["training_view"])
+        features = [Feature(**f) for f in r["features"]]
 
         # Link this to mlflow for model deployment
         if self.mlflow_ctx and not return_sql:
@@ -443,8 +444,8 @@ def describe_feature_sets(self) -> None:
         
         print('Available feature sets')
         for desc in r:
-            fset = FeatureSet(**desc["feature_set"])
-            features = [Feature(**feature) for feature in desc["features"]]
+            features = [Feature(**feature) for feature in desc.pop('features')]
+            fset = FeatureSet(**desc)
             print('-' * 23)
             self._feature_set_describe(fset, features)
 
@@ -466,8 +467,8 @@ def describe_feature_set(self, schema_name: str, table_name: str) -> None:
         if not descs: raise SpliceMachineException(
             f"Feature Set {schema_name}.{table_name} not found. Check name and try again.")
         desc = descs[0]
-        fset = FeatureSet(**desc["feature_set"])
-        features = [Feature(**feature) for feature in desc["features"]]
+        features = [Feature(**feature) for feature in desc.pop("features")]
+        fset = FeatureSet(**desc)
         self._feature_set_describe(fset, features)
 
     def _feature_set_describe(self, fset: FeatureSet, features: List[Feature]):
@@ -487,8 +488,8 @@ def describe_training_views(self) -> None:
 
         print('Available training views')
         for desc in r:
-            tcx = TrainingView(**desc["training_view"])
-            features = [Feature(**f) for f in desc["features"]]
+            features = [Feature(**f) for f in desc.pop('features')]
+            tcx = TrainingView(**desc)
             print('-' * 23)
             self._training_view_describe(tcx, features)
 
@@ -504,8 +505,8 @@ def describe_training_view(self, training_view: str) -> None:
         descs = r
         if not descs: raise SpliceMachineException(f"Training view {training_view} not found. Check name and try again.")
         desc = descs[0]
-        tcx = TrainingView(**desc['training_view'])
-        feats = [Feature(**f) for f in desc['features']]
+        feats = [Feature(**f) for f in desc.pop('features')]
+        tcx = TrainingView(**desc)
         self._training_view_describe(tcx, feats)
 
     def _training_view_describe(self, tcx: TrainingView, feats: List[Feature]):
@@ -533,16 +534,19 @@ def get_training_set_from_deployment(self, schema_name: str, table_name: str):
 
         r = make_request(self._FS_URL, Endpoints.TRAINING_SET_FROM_DEPLOYMENT, RequestType.GET, self._basic_auth, 
             { "schema": schema_name, "table": table_name })
-        metadata = r["metadata"]
         
-        sql = r["sql"]
-        features = metadata['FEATURES'].split(',')
-        tv_name = metadata['NAME']
-        start_time = metadata['TRAINING_SET_START_TS']
-        end_time = metadata['TRAINING_SET_END_TS']
+        metadata = r['metadata']
+        sql = r['sql']
+
+        tv_name = metadata['name']
+        start_time = metadata['training_set_start_ts']
+        end_time = metadata['training_set_end_ts']
+
+        tv = TrainingView(**r['training_view']) if 'training_view' in r else None
+        features = [Feature(**f) for f in r['features']]
 
         if self.mlflow_ctx:
-            self.link_training_set_to_mlflow(features, start_time, end_time, tv_name)
+            self.link_training_set_to_mlflow(features, start_time, end_time, tv)
         return self.splice_ctx.df(sql)
 
     def remove_feature(self, name: str):
@@ -556,6 +560,28 @@ def remove_feature(self, name: str):
         """
         make_request(self._FS_URL, Endpoints.FEATURES, RequestType.DELETE, self._basic_auth, { "name": name })
 
+    def get_deployments(self, schema_name: str = None, table_name: str = None, training_set: str = None):
+        """
+        Returns a list of all (or specified) available deployments
+        :param schema_name: model schema name
+        :param table_name: model table name
+        :param training_set: training set name
+        :return: the list of matching Deployments, each represented as a dict
+        """
+        return make_request(self._FS_URL, Endpoints.DEPLOYMENTS, RequestType.GET, self._basic_auth, 
+            { 'schema': schema_name, 'table': table_name, 'name': training_set })
+      
+    def get_training_set_features(self, training_set: str = None):
+        """
+        Returns a list of all features from an available Training Set, as well as details about that Training Set
+        :param training_set: training set name
+        :return: TrainingSet as dict, with its 'features' entry converted to Feature objects
+        """
+        r = make_request(self._FS_URL, Endpoints.TRAINING_SET_FEATURES, RequestType.GET, self._basic_auth, 
+            { 'name': training_set })
+        r['features'] = [Feature(**f) for f in r['features']]
+        return r
+
     def _retrieve_model_data_sets(self, schema_name: str, table_name: str):
         """
         Returns the training set dataframe and model table dataframe for a given deployed model.
@@ -790,6 +816,7 @@ def link_training_set_to_mlflow(self, features: Union[List[Feature], List[str]],
 
         self.mlflow_ctx._active_training_set: TrainingSet = ts
         ts._register_metadata(self.mlflow_ctx)
+
     
     def set_feature_store_url(self, url: str):
         self._FS_URL = url
diff --git a/splicemachine/features/training_set.py b/splicemachine/features/training_set.py
@@ -48,7 +48,8 @@ def _register_metadata(self, mlflow_ctx):
                                              "Training Set was logged to the current active run. If you call "
                                              "fs.get_training_set or fs.get_training_set_from_view before starting an "
                                              "mlflow run, all following runs will assume that Training Set to be the "
-                                             "active Training Set, and will log the Training Set as metadata. For more "
-                                             "information, refer to the documentation. If you'd like to use a new "
-                                             "Training Set, end the current run, call one of the mentioned functions, "
-                                             "and start your new run.") from None
+                                             "active Training Set (until the next call to either of those functions), "
+                                             "and will log the Training Set as metadata. For more information, "
+                                             "refer to the documentation. If you'd like to use a new Training Set, "
+                                             "end the current run, call one of the mentioned functions, and start "
+                                             "your new run. Or, call mlflow.remove_active_training_set()") from None
diff --git a/splicemachine/features/utils/http_utils.py b/splicemachine/features/utils/http_utils.py
@@ -28,13 +28,15 @@ class Endpoints:
     """
     Enum for Feature Store Endpoints
     """
+    DEPLOYMENTS: str = "deployments"
     FEATURES: str = "features"
     FEATURE_SETS: str = "feature-sets"
     FEATURE_SET_DESCRIPTIONS: str = "feature-set-descriptions"
     DEPLOY_FEATURE_SET: str = "deploy-feature-set"
     FEATURE_VECTOR: str = "feature-vector"
     FEATURE_VECTOR_SQL: str = "feature-vector-sql"
     TRAINING_SETS: str = "training-sets"
+    TRAINING_SET_FEATURES: str = "training-set-features"
     TRAINING_SET_FROM_DEPLOYMENT: str = "training-set-from-deployment"
     TRAINING_SET_FROM_VIEW: str = "training-set-from-view"
     TRAINING_VIEWS: str = "training-views"
diff --git a/splicemachine/mlflow_support/mlflow_support.py b/splicemachine/mlflow_support/mlflow_support.py
@@ -439,6 +439,16 @@ def _start_run(run_id=None, tags=None, experiment_id=None, run_name=None, nested
 
     return SpliceActiveRun(active_run)
 
+@_mlflow_patch('remove_active_training_set')
+def _remove_active_training_set():
+    """
+    Removes the active training set from mlflow. This function deletes mlflow's active training set (retrieved from
+    the feature store), which will in turn stop the automated logging of features to the active mlflow run. To recreate
+    an active training set, call fs.get_training_set or fs.get_training_set_from_view in the Feature Store.
+    """
+    if hasattr(mlflow,'_active_training_set'):
+        del mlflow._active_training_set
+
 
 @_mlflow_patch('log_pipeline_stages')
 def _log_pipeline_stages(pipeline):
@@ -1003,7 +1013,8 @@ def apply_patches():
     targets = [_register_feature_store, _register_splice_context, _lp, _lm, _timer, _log_artifact, _log_feature_transformations,
                _log_model_params, _log_pipeline_stages, _log_model, _load_model, _download_artifact,
                _start_run, _current_run_id, _current_exp_id, _deploy_aws, _deploy_azure, _deploy_db, _login_director,
-               _get_run_ids_by_name, _get_deployed_models, _deploy_kubernetes, _fetch_logs, _watch_job, _end_run, _set_mlflow_uri]
+               _get_run_ids_by_name, _get_deployed_models, _deploy_kubernetes, _fetch_logs, _watch_job, _end_run,
+               _set_mlflow_uri, _remove_active_training_set]
 
     for target in targets:
         gorilla.apply(gorilla.Patch(mlflow, target.__name__.lstrip('_'), target, settings=_GORILLA_SETTINGS))