@@ -28,24 +34,40 @@ This section covers importing and instantiating the Native Spark DataSource
.. tab:: Native Spark DataSource
- To use the Native Spark DataSource inside of the cloud service, first create a Spark Session and then import your PySpliceContext
+ To use the Native Spark DataSource inside of the `cloud service <https://cloud.splicemachine.io/register?utm_source=pydocs&utm_medium=header&utm_campaign=sandbox>`_, first create a Spark Session and then import your PySpliceContext.
.. code-block:: Python
from pyspark.sql import SparkSession
from splicemachine.spark import PySpliceContext
+ from splicemachine.mlflow_support import *  # Connects your MLflow session automatically
+ from splicemachine.features import FeatureStore  # Splice Machine Feature Store
+
spark = SparkSession.builder.getOrCreate()
- splice = PySpliceContext(spark)
+ splice = PySpliceContext(spark)  # The Native Spark DataSource (PySpliceContext) takes a Spark Session
+ fs = FeatureStore(splice)  # Create your Feature Store
+ mlflow.register_splice_context(splice)  # Gives mlflow a native DB connection
+ mlflow.register_feature_store(fs)  # Tracks Feature Store work in MLflow automatically
+
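Once the contexts are registered, the same ``splice`` object moves data between Spark and the database. A minimal usage sketch follows; the table names are placeholders, and the ``df``/``insert`` calls assume the standard PySpliceContext API rather than anything introduced in this change:

.. code-block:: Python

    # Placeholder table names; both calls assume the standard PySpliceContext API
    df = splice.df('SELECT * FROM SPLICE.MY_TABLE')  # run SQL, get a Spark DataFrame back
    splice.insert(df, 'SPLICE.MY_TABLE_COPY')        # write a Spark DataFrame into an existing table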
.. tab:: External Native Spark DataSource
- To use the External Native Spark DataSource, create a Spark Session with your external Jars configured. Then, import your ExtPySpliceContext and set the necessary parameters
+ To use the External Native Spark DataSource, create a Spark Session with your external jars configured. Then, import your ExtPySpliceContext and set the necessary parameters.
+ Once created, the functionality is identical to the internal Native Spark DataSource (PySpliceContext).
.. code-block:: Python
from pyspark.sql import SparkSession
from splicemachine.spark import ExtPySpliceContext
+ from splicemachine.mlflow_support import *  # Connects your MLflow session automatically
+ from splicemachine.features import FeatureStore  # Splice Machine Feature Store
JDBC_URL = ''  # Set your JDBC URL here. You can get this from the Cloud Manager UI. Make sure to append ';user=<USERNAME>;password=<PASSWORD>' after ';ssl=basic' so you can authenticate in
- kafka_server = 'kafka-broker-0-' + JDBC_URL.split('jdbc:splice://jdbc-')[1].split(':1527')[0] + ':19092'  # Formatting kafka URL from JDBC
+ # The ExtPySpliceContext communicates with the database via Kafka
+ kafka_server = 'kafka-broker-0-' + JDBC_URL.split('jdbc:splice://jdbc-')[1].split(':1527')[0] + ':19092'  # Formatting kafka URL from JDBC
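The hunk ends before the external context is actually constructed. As a rough sketch of how the pieces above fit together (the constructor's keyword names here are assumptions; check the splicemachine package documentation for the exact signature):

.. code-block:: Python

    spark = SparkSession.builder.getOrCreate()
    # Keyword argument names below are assumed, not taken from this change
    splice = ExtPySpliceContext(spark, JDBC_URL=JDBC_URL, kafkaServers=kafka_server)
    fs = FeatureStore(splice)
    mlflow.register_splice_context(splice)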
- This describes the Python representation of a Feature. A feature is a column of a table with particular metadata
+ This describes the Python representation of a Feature. A Feature is a column of a Feature Set table with particular metadata.
+ A Feature is the smallest unit in the Feature Store, and each Feature within a Feature Set is individually tracked for changes
+ to enable full time travel and point-in-time consistent training datasets. Feature names are unique and case insensitive.
+ To see the full contents of your Feature, you can print, return, or .__dict__ your Feature object.
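For instance (``my_feature`` is a hypothetical Feature object, however it was obtained from the Feature Store):

.. code-block:: Python

    # my_feature is a hypothetical Feature object retrieved from the Feature Store
    print(my_feature)           # human-readable summary of the Feature
    print(my_feature.__dict__)  # full metadata dictionary behind the Feature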
.. automodule:: splicemachine.features.feature
:members:
:undoc-members:
:show-inheritance:
- splicemachine.features.training_context
+ splicemachine.features.training_view
----------------------------------
- This describes the Python representation of a Training Context. A Training Context is a SQL statement defining an event of interest, and metadata around how to create a training dataset with that context
+ This describes the Python representation of a Training View. A Training View is a SQL statement defining an event of interest, and metadata around how to create a training dataset with that view.
+ To see the full contents of your Training View, you can print, return, or .__dict__ your Training View object.
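As a purely illustrative sketch of such an "event of interest" statement (every table and column name below is made up), a Training View's SQL might look like:

.. code-block:: Python

    # Hypothetical event-of-interest SQL: one row per customer order,
    # keyed by CUSTOMER_ID, timestamped by ORDER_TS, labeled by CHURNED
    training_view_sql = """
        SELECT CUSTOMER_ID, ORDER_TS, CHURNED
        FROM RETAIL.ORDERS
    """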
join {FEATURE_STORE_SCHEMA}.feature_set fset on f.feature_set_id=fset.feature_set_id
+ where {{where}}
"""

get_features_in_feature_set = f"""
@@ -72,21 +75,33 @@ class SQL:
FROM {FEATURE_STORE_SCHEMA}.feature_set_key GROUP BY 1) p
ON fset.feature_set_id=p.feature_set_id
"""
- get_training_contexts = f"""
- SELECT tc.context_id, tc.Name, tc.Description, CAST(SQL_text AS VARCHAR(1000)) context_sql,
+
+ get_training_views = f"""
+ SELECT tc.view_id, tc.Name, tc.Description, CAST(SQL_text AS VARCHAR(1000)) view_sql,
p.pk_columns,
ts_column, label_column,
- c.context_columns
- FROM {FEATURE_STORE_SCHEMA}.training_context tc
+ c.join_columns
+ FROM {FEATURE_STORE_SCHEMA}.training_view tc
INNER JOIN
- (SELECT context_id, STRING_AGG(key_column_name,',') pk_columns FROM {FEATURE_STORE_SCHEMA}.training_context_key WHERE key_type='P' GROUP BY 1) p ON tc.context_id=p.context_id
+ (SELECT view_id, STRING_AGG(key_column_name,',') pk_columns FROM {FEATURE_STORE_SCHEMA}.training_view_key WHERE key_type='P' GROUP BY 1) p ON tc.view_id=p.view_id
INNER JOIN
- (SELECT context_id, STRING_AGG(key_column_name,',') context_columns FROM {FEATURE_STORE_SCHEMA}.training_context_key WHERE key_type='C' GROUP BY 1) c ON tc.context_id=c.context_id
+ (SELECT view_id, STRING_AGG(key_column_name,',') join_columns FROM {FEATURE_STORE_SCHEMA}.training_view_key WHERE key_type='J' GROUP BY 1) c ON tc.view_id=c.view_id
+ """
+
+ get_feature_set_join_keys = f"""
+ SELECT fset.feature_set_id, schema_name, table_name, pk_columns FROM {FEATURE_STORE_SCHEMA}.feature_set fset