This repository was archived by the owner on Apr 15, 2022. It is now read-only.

Commit 3025af8

Merge pull request #9 from splicemachine/DB-7840
DB-7840: add integration tests to the Python Spark native datasource
2 parents: ea6779e + 21ba249

151 files changed: 56,621 additions, 0 deletions


.gitignore

Lines changed: 14 additions & 0 deletions
@@ -1,4 +1,18 @@
 /.idea
+/splicemachine/spark/test/__pycache__/context_it.cpython-36.pyc
+/splicemachine/spark/__pycache__/__init__.cpython-36.pyc
+/splicemachine/__pycache__/__init__.cpython-36.pyc
+/.DS_Store
+/splicemachine/.DS_Store
+/splicemachine/spark/.DS_Store
+/splicemachine/spark/test/.DS_Store
+/splicemachine/spark/__pycache__/context.cpython-36.pyc
+/splicemachine/spark/__pycache__/test_context.cpython-36.pyc
+/splicemachine/spark/__pycache__/utils.cpython-36.pyc
+/splicemachine/spark/test/nose2-junit.xml
+/splicemachine/spark/test/resources/.DS_Store
+/splicemachine/spark/test/resources/spark2.2/.DS_Store
+/splicemachine/spark/test/resources/spark2.2/python/.DS_Store
 /.vscode
 .DS_Store
 splicemachine/.DS_Store

splicemachine/spark/test/.coverage

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
!coverage.py: This is a private format, don't read it directly!{"lines":{}}

splicemachine/spark/test/__init__.py

Whitespace-only changes.
Lines changed: 155 additions & 0 deletions
@@ -0,0 +1,155 @@
import unittest
import logging
import time
from pyspark.sql import SparkSession
from splicemachine.spark.context import PySpliceContext
import uuid
import tempfile
import os


class PySpliceTest(unittest.TestCase):

    @classmethod
    def create_spark_session(cls):
        spark_session = SparkSession.builder.getOrCreate()
        # spark_session.sparkContext.setLogLevel("ERROR")
        logger = spark_session.sparkContext._jvm.org.apache.log4j
        logger.LogManager.getLogger("org").setLevel(logger.Level.OFF)
        logger.LogManager.getLogger("akka").setLevel(logger.Level.OFF)
        return spark_session

    @classmethod
    def create_testing_pysplice_session(cls, spark_session):
        db_url = 'jdbc:splice://localhost:1527/splicedb;user=splice;password=admin'
        splice_context = PySpliceContext(db_url, spark_session)
        return splice_context

    @classmethod
    def setUp(cls):
        cls.spark_session = cls.create_spark_session()
        cls.splice_context = cls.create_testing_pysplice_session(spark_session=cls.spark_session)

    @classmethod
    def tearDown(cls):
        cls.spark_session.stop()


class Test(PySpliceTest):
    def test_analyzeSchema(self):
        self.splice_context.analyzeSchema("splice")
        assert True

    def test_analyzeTable(self):
        self.splice_context.analyzeTable("sys.systables")
        assert True

    def test_executeUpdate(self):
        self.splice_context.executeUpdate("drop table if exists splice.systables")
        self.splice_context.executeUpdate("create table systables as select * from sys.systables")
        assert self.splice_context.tableExists("SPLICE.SYSTABLES")

    def test_dropTable(self):
        self.splice_context.executeUpdate("drop table if exists splice.pysplice_test_droptable")
        self.splice_context.executeUpdate("create table pysplice_test_droptable ( COL1 int primary key)")
        self.splice_context.dropTable("splice.pysplice_test_droptable")
        cnt = self.splice_context.df("select count(*) as cnt from sys.sysschemas a join sys.systables b on a.SCHEMAID = b.SCHEMAID where a.SCHEMANAME = 'SPLICE' and b.TABLENAME = 'PYSPLICE_TEST_DROPTABLE'").collect()[0]['CNT']
        assert cnt == 0

    def test_df(self):
        self.splice_context.executeUpdate("drop table if exists splice.pysplice_test_df")
        test_df_df = self.spark_session.createDataFrame([[1], [2]], "COL1: int")
        self.splice_context.executeUpdate("create table pysplice_test_df ( COL1 int primary key)")
        self.splice_context.insert(test_df_df, "splice.pysplice_test_df")
        cnt = self.splice_context.df("select count(*) as cnt from splice.pysplice_test_df").collect()[0]['CNT']
        assert cnt == 2

    def test_delete(self):
        self.splice_context.executeUpdate("drop table if exists splice.pysplice_test")
        test_delete_df = self.spark_session.createDataFrame([[1], [2]], "COL1: int")
        self.splice_context.executeUpdate("create table pysplice_test ( COL1 int primary key)")
        self.splice_context.insert(test_delete_df, "splice.pysplice_test")
        self.splice_context.delete(test_delete_df, "splice.pysplice_test")
        cnt = self.splice_context.df("select count(*) as cnt from splice.pysplice_test").collect()[0]['CNT']
        self.splice_context.dropTable("splice.pysplice_test")
        assert cnt == 0

    def test_execute(self):
        self.splice_context.execute("select count(*) from sys.systables")
        assert True

    def test_export(self):
        test_export_df = self.spark_session.createDataFrame([[1], [2]], "COL1: int")
        temp_dir = tempfile.gettempdir()
        file = os.path.join(temp_dir, str(uuid.uuid4()) + '.csv')
        print(file)
        self.splice_context.export(test_export_df, file)
        test_export_load_df = self.spark_session.read.option("timestampFormat", "yyyy/MM/dd HH:mm:ss ZZ").csv(file, inferSchema=True)
        assert test_export_df.count() == test_export_load_df.count()

    def test_exportBinary(self):
        test_exportBinary_df = self.spark_session.createDataFrame([[1], [2]], "COL1: int")
        temp_dir = tempfile.gettempdir()
        file = os.path.join(temp_dir, str(uuid.uuid4()) + '.parquet')
        self.splice_context.exportBinary(test_exportBinary_df, file, False, "parquet")
        load_df = self.spark_session.read.parquet(file)
        assert test_exportBinary_df.count() == load_df.count()

    def test_getSchema(self):
        systables_schema_from_df = self.splice_context.df("select * from sys.systables").schema
        systables_schema = self.splice_context.getSchema("sys.systables")
        assert systables_schema_from_df == systables_schema

    def test_insert(self):
        self.splice_context.executeUpdate("drop table if exists splice.pysplice_test_insert")
        test_insert_df = self.spark_session.createDataFrame([[1], [2]], "COL1: int")
        self.splice_context.executeUpdate("create table splice.pysplice_test_insert ( col1 int primary key)")
        time.sleep(10)
        self.splice_context.insert(test_insert_df, "splice.pysplice_test_insert")
        cnt = self.splice_context.df("select count(*) as cnt from splice.pysplice_test_insert").collect()[0]['CNT']
        assert cnt == 2

    def test_internalDf(self):
        self.splice_context.executeUpdate("drop table if exists splice.pysplice_test")
        test_internalDf_df = self.spark_session.createDataFrame([[1], [2]], "COL1: int")
        self.splice_context.executeUpdate("create table splice.pysplice_test ( col1 int primary key)")
        self.splice_context.insert(test_internalDf_df, "splice.pysplice_test")
        cnt = self.splice_context.internalDf("select count(*) as cnt from splice.pysplice_test").collect()[0]['CNT']
        assert cnt == 2

    def test_tableExists(self):
        self.splice_context.executeUpdate("drop table if exists splice.pysplice_test")
        self.splice_context.executeUpdate("create table pysplice_test ( COL1 int primary key)")
        assert self.splice_context.tableExists("splice.pysplice_test")

    def test_truncateTable(self):
        self.splice_context.executeUpdate("drop table if exists splice.pysplice_test")
        test_truncateTable_df = self.spark_session.createDataFrame([[1], [2]], "COL1: int")
        self.splice_context.executeUpdate("create table pysplice_test ( col1 int primary key)")
        self.splice_context.insert(test_truncateTable_df, "splice.pysplice_test")
        self.splice_context.truncateTable("splice.pysplice_test")
        cnt = self.splice_context.df("select count(*) as cnt from splice.pysplice_test").collect()[0]['CNT']
        assert cnt == 0

    def test_update(self):
        test_update_df = self.spark_session.createDataFrame([[1, 2], [2, 3]], "COL1: int, COL2: int")
        test_update_update_df = self.spark_session.createDataFrame([[1, 2], [2, 4]], "COL1: int, COL2: int")
        self.splice_context.executeUpdate("drop table if exists splice.pysplice_test_update")
        self.splice_context.executeUpdate("create table pysplice_test_update ( COL1 int primary key, COL2 int)")
        self.splice_context.insert(test_update_df, "splice.pysplice_test_update")
        self.splice_context.update(test_update_update_df, "splice.pysplice_test_update")
        cnt = self.splice_context.df("select count(*) as cnt from splice.pysplice_test_update where col2 = 4").collect()[0]["CNT"]
        assert cnt == 1

    def test_upsert(self):
        test_upsert_df = self.spark_session.createDataFrame([[1, 2], [2, 3]], "COL1: int, COL2: int")
        test_upsert_upsert_df = self.spark_session.createDataFrame([[1, 2], [2, 4], [3, 3]], "COL1: int, COL2: int")
        self.splice_context.executeUpdate("drop table if exists splice.pysplice_test_upsert")
        self.splice_context.executeUpdate("create table pysplice_test_upsert ( COL1 int primary key, COL2 int)")
        self.splice_context.insert(test_upsert_df, "splice.pysplice_test_upsert")
        self.splice_context.upsert(test_upsert_upsert_df, "splice.pysplice_test_upsert")
        cnt = self.splice_context.df("select count(*) as cnt from splice.pysplice_test_upsert where col2 = 2 or col1 = 3").collect()[0]["CNT"]
        assert cnt == 2
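
The .gitignore hunk above lists nose2-junit.xml, which suggests the team runs these integration tests with nose2, but plain unittest discovery works as well. The runner below is a minimal sketch and not part of the commit: the "*_it.py" module pattern is inferred from the context_it.cpython-36.pyc entry in .gitignore, and it assumes a Splice Machine instance is reachable at the JDBC URL hard-coded in PySpliceTest (jdbc:splice://localhost:1527).

import unittest

if __name__ == "__main__":
    # Discover the integration-test modules under splicemachine/spark/test.
    # The "*_it.py" pattern and the running Splice Machine instance at
    # localhost:1527 are assumptions, not part of this commit.
    suite = unittest.defaultTestLoader.discover(
        "splicemachine/spark/test", pattern="*_it.py"
    )
    unittest.TextTestRunner(verbosity=2).run(suite)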
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
*.pyc
docs/_build/
pyspark.egg-info
build/
dist/
Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

global-exclude *.py[cod] __pycache__ .DS_Store
recursive-include deps/jars *.jar
graft deps/bin
recursive-include deps/data *.data *.txt
recursive-include deps/licenses *.txt
recursive-include deps/examples *.py
recursive-include lib *.zip
include README.md
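
As a quick aside on the packaging directives above (a sketch, not part of the commit): in a pip-installed PySpark the files pulled in by these include rules end up inside the installed pyspark package, with the JARs typically under a jars/ subdirectory. One way to check, assuming pyspark is importable:

import os
import pyspark

# Locate the installed pyspark package and its bundled JARs.
pkg_dir = os.path.dirname(pyspark.__file__)
print("pyspark package dir:", pkg_dir)

jars_dir = os.path.join(pkg_dir, "jars")  # usual location in pip installs
if os.path.isdir(jars_dir):
    print("bundled jars:", len(os.listdir(jars_dir)))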
Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
# Apache Spark

Spark is a fast and general cluster computing system for Big Data. It provides
high-level APIs in Scala, Java, Python, and R, and an optimized engine that
supports general computation graphs for data analysis. It also supports a
rich set of higher-level tools including Spark SQL for SQL and DataFrames,
MLlib for machine learning, GraphX for graph processing,
and Spark Streaming for stream processing.

<http://spark.apache.org/>

## Online Documentation

You can find the latest Spark documentation, including a programming
guide, on the [project web page](http://spark.apache.org/documentation.html)

## Python Packaging

This README file only contains basic information related to pip installed PySpark.
This packaging is currently experimental and may change in future versions (although we will do our best to keep compatibility).
Using PySpark requires the Spark JARs, and if you are building this from source please see the builder instructions at
["Building Spark"](http://spark.apache.org/docs/latest/building-spark.html).

The Python packaging for Spark is not intended to replace all of the other use cases. This Python packaged version of Spark is suitable for interacting with an existing cluster (be it Spark standalone, YARN, or Mesos) - but does not contain the tools required to setup your own standalone Spark cluster. You can download the full version of Spark from the [Apache Spark downloads page](http://spark.apache.org/downloads.html).

**NOTE:** If you are using this with a Spark standalone cluster you must ensure that the version (including minor version) matches or you may experience odd errors.

## Python Requirements

At its core PySpark depends on Py4J (currently version 0.10.4), but additional sub-packages have their own requirements (including numpy and pandas).
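
To illustrate the Python Packaging note above: the pip-installed package only ships the client side, so a session is usually pointed at an existing cluster whose Spark version matches. A minimal sketch (the master URL and application name below are placeholders, not values taken from this repository):

from pyspark.sql import SparkSession

# Connect a pip-installed PySpark client to an existing cluster.
# "spark://spark-master:7077" is a placeholder standalone master URL.
spark = (
    SparkSession.builder
    .master("spark://spark-master:7077")
    .appName("pip-installed-pyspark-example")
    .getOrCreate()
)
print(spark.version)  # should match the cluster's Spark version (see the NOTE above)
spark.stop()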
