Commit fe0305f

Author: Ghislain Fourny (committed)
More informative error message if there is interference with another installation of Spark.
1 parent e018e8c commit fe0305f

File tree

3 files changed, +33 -2 lines changed


README.md

Lines changed: 3 additions & 0 deletions

@@ -349,6 +349,9 @@ Even more queries can be found [here](https://colab.research.google.com/github/R
 
 # Latest updates
 
+## Version 0.2.0 alpha 5
+- If the initialization of the Spark session fails, we now check whether SPARK_HOME is set and whether it is invalid or points to a Spark version other than 4.0, and we output a more informative error message.
+
 ## Version 0.2.0 alpha 4
 - Added parameters to the jsoniq magic to select the desired output to print: -j, -df, -pdf
 - Added an informative error message with a hint on how to fix the problem when trying to get a DataFrame and there is no schema.
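To make the alpha 5 entry concrete, here is a minimal sketch of the new user-facing behavior. The import path is inferred from this repository's src/jsoniq/session.py layout, and the Spark directory shown is hypothetical:

```python
import os

# Hypothetical: SPARK_HOME left over from an older Spark installation.
os.environ["SPARK_HOME"] = "/opt/spark-3.5.1"

# Import path assumed from this repository's package layout.
from jsoniq import RumbleSession

# Before alpha 5 this surfaced an opaque TypeError. Now the session builder
# probes the installed Spark version, prints a hint to fix or unset
# SPARK_HOME, and exits with status 43.
rumble = RumbleSession.builder.getOrCreate()
```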

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "jsoniq"
-version = "0.2.0a4"
+version = "0.2.0a5"
 description = "Python edition of RumbleDB, a JSONiq engine"
 requires-python = ">=3.11"
 dependencies = [
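As a side note, `0.2.0a5` is PEP 440 pre-release notation (fifth alpha of 0.2.0), so a plain `pip install jsoniq` will skip it unless `--pre` is passed or the version is pinned exactly. A quick sanity check with the `packaging` library (a standard third-party tool, not part of this repository):

```python
from packaging.version import Version

v = Version("0.2.0a5")
print(v.is_prerelease)       # True: 'a5' marks a pre-release under PEP 440
print(v.pre)                 # ('a', 5): fifth alpha of 0.2.0
print(v < Version("0.2.0"))  # True: pre-releases sort before the final release
```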

src/jsoniq/session.py

Lines changed: 29 additions & 1 deletion

@@ -10,6 +10,15 @@
 with pkg_resources.path("jsoniq.jars", "rumbledb-1.24.0.jar") as jar_path:
     jar_path_str = "file://" + str(jar_path)
 
+def get_spark_version():
+    if os.environ.get('SPARK_HOME') is not None:
+        spark_version = os.popen("spark-submit --version 2>&1").read()
+        if "version" in spark_version:
+            match = re.search(r'version (\d+\.\d+\.\d+)', spark_version)
+            if match:
+                return match.group(1)
+    return None
+
 class MetaRumbleSession(type):
     def __getattr__(cls, item):
         if item == "builder":
@@ -64,7 +73,26 @@ def __init__(self):
 
     def getOrCreate(self):
         if RumbleSession._rumbleSession is None:
-            RumbleSession._rumbleSession = RumbleSession(self._sparkbuilder.getOrCreate())
+            try:
+                RumbleSession._rumbleSession = RumbleSession(self._sparkbuilder.getOrCreate())
+            except FileNotFoundError as e:
+                if os.environ.get('SPARK_HOME') is not None:
+                    sys.stderr.write("[Error] The SPARK_HOME environment variable may not be set properly. Please check that it points to a valid Spark 4.0 directory; the easiest fix may be to unset SPARK_HOME entirely and fall back to the Spark 4.0 installation packaged with pyspark.\n")
+                    sys.stderr.write(f"Current value of SPARK_HOME: {os.environ.get('SPARK_HOME')}\n")
+                    sys.exit(43)
+                else:
+                    raise e
+            except TypeError as e:
+                spark_version = get_spark_version()
+                if os.environ.get('SPARK_HOME') is not None and spark_version is None:
+                    sys.stderr.write("[Error] Could not determine the Spark version. The SPARK_HOME environment variable may not be set properly. Please check that it points to a valid Spark 4.0 directory; the easiest fix may be to unset SPARK_HOME entirely and fall back to the Spark 4.0 installation packaged with pyspark.\n")
+                    sys.stderr.write(f"Current value of SPARK_HOME: {os.environ.get('SPARK_HOME')}\n")
+                    sys.exit(43)
+                elif spark_version is not None and not spark_version.startswith("4.0"):
+                    sys.stderr.write(f"[Error] RumbleDB requires Spark 4.0, but found version {spark_version}. Please either set SPARK_HOME to a Spark 4.0 directory, or unset SPARK_HOME entirely to fall back to the Spark 4.0 installation packaged with pyspark.\n")
+                    sys.exit(43)
+                else:
+                    raise e
         return RumbleSession._rumbleSession
 
     def create(self):
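For readers who want to experiment with the version probe outside the package, the sketch below reimplements the same idea with `subprocess.run` instead of `os.popen`, a deliberate substitution (not the committed code) that makes stream handling and a missing executable explicit. Note that `spark-submit` prints its version banner to stderr, which is why the committed code redirects with `2>&1`:

```python
import os
import re
import subprocess

def probe_spark_version() -> str | None:
    """Return the version reported by spark-submit, or None if unavailable."""
    if os.environ.get("SPARK_HOME") is None:
        return None
    try:
        result = subprocess.run(
            ["spark-submit", "--version"],
            capture_output=True,
            text=True,
        )
    except FileNotFoundError:
        # spark-submit is not on the PATH at all.
        return None
    # The version banner goes to stderr, so search both streams.
    banner = result.stdout + result.stderr
    match = re.search(r"version (\d+\.\d+\.\d+)", banner)
    return match.group(1) if match else None

if __name__ == "__main__":
    print(probe_spark_version())  # e.g. "4.0.0", or None
```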
