|
10 | 10 | with pkg_resources.path("jsoniq.jars", "rumbledb-1.24.0.jar") as jar_path: |
11 | 11 | jar_path_str = "file://" + str(jar_path) |
12 | 12 |
|
def get_spark_version():
    """Return the Spark version reported by ``spark-submit`` (e.g. "4.0.0").

    Returns None when the SPARK_HOME environment variable is not set, when
    spark-submit produces no recognizable output, or when no version number
    can be extracted from it.
    """
    if os.environ.get('SPARK_HOME') is None:
        return None
    # spark-submit prints its banner (including the version) to stderr,
    # hence the 2>&1 redirect so os.popen captures it.
    output = os.popen("spark-submit --version 2>&1").read()
    # Escape the separators so we only match a real X.Y.Z version triple
    # (the original pattern's bare '.' matched any character).
    match = re.search(r'version (\d+\.\d+\.\d+)', output)
    return match.group(1) if match else None
| 21 | + |
13 | 22 | class MetaRumbleSession(type): |
14 | 23 | def __getattr__(cls, item): |
15 | 24 | if item == "builder": |
@@ -64,7 +73,26 @@ def __init__(self): |
64 | 73 |
|
def getOrCreate(self):
    """Return the singleton RumbleSession, creating it on first use.

    Wraps SparkSession creation with diagnostics for the common failure
    mode of a misconfigured SPARK_HOME; in that case an explanatory
    message is written to stderr and the process exits with code 43.
    Unrelated errors are re-raised unchanged.
    """
    if RumbleSession._rumbleSession is None:
        try:
            RumbleSession._rumbleSession = RumbleSession(self._sparkbuilder.getOrCreate())
        except FileNotFoundError:
            # Spark binaries not found: almost always a bad SPARK_HOME.
            if os.environ.get('SPARK_HOME') is not None:
                sys.stderr.write("[Error] SPARK_HOME environment variable may not be set properly. Please check that it points to a valid path to a Spark 4.0 directory, or maybe the easiest would be to delete the environment variable SPARK_HOME completely to fall back to the installation of Spark 4.0 packaged with pyspark.\n")
                sys.stderr.write(f"Current value of SPARK_HOME: {os.environ.get('SPARK_HOME')}\n")
                sys.exit(43)
            else:
                # SPARK_HOME is not involved; preserve the original traceback.
                raise
        except TypeError:
            # A TypeError here typically means a Spark version mismatch.
            spark_version = get_spark_version()
            if os.environ.get('SPARK_HOME') is not None and spark_version is None:
                sys.stderr.write("[Error] Could not determine Spark version. The SPARK_HOME environment variable may not be set properly. Please check that it points to a valid path to a Spark 4.0 directory, or maybe the easiest would be to delete the environment variable SPARK_HOME completely to fall back to the installation of Spark 4.0 packaged with pyspark.\n")
                sys.stderr.write(f"Current value of SPARK_HOME: {os.environ.get('SPARK_HOME')}\n")
                sys.exit(43)
            elif spark_version is not None and not spark_version.startswith("4.0"):
                # Guarding on 'is not None' fixes a crash in the original:
                # with SPARK_HOME unset and no detectable version, calling
                # None.startswith() raised AttributeError and masked the
                # real TypeError.
                sys.stderr.write(f"[Error] RumbleDB requires Spark 4.0, but found version {spark_version}. Please either set SPARK_HOME to a Spark 4.0 directory, or maybe the easiest would be to delete the environment variable SPARK_HOME completely to fall back to the installation of Spark 4.0 packaged with pyspark.\n")
                sys.exit(43)
            else:
                raise
    return RumbleSession._rumbleSession
69 | 97 |
|
70 | 98 | def create(self): |
|
0 commit comments