Merge pull request #121 from lanl/core_terminal_refactor
Core terminal update
jpulidojr authored Oct 29, 2024
2 parents 697a56d + 23332e2 commit 436a49d
Showing 15 changed files with 426 additions and 173 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/test_file_reader.yml
@@ -27,8 +27,6 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pyyaml
pip install toml
pip install .
- name: Test reader
run: |
2 changes: 0 additions & 2 deletions .github/workflows/test_file_writer.yml
@@ -26,8 +26,6 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pyyaml
pip install toml
pip install .
- name: Test reader
run: |
2 changes: 0 additions & 2 deletions .github/workflows/test_plugin.yml
@@ -27,8 +27,6 @@ jobs:
python -m pip install --upgrade pip
pip install -r requirements.txt
python -m pip install opencv-python
pip install pyyaml
pip install toml
pip install .
pip install graphviz
sudo apt-get install graphviz
2 changes: 0 additions & 2 deletions .github/workflows/test_sqlite.yml
@@ -26,8 +26,6 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pyyaml
pip install toml
pip install .
- name: Test reader
run: |
134 changes: 85 additions & 49 deletions dsi/backends/sqlite.py
@@ -20,7 +20,7 @@
# Holds table name and data properties

class DataType:
name = "TABLENAME" # Note: using the word DEFAULT outputs a syntax error
name = "" # Note: using the word DEFAULT outputs a syntax error
properties = {}
units = {}

@@ -33,7 +33,7 @@ class Artifact:
An Artifact is a generic construct that defines the schema for metadata that
defines the tables inside of SQL
"""
name = "TABLENAME"
name = ""
properties = {}


@@ -73,7 +73,7 @@ def check_type(self, text):
# Note 1: 'add column types' to be implemented.
# Note 2: TABLENAME is the default name for all tables created which might cause issues when creating multiple Sqlite files.

def put_artifact_type(self, types, isVerbose=False):
def put_artifact_type(self, types, foreign_query = None, isVerbose=False):
"""
Primary class for defining metadata Artifact schema.
@@ -82,10 +82,17 @@ def put_artifact_type(self, types, isVerbose=False):
`return`: none
"""

col_names = ', '.join(types.properties.keys())

str_query = "CREATE TABLE IF NOT EXISTS {} ({});".format(str(types.name), col_names)
key_names = types.properties.keys()
if "_units" in types.name:
key_names = [item + " UNIQUE" for item in types.properties.keys()]

col_names = ', '.join(key_names)

str_query = "CREATE TABLE IF NOT EXISTS {} ({}".format(str(types.name), col_names)

if foreign_query != None:
str_query += foreign_query
str_query += ");"

if isVerbose:
print(str_query)
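
A minimal sketch of calling the updated method, with hypothetical table, column, and database names: the optional foreign_query string is spliced into the CREATE TABLE statement just before its closing parenthesis, and any table whose name contains "_units" gets UNIQUE columns so that repeated unit rows can later be ignored on insert.

from dsi.backends.sqlite import Sqlite, DataType

store = Sqlite("example.sqlite_data")     # hypothetical database path
types = DataType()
types.name = "wildfire__data"             # hypothetical table name
types.properties = {"site_id": [1, 2], "wind_speed": [3.0, 7.0]}
# Hypothetical clause; the referenced table is assumed to be created elsewhere
fk = ", FOREIGN KEY (site_id) REFERENCES wildfire__sites (site_id)"
store.put_artifact_type(types, foreign_query=fk, isVerbose=True)
# Prints the generated statement:
# CREATE TABLE IF NOT EXISTS wildfire__data (site_id, wind_speed, FOREIGN KEY (site_id) REFERENCES wildfire__sites (site_id));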
@@ -124,42 +131,62 @@ def put_artifacts(self, collection, isVerbose=False):
"""
# Core compatibility name assignment
artifacts = collection

types = DataType()
types.properties = {}

# Check if this has been defined from helper function
if self.types != None:
types.name = self.types.name

for key in artifacts:
types.properties[key.replace('-','_minus_')] = artifacts[key]

self.put_artifact_type(types)

col_names = ', '.join(types.properties.keys())
placeholders = ', '.join('?' * len(types.properties))

str_query = "INSERT INTO {} ({}) VALUES ({});".format(str(types.name), col_names, placeholders)
for tableName, tableData in artifacts.items():
if "dsi_relations" in tableName:
continue

types = DataType()
types.properties = {}

# Check if this has been defined from helper function
'''if self.types != None:
types.name = self.types.name'''
types.name = tableName

foreign_query = ""
for key in tableData:
comboTuple = (tableName, key)
dsi_name = tableName[:tableName.find("__")] + "__dsi_relations"
if dsi_name in artifacts.keys() and comboTuple in artifacts[dsi_name]["primary_key"]:
key += " PRIMARY KEY"
if dsi_name in artifacts.keys() and comboTuple in artifacts[dsi_name]["foreign_key"]:
foreignIndex = artifacts[dsi_name]["foreign_key"].index(comboTuple)
foreign_query += f", FOREIGN KEY ({key}) REFERENCES {artifacts[dsi_name]['primary_key'][foreignIndex][0]} ({artifacts[dsi_name]['primary_key'][foreignIndex][1]})"

types.properties[key.replace('-','_minus_')] = tableData[key]

if foreign_query != "":
self.put_artifact_type(types, foreign_query)
else:
self.put_artifact_type(types)

# col_list helps access the specific keys of the dictionary in the for loop
col_list = col_names.split(', ')
col_names = ', '.join(types.properties.keys())
placeholders = ', '.join('?' * len(types.properties))

# loop through the contents of each column and insert into table as a row
for ind1 in range(len(types.properties[col_list[0]])):
vals = []
for ind2 in range(len(types.properties.keys())):
vals.append(str(types.properties[col_list[ind2]][ind1]))
# Make sure this works if types.properties[][] is already a string
tup_vals = tuple(vals)
self.cur.execute(str_query,tup_vals)
if "_units" in tableName:
str_query = "INSERT OR IGNORE INTO {} ({}) VALUES ({});".format(str(types.name), col_names, placeholders)
else:
str_query = "INSERT INTO {} ({}) VALUES ({});".format(str(types.name), col_names, placeholders)

if isVerbose:
print(str_query)
# col_list helps access the specific keys of the dictionary in the for loop
col_list = col_names.split(', ')

self.con.commit()

self.types = types
# loop through the contents of each column and insert into table as a row
for ind1 in range(len(types.properties[col_list[0]])):
vals = []
for ind2 in range(len(types.properties.keys())):
vals.append(str(types.properties[col_list[ind2]][ind1]))
# Make sure this works if types.properties[][] is already a string
tup_vals = tuple(vals)
self.cur.execute(str_query,tup_vals)

if isVerbose:
print(str_query)

self.con.commit()

self.types = types #This will only copy the last collection from artifacts (collections input)
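
A sketch of the nested collection the reworked put_artifacts walks, mirroring the updated unit test shown further down; the wildfire__data and wildfire__sites names and the relations entry are hypothetical, and foreign_key[i] pairs with primary_key[i] as (table, column) tuples.

from collections import OrderedDict
from dsi.backends.sqlite import Sqlite

store = Sqlite("example.sqlite_data")     # hypothetical database path

# Simple case: {table name: {column name: list of row values}}
store.put_artifacts(OrderedDict({"wildfire": OrderedDict({"foo": [1, 2, 3], "bar": [3, 2, 1]})}))

# With relations metadata steering the PRIMARY KEY / FOREIGN KEY clauses above
collection = OrderedDict({
    "wildfire__data": OrderedDict({"site_id": [1, 2], "wind_speed": [3.0, 7.0]}),
    "wildfire__dsi_relations": OrderedDict({
        "primary_key": [("wildfire__sites", "site_id")],   # referenced side
        "foreign_key": [("wildfire__data", "site_id")],    # referencing side
    }),
})
store.put_artifacts(collection)   # entries named *dsi_relations* steer the DDL and are skipped, not stored
store.close()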

def put_artifacts_only(self, artifacts, isVerbose=False):
"""
@@ -337,13 +364,13 @@ def put_artifacts_csv(self, fname, tname, isVerbose=False):
#[END NOTE 2]

# Returns text list from query
def get_artifact_list(self, isVerbose=False):
def get_artifact_list(self, query, isVerbose=False):
"""
Function that returns a list of all of the Artifact names (represented as sql tables)
`return`: list of Artifact names
"""
str_query = "SELECT name FROM sqlite_master WHERE type='table';"
str_query = query
if isVerbose:
print(str_query)

@@ -357,8 +384,8 @@ def get_artifact_list(self, isVerbose=False):
return resout

# Returns reference from query
def get_artifacts(self, query):
self.get_artifacts_list()
def get_artifacts(self, query, isVerbose=False):
self.get_artifact_list(query, isVerbose)
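
Because the listing query is no longer hard-coded, callers now pass the SQL themselves (get_artifacts simply delegates to get_artifact_list); a small sketch against a hypothetical existing database:

from dsi.backends.sqlite import Sqlite

store = Sqlite("wildfire.db")     # hypothetical existing database
tables = store.get_artifact_list("SELECT name FROM sqlite_master WHERE type='table';")
print(tables)
store.close()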

# Closes connection to server
def close(self):
@@ -577,6 +604,7 @@ def yamlToSqlite(self, filenames, db_name, deleteSql=True):
`deleteSql`: flag to delete temp SQL file that creates the database. Default is True, but change to False for testing or comparing outputs
"""

sql_statements = []
if isinstance(filenames, str):
filenames = [filenames]

@@ -600,9 +628,15 @@ def yamlToSqlite(self, filenames, db_name, deleteSql=True):
createStmt += f"{key} {data_types[type(val)]}, "
insertUnitStmt+= "NULL, "

sql_file.write(createStmt[:-2] + ");\n\n")
sql_file.write(createUnitStmt[:-2] + ");\n\n")
sql_file.write(insertUnitStmt[:-2] + ");\n\n")
if createStmt not in sql_statements:
sql_statements.append(createStmt)
sql_file.write(createStmt[:-2] + ");\n\n")
if createUnitStmt not in sql_statements:
sql_statements.append(createUnitStmt)
sql_file.write(createUnitStmt[:-2] + ");\n\n")
if insertUnitStmt not in sql_statements:
sql_statements.append(insertUnitStmt)
sql_file.write(insertUnitStmt[:-2] + ");\n\n")

insertStmt = f"INSERT INTO {tableName} VALUES( "
for val in table['columns'].values():
@@ -613,12 +647,14 @@ def yamlToSqlite(self, filenames, db_name, deleteSql=True):
else:
insertStmt+= f"{val}, "

sql_file.write(insertStmt[:-2] + ");\n\n")
if insertStmt not in sql_statements:
sql_statements.append(insertStmt)
sql_file.write(insertStmt[:-2] + ");\n\n")

subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r"))
subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r"))

if deleteSql == True:
os.remove(db_name+".sql")
if deleteSql == True:
os.remove(db_name+".sql")
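
Usage is unchanged; the new sql_statements bookkeeping simply means that feeding several YAML files which share tables no longer writes duplicate CREATE/INSERT statements into the generated .sql file. A sketch using the schema paths that appear elsewhere in this diff:

from dsi.backends.sqlite import Sqlite

reader = Sqlite("yaml-test.db")
reader.yamlToSqlite(["examples/data/schema.yml", "examples/data/schema2.yml"],
                    "yaml-test", deleteSql=False)   # keep yaml-test.sql for inspection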

def tomlDataToList(self, filenames):
"""
45 changes: 14 additions & 31 deletions dsi/backends/tests/test_sqlite.py
@@ -32,7 +32,7 @@ def test_wildfire_data_csv_artifact():
assert True

def test_wildfiredata_artifact_put():
valid_middleware_datastructure = OrderedDict({'foo':[1,2,3],'bar':[3,2,1]})
valid_middleware_datastructure = OrderedDict({"wildfire": OrderedDict({'foo':[1,2,3],'bar':[3,2,1]})})
dbpath = 'test_wildfiredata_artifact.sqlite_data'
store = Sqlite(dbpath)
store.put_artifacts(valid_middleware_datastructure)
@@ -44,7 +44,7 @@ def test_wildfiredata_artifact_put_t():
valid_middleware_datastructure = OrderedDict({'foo':[1,2,3],'bar':[3,2,1]})
dbpath = 'test_wildfiredata_artifact.sqlite_data'
store = Sqlite(dbpath)
store.put_artifacts_t(valid_middleware_datastructure, tableName="Wildfire")
store.put_artifacts_t(OrderedDict([("wildfire", valid_middleware_datastructure)]), tableName="Wildfire")
store.close()
# No error implies success
assert True
@@ -69,32 +69,15 @@ def test_yosemite_data_csv_artifact():
assert True


def test_artifact_query():
dbpath = "wildfire.db"
store = Sqlite(dbpath)
_ = store.get_artifact_list(isVerbose=isVerbose)
data_type = DataType()
data_type.name = "simulation"
result = store.sqlquery("SELECT *, MAX(wind_speed) AS max_windspeed FROM " +
str(data_type.name) + " GROUP BY safe_unsafe_fire_behavior")
store.export_csv(result, "TABLENAME", "query.csv")
store.close()
# No error implies success
assert True


def test_yaml_reader():
reader = Sqlite("yaml-test.db")
reader.yamlToSqlite(["examples/data/schema.yml", "examples/data/schema2.yml"], "yaml-test", deleteSql=False)
subprocess.run(["diff", "examples/data/compare-schema.sql", "yaml-test.sql"], stdout=open("compare_sql.txt", "w"))
file_size = os.path.getsize("compare_sql.txt")

assert file_size == 0 #difference between sql files should be 0 characters

def test_toml_reader():
reader = Sqlite("toml-test.db")
reader.tomlToSqlite(["examples/data/schema.toml", "examples/data/schema2.toml"], "toml-test", deleteSql=False)
subprocess.run(["diff", "examples/data/compare-schema.sql", "toml-test.sql"], stdout=open("compare_sql.txt", "w"))
file_size = os.path.getsize("compare_sql.txt")

assert file_size == 0 #difference between sql files should be 0 characters
# def test_artifact_query():
# dbpath = "wildfire.db"
# store = Sqlite(dbpath)
# _ = store.get_artifact_list(isVerbose=isVerbose)
# data_type = DataType()
# data_type.name = "simulation"
# result = store.sqlquery("SELECT *, MAX(wind_speed) AS max_windspeed FROM " +
# str(data_type.name) + " GROUP BY safe_unsafe_fire_behavior")
# store.export_csv(result, "TABLENAME", "query.csv")
# store.close()
# # No error implies success
# assert True
18 changes: 12 additions & 6 deletions dsi/core.py
@@ -21,8 +21,8 @@ class Terminal():
BACKEND_PREFIX = ['dsi.backends']
BACKEND_IMPLEMENTATIONS = ['gufi', 'sqlite', 'parquet']
PLUGIN_PREFIX = ['dsi.plugins']
PLUGIN_IMPLEMENTATIONS = ['env', 'file_reader']
VALID_PLUGINS = ['Hostname', 'SystemKernel', 'GitInfo', 'Bueno', 'Csv']
PLUGIN_IMPLEMENTATIONS = ['env', 'file_reader', 'file_writer']
VALID_PLUGINS = ['Hostname', 'SystemKernel', 'GitInfo', 'Bueno', 'Csv', 'ER_Diagram', 'YAML', 'TOML', "Table_Plot"]
VALID_BACKENDS = ['Gufi', 'Sqlite', 'Parquet']
VALID_MODULES = VALID_PLUGINS + VALID_BACKENDS
VALID_MODULE_FUNCTIONS = {'plugin': [
@@ -166,20 +166,25 @@ def transload(self, **kwargs):
data sources to a single DSI Core Middleware data structure.
"""
selected_function_modules = dict(
(k, self.active_modules[k]) for k in ('writer', 'reader'))
(k, self.active_modules[k]) for k in ('reader', 'writer'))
# Note this transload supports plugin.env Environment types now.
for module_type, objs in selected_function_modules.items():
for obj in objs:
obj.add_rows(**kwargs)
for col_name, col_metadata in obj.output_collector.items():
self.active_metadata[col_name] = col_metadata
if module_type == "reader":
obj.add_rows(**kwargs)
for table_name, table_metadata in obj.output_collector.items():
self.active_metadata[table_name] = table_metadata
elif module_type == "writer":
obj.get_rows(self.active_metadata, **kwargs)

# Plugins may add one or more rows (vector vs matrix data).
# You may have two or more plugins with different numbers of rows.
# Consequently, transload operations may have unstructured shape for
# some plugin configurations. We must force structure to create a valid
# middleware data structure.
# To resolve, we pad the shorter columns to match the max length column.
#COMMENTED OUT TILL LATER
'''
max_len = max([len(col) for col in self.active_metadata.values()])
for colname, coldata in self.active_metadata.items():
if len(coldata) != max_len:
@@ -188,6 +193,7 @@ def yamlToSqlite(self, filenames, db_name, deleteSql=True):
assert all([len(col) == max_len for col in self.active_metadata.values(
)]), "All columns must have the same number of rows"
'''

self.transload_lock = True
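
A rough end-to-end sketch of the reader/writer split from the Terminal side. The module names come from VALID_PLUGINS above, but load_module's argument order and each plugin's keyword arguments are assumptions here and may differ:

from dsi.core import Terminal

term = Terminal()
# Hypothetical plugin arguments -- each reader/writer defines its own kwargs
term.load_module('plugin', 'Csv', 'reader', filenames=['examples/data/wildfiredata.csv'])
term.load_module('plugin', 'ER_Diagram', 'writer', filename='wildfire_er.png')
term.transload()   # readers fill active_metadata table by table; writers then consume it via get_rows()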

(Diffs for the remaining 8 changed files did not load and are not shown here.)
