From 591bb9bd57815771d447723f8bbdd3b11fa573b3 Mon Sep 17 00:00:00 2001 From: Nicolas Laval Date: Tue, 12 Nov 2024 09:52:43 +0100 Subject: [PATCH] Improve Prov module (#370) * Start prov improvements * Create VtlTypes.java * Init runWithBindings implementation * Improve provenance --- vtl-prov/pom.xml | 28 ++++ .../fr/insee/vtl/prov/ProvenanceListener.java | 142 +++++++++++++++--- .../vtl/prov/prov/DataframeInstance.java | 15 ++ .../java/fr/insee/vtl/prov/prov/Program.java | 23 ++- .../fr/insee/vtl/prov/prov/ProgramStep.java | 26 ++-- .../insee/vtl/prov/prov/VariableInstance.java | 37 +++++ .../vtl/prov/{ => utils}/ProvenanceUtils.java | 28 ++-- .../insee/vtl/prov/{ => utils}/RDFUtils.java | 24 ++- .../fr/insee/vtl/prov/utils/VtlTypes.java | 34 +++++ .../test/java/fr/insee/vtl/prov/RDFTest.java | 56 +++++++ 10 files changed, 367 insertions(+), 46 deletions(-) rename vtl-prov/src/main/java/fr/insee/vtl/prov/{ => utils}/ProvenanceUtils.java (68%) rename vtl-prov/src/main/java/fr/insee/vtl/prov/{ => utils}/RDFUtils.java (85%) create mode 100644 vtl-prov/src/main/java/fr/insee/vtl/prov/utils/VtlTypes.java diff --git a/vtl-prov/pom.xml b/vtl-prov/pom.xml index 5e74fa512..82db9daba 100644 --- a/vtl-prov/pom.xml +++ b/vtl-prov/pom.xml @@ -34,6 +34,21 @@ vtl-parser 1.8.0-SNAPSHOT + + fr.insee.trevas + vtl-engine + 1.8.0-SNAPSHOT + + + fr.insee.trevas + vtl-spark + 1.8.0-SNAPSHOT + + + fr.insee.trevas + vtl-model + 1.8.0-SNAPSHOT + @@ -45,4 +60,17 @@ + + + + org.apache.maven.plugins + maven-surefire-plugin + 3.3.0 + + --add-exports java.base/sun.nio.ch=ALL-UNNAMED + + + + + \ No newline at end of file diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceListener.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceListener.java index c44fdc8e5..f4c42df53 100644 --- a/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceListener.java +++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceListener.java @@ -1,5 +1,6 @@ package fr.insee.vtl.prov; +import fr.insee.vtl.model.Dataset; import fr.insee.vtl.parser.VtlBaseListener; import fr.insee.vtl.parser.VtlLexer; import fr.insee.vtl.parser.VtlParser; @@ -7,11 +8,15 @@ import fr.insee.vtl.prov.prov.Program; import fr.insee.vtl.prov.prov.ProgramStep; import fr.insee.vtl.prov.prov.VariableInstance; +import fr.insee.vtl.prov.utils.ProvenanceUtils; import org.antlr.v4.runtime.*; import org.antlr.v4.runtime.misc.Interval; import org.antlr.v4.runtime.tree.ParseTreeWalker; -import java.util.Set; +import javax.script.ScriptEngine; +import javax.script.ScriptException; +import java.util.*; +import java.util.concurrent.atomic.AtomicInteger; /** * ANTLR Listener that create provenance objects. @@ -26,6 +31,22 @@ public class ProvenanceListener extends VtlBaseListener { private String currentComponentID; + private String currentDataframeID; + + private int stepIndex = 1; + + private boolean rootAssignment = true; + + // Map of label/UUID id + private final Map availableDataframeUUID = new HashMap<>(); + + private Map currentAvailableDataframeUUID = new HashMap<>(); + + // Map of label/UUID id + private final Map availableVariableUUID = new HashMap<>(); + + private Map currentAvailableVariableUUID = new HashMap<>(); + public ProvenanceListener(String id, String programName) { program.setId(id); program.setLabel(programName); @@ -44,11 +65,14 @@ public void enterStart(VtlParser.StartContext ctx) { @Override public void enterTemporaryAssignment(VtlParser.TemporaryAssignmentContext ctx) { - String id = getText(ctx.varID()); + String label = getText(ctx.varID()); String sourceCode = getText(ctx); - currentProgramStep = id; - ProgramStep programStep = new ProgramStep(id, id, sourceCode); - DataframeInstance df = new DataframeInstance(id, id); + currentProgramStep = label; + ProgramStep programStep = new ProgramStep(label, sourceCode, stepIndex); + stepIndex++; + String dfId = UUID.randomUUID().toString(); + currentAvailableDataframeUUID.put(label, dfId); + DataframeInstance df = new DataframeInstance(dfId, label); programStep.setProducedDataframe(df); program.getProgramSteps().add(programStep); } @@ -56,15 +80,21 @@ public void enterTemporaryAssignment(VtlParser.TemporaryAssignmentContext ctx) { @Override public void exitTemporaryAssignment(VtlParser.TemporaryAssignmentContext ctx) { currentProgramStep = null; + availableDataframeUUID.putAll(currentAvailableDataframeUUID); + currentAvailableDataframeUUID = new HashMap<>(); + rootAssignment = true; } @Override public void enterPersistAssignment(VtlParser.PersistAssignmentContext ctx) { - String id = getText(ctx.varID()); + String label = getText(ctx.varID()); String sourceCode = getText(ctx); - currentProgramStep = id; - ProgramStep programStep = new ProgramStep(id, id, sourceCode); - DataframeInstance df = new DataframeInstance(id, id); + currentProgramStep = label; + ProgramStep programStep = new ProgramStep(label, sourceCode, stepIndex); + stepIndex++; + String dfId = UUID.randomUUID().toString(); + currentAvailableDataframeUUID.put(label, dfId); + DataframeInstance df = new DataframeInstance(dfId, label); programStep.setProducedDataframe(df); program.getProgramSteps().add(programStep); } @@ -72,23 +102,33 @@ public void enterPersistAssignment(VtlParser.PersistAssignmentContext ctx) { @Override public void exitPersistAssignment(VtlParser.PersistAssignmentContext ctx) { currentProgramStep = null; + availableDataframeUUID.putAll(currentAvailableDataframeUUID); + currentAvailableDataframeUUID = new HashMap<>(); + rootAssignment = true; } @Override public void enterVarID(VtlParser.VarIDContext ctx) { - String id = ctx.IDENTIFIER().getText(); - if (!id.equals(currentProgramStep)) { - ProgramStep programStep = program.getProgramStepById(currentProgramStep); + String label = ctx.IDENTIFIER().getText(); + if (!rootAssignment) { + ProgramStep programStep = program.getProgramStepByLabel(currentProgramStep); if (!isInDatasetClause) { Set consumedDataframe = programStep.getConsumedDataframe(); - DataframeInstance df = new DataframeInstance(id, id); + String dfId = ProvenanceUtils.getOrBuildUUID(availableDataframeUUID, label); + DataframeInstance df = new DataframeInstance(dfId, label); consumedDataframe.add(df); + // Certainly don't need to reset? To check! + currentDataframeID = label; } if (isInDatasetClause && null != currentComponentID) { Set usedVariables = programStep.getUsedVariables(); - VariableInstance v = new VariableInstance(id, id); + String varUUID = ProvenanceUtils.getOrBuildUUID(availableVariableUUID, currentDataframeID + "|" + label); + VariableInstance v = new VariableInstance(varUUID, label); + v.setParentDataframe(currentDataframeID); usedVariables.add(v); } + } else { + rootAssignment = false; } } @@ -104,11 +144,18 @@ public void exitDatasetClause(VtlParser.DatasetClauseContext ctx) { @Override public void enterComponentID(VtlParser.ComponentIDContext ctx) { - String id = ctx.getText(); - ProgramStep programStep = program.getProgramStepById(currentProgramStep); + String label = ctx.getText(); + ProgramStep programStep = program.getProgramStepByLabel(currentProgramStep); Set assignedVariables = programStep.getAssignedVariables(); - VariableInstance v = new VariableInstance(id, id); + String variableUUID = ProvenanceUtils.getOrBuildUUID(availableDataframeUUID, label); + VariableInstance v = new VariableInstance(variableUUID, label); assignedVariables.add(v); + currentAvailableVariableUUID.put(currentDataframeID + "|" + label, variableUUID); + } + + @Override + public void exitComponentID(VtlParser.ComponentIDContext ctx) { + System.out.println(ctx.IDENTIFIER()); } @Override @@ -119,6 +166,8 @@ public void enterCalcClauseItem(VtlParser.CalcClauseItemContext ctx) { @Override public void exitCalcClauseItem(VtlParser.CalcClauseItemContext ctx) { currentComponentID = null; + availableVariableUUID.putAll(currentAvailableVariableUUID); + currentAvailableVariableUUID = new HashMap<>(); } @Override @@ -148,4 +197,63 @@ public static Program run(String expr, String id, String programName) { return provenanceListener.getProgram(); } + public static Program runWithBindings(ScriptEngine engine, String expr, String id, String programName) { + Program program = run(expr, id, programName); + // 0 check if input dataset are empty? + // Keep already handled dataset + List dsHandled = new ArrayList<>(); + // Split script to loop over program steps and run them + AtomicInteger index = new AtomicInteger(1); + Arrays.stream(expr.split(";")) + .map(e -> e + ";") + .forEach(stepScript -> { + int i = index.getAndIncrement(); + // 1 - Handle input dataset + ProgramStep step = program.getProgramStepByIndex(i); + Set consumedDataframe = step.getConsumedDataframe(); + consumedDataframe.forEach(d -> { + if (!dsHandled.contains(d.getLabel())) { + Dataset ds = (Dataset) engine.getContext().getAttribute(d.getLabel()); + ds.getDataStructure().values().forEach(c -> { + VariableInstance variableInstance = step.getUsedVariables() + .stream() + .filter(v -> + v.getParentDataframe().equals(d.getLabel()) && + v.getLabel().equals(c.getName())) + .findFirst() + .orElse(new VariableInstance(c.getName())); + variableInstance.setRole(c.getRole()); + variableInstance.setType(c.getType()); + d.getHasVariableInstances().add(variableInstance); + }); + } + }); + try { + engine.eval(stepScript); + } catch (ScriptException e) { + throw new RuntimeException(e); + } + // Improve built variables attributes + DataframeInstance producedDataframe = step.getProducedDataframe(); + Dataset ds = (Dataset) engine.getContext().getAttribute(producedDataframe.getLabel()); + + ds.getDataStructure().values().forEach(c -> { + VariableInstance variableInstance = step.getAssignedVariables() + .stream() + .filter(v -> v.getLabel().equals(c.getName())) + .findFirst() + // TODO: refine variable detection in usedVariable + .orElse(new VariableInstance(c.getName())); + variableInstance.setRole(c.getRole()); + variableInstance.setType(c.getType()); + producedDataframe.getHasVariableInstances().add(variableInstance); + }); + dsHandled.add((producedDataframe.getLabel())); + // Correct usedVariables ID checking ds/var of last assignment + }); + + + return program; + } + } diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/DataframeInstance.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/DataframeInstance.java index 2860dc6e4..c25c4218b 100644 --- a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/DataframeInstance.java +++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/DataframeInstance.java @@ -1,9 +1,14 @@ package fr.insee.vtl.prov.prov; +import java.util.HashSet; +import java.util.Set; + public class DataframeInstance { String id; String label; + Set hasVariableInstances = new HashSet<>(); + public DataframeInstance(String id, String label) { this.id = id; this.label = label; @@ -24,4 +29,14 @@ public String getLabel() { public void setLabel(String label) { this.label = label; } + + public Set getHasVariableInstances() { + return hasVariableInstances; + } + + public void setHasVariableInstances(Set hasVariableInstances) { + this.hasVariableInstances = hasVariableInstances; + } + + } diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/Program.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/Program.java index ad11c7204..50a7571b5 100644 --- a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/Program.java +++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/Program.java @@ -3,12 +3,16 @@ import java.util.HashSet; import java.util.Set; +/* Filled thanks to listener, except for dataframInstances */ public class Program { String id; String label; Set programSteps = new HashSet<>(); + /* Provided running preview mode */ + Set dataframeInstances = new HashSet<>(); + String sourceCode; public Program() { @@ -43,6 +47,14 @@ public void setProgramSteps(Set programSteps) { this.programSteps = programSteps; } + public Set getDataframeInstances() { + return dataframeInstances; + } + + public void setDataframeInstances(Set dataframeInstances) { + this.dataframeInstances = dataframeInstances; + } + public String getSourceCode() { return sourceCode; } @@ -51,9 +63,16 @@ public void setSourceCode(String sourceCode) { this.sourceCode = sourceCode; } - public ProgramStep getProgramStepById(String id) { + public ProgramStep getProgramStepByLabel(String label) { + return programSteps.stream() + .filter(p -> p.getLabel().equals(label)) + .findFirst() + .orElse(null); + } + + public ProgramStep getProgramStepByIndex(int index) { return programSteps.stream() - .filter(p -> p.getId().equals(id)) + .filter(p -> p.getIndex() == index) .findFirst() .orElse(null); } diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/ProgramStep.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/ProgramStep.java index 421999d72..8f2cad975 100644 --- a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/ProgramStep.java +++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/ProgramStep.java @@ -2,26 +2,24 @@ import java.util.HashSet; import java.util.Set; +import java.util.UUID; public class ProgramStep { String id; String label; String sourceCode; - Set usedVariables = new HashSet<>();; - Set assignedVariables = new HashSet<>();; - - Set consumedDataframe = new HashSet<>();; + int index; + Set usedVariables = new HashSet<>(); + Set assignedVariables = new HashSet<>(); + Set consumedDataframe = new HashSet<>(); DataframeInstance producedDataframe; - - public ProgramStep() { - } - - public ProgramStep(String id, String label, String sourceCode) { - this.id = id; + public ProgramStep(String label, String sourceCode, int index) { + this.id = UUID.randomUUID().toString(); this.label = label; this.sourceCode = sourceCode; + this.index = index; } public String getId() { @@ -44,6 +42,14 @@ public String getSourceCode() { return sourceCode; } + public int getIndex() { + return index; + } + + public void setIndex(int index) { + this.index = index; + } + public void setSourceCode(String sourceCode) { this.sourceCode = sourceCode; } diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/VariableInstance.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/VariableInstance.java index b7cc00707..92e150f6d 100644 --- a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/VariableInstance.java +++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/VariableInstance.java @@ -1,8 +1,20 @@ package fr.insee.vtl.prov.prov; +import fr.insee.vtl.model.Dataset; + +import java.util.UUID; + public class VariableInstance { String id; String label; + Dataset.Role role; + String parentDataframe; + Class type; + + public VariableInstance(String label) { + this.id = UUID.randomUUID().toString(); + this.label = label; + } public VariableInstance(String id, String label) { this.id = id; @@ -24,4 +36,29 @@ public String getLabel() { public void setLabel(String label) { this.label = label; } + + public Dataset.Role getRole() { + return role; + } + + public void setRole(Dataset.Role role) { + this.role = role; + } + + public Class getType() { + return type; + } + + public void setType(Class type) { + this.type = type; + } + + + public String getParentDataframe() { + return parentDataframe; + } + + public void setParentDataframe(String parentDataframe) { + this.parentDataframe = parentDataframe; + } } diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceUtils.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/utils/ProvenanceUtils.java similarity index 68% rename from vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceUtils.java rename to vtl-prov/src/main/java/fr/insee/vtl/prov/utils/ProvenanceUtils.java index 80651142c..1a6b624bb 100644 --- a/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceUtils.java +++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/utils/ProvenanceUtils.java @@ -1,17 +1,17 @@ -package fr.insee.vtl.prov; +package fr.insee.vtl.prov.utils; -import fr.insee.vtl.parser.VtlLexer; -import fr.insee.vtl.parser.VtlParser; -import fr.insee.vtl.prov.model.VTLDataset; -import org.antlr.v4.runtime.CharStreams; -import org.antlr.v4.runtime.CodePointCharStream; -import org.antlr.v4.runtime.CommonTokenStream; -import org.antlr.v4.runtime.tree.ParseTreeWalker; - -import java.util.*; +import java.util.Map; +import java.util.UUID; public class ProvenanceUtils { + public static String getOrBuildUUID(Map availableUUID, String label) { + if (null != availableUUID.get(label)) { + return availableUUID.get(label); + } + return UUID.randomUUID().toString(); + } + //public static List toBusinessModel(ProvenanceListener listener) { // // TODO: @nico te graph needs to be refactored. I'll try to fix it before monday. // @@ -25,12 +25,12 @@ public class ProvenanceUtils { // }); // return model; //} -// + // //public static void toJSON(ProvenanceListener.Node node) { -// + // //} -// + // //public static void toRDF(ProvenanceListener.Node node) { -// + // //} } diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/RDFUtils.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/utils/RDFUtils.java similarity index 85% rename from vtl-prov/src/main/java/fr/insee/vtl/prov/RDFUtils.java rename to vtl-prov/src/main/java/fr/insee/vtl/prov/utils/RDFUtils.java index 26fb91409..eede0af40 100644 --- a/vtl-prov/src/main/java/fr/insee/vtl/prov/RDFUtils.java +++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/utils/RDFUtils.java @@ -1,10 +1,9 @@ -package fr.insee.vtl.prov; +package fr.insee.vtl.prov.utils; import fr.insee.vtl.prov.prov.DataframeInstance; import fr.insee.vtl.prov.prov.Program; import fr.insee.vtl.prov.prov.ProgramStep; import fr.insee.vtl.prov.prov.VariableInstance; -import fr.insee.vtl.prov.utils.PROV; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.ModelFactory; import org.apache.jena.rdf.model.Property; @@ -13,6 +12,7 @@ import org.apache.jena.rdfconnection.RDFConnectionFactory; import org.apache.jena.vocabulary.RDF; import org.apache.jena.vocabulary.RDFS; +import org.apache.jena.vocabulary.XSD; import java.io.IOException; import java.io.StringWriter; @@ -60,7 +60,7 @@ public static void handleProgramStep(Model model, ProgramStep programStep) { Resource programStepURI = model.createResource(TREVAS_BASE_URI + "program-step/" + id); Resource SDTH_PROGRAM_STEP = model.createResource(SDTH_BASE_URI + "ProgramStep"); programStepURI.addProperty(RDF.type, SDTH_PROGRAM_STEP); - programStepURI.addProperty(RDFS.label, "Create " + id + " dataset"); + programStepURI.addProperty(RDFS.label, "Step " + programStep.getIndex()); String sourceCode = programStep.getSourceCode(); Property SDTH_HAS_SOURCE_CODE = model.createProperty(SDTH_BASE_URI + "hasSourceCode"); programStepURI.addProperty(SDTH_HAS_SOURCE_CODE, sourceCode); @@ -102,6 +102,12 @@ public static void handleDataframeInstance(Model model, DataframeInstance dfInst dfURI.addProperty(RDF.type, SDTH_DATAFRAME); String label = dfInstance.getLabel(); dfURI.addProperty(RDFS.label, label); + Property SDTH_USED_VARIABLE = model.createProperty(SDTH_BASE_URI + "hasVariableInstance"); + dfInstance.getHasVariableInstances().forEach(v -> { + Resource varAssignedURI = model.createResource(TREVAS_BASE_URI + "variable/" + v.getId()); + dfURI.addProperty(SDTH_USED_VARIABLE, varAssignedURI); + handleVariableInstance(model, v); + }); } public static void handleVariableInstance(Model model, VariableInstance varInstance) { @@ -112,6 +118,18 @@ public static void handleVariableInstance(Model model, VariableInstance varInsta varURI.addProperty(RDF.type, SDTH_VARIABLE); String label = varInstance.getLabel(); varURI.addProperty(RDFS.label, label); + if (null != varInstance.getRole()) { + String role = varInstance.getRole().toString(); + // TO EXTRACT + Property hasRole = model.createProperty("http://id.making-sense.info/vtl/component/hasRole"); + varURI.addProperty(hasRole, role); + } + if (null != varInstance.getType()) { + Class type = varInstance.getType(); + // TO EXTRACT + Property hasType = model.createProperty("http://id.making-sense.info/vtl/component/hasType"); + varURI.addProperty(hasType, VtlTypes.getVtlType(type)); + } } public static Model initModel(String baseFilePath) { diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/utils/VtlTypes.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/utils/VtlTypes.java new file mode 100644 index 000000000..463b5d167 --- /dev/null +++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/utils/VtlTypes.java @@ -0,0 +1,34 @@ +package fr.insee.vtl.prov.utils; + +import org.threeten.extra.Interval; +import org.threeten.extra.PeriodDuration; + +import java.time.Instant; + +public class VtlTypes { + + /** + * Method to map Java classes and VTL basic scalar types. + * + * @param clazz Java type. + * @return Basic scalar type + */ + public static String getVtlType(Class clazz) { + if (clazz.equals(String.class)) { + return "STRING"; + } else if (clazz.equals(Long.class)) { + return "INTEGER"; + } else if (clazz.equals(Double.class)) { + return "NUMBER"; + } else if (clazz.equals(Boolean.class)) { + return "BOOLEAN"; + } else if (clazz.equals(Instant.class)) { + return "DATE"; + } else if (clazz.equals(PeriodDuration.class)) { + return "DURATION"; + } else if (clazz.equals(Interval.class)) { + return "TIME_PERIOD"; + } + throw new UnsupportedOperationException("class " + clazz + " unsupported"); + } +} diff --git a/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java b/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java index 315071592..a999b29dc 100644 --- a/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java +++ b/vtl-prov/src/test/java/fr/insee/vtl/prov/RDFTest.java @@ -1,11 +1,19 @@ package fr.insee.vtl.prov; +import fr.insee.vtl.engine.VtlScriptEngine; +import fr.insee.vtl.model.Dataset; +import fr.insee.vtl.model.InMemoryDataset; +import fr.insee.vtl.model.utils.Java8Helpers; import fr.insee.vtl.prov.prov.Program; import fr.insee.vtl.prov.utils.PropertiesLoader; +import fr.insee.vtl.prov.utils.RDFUtils; import org.apache.jena.rdf.model.Model; +import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import javax.script.ScriptEngine; +import javax.script.ScriptEngineManager; import java.io.File; import java.io.IOException; import java.util.Properties; @@ -70,6 +78,54 @@ public void simpleTest() throws IOException { assertThat(program.getProgramSteps()).hasSize(3); } + @Test + public void simpleTestWithBindings() throws IOException { + SparkSession spark = SparkSession.builder() + .appName("test") + .master("local") + .getOrCreate(); + + ScriptEngineManager mgr = new ScriptEngineManager(); + ScriptEngine engine = mgr.getEngineByExtension("vtl"); + engine.put(VtlScriptEngine.PROCESSING_ENGINE_NAMES, "spark"); + + InMemoryDataset ds1 = new InMemoryDataset( + Java8Helpers.listOf( + Java8Helpers.mapOf("id1", "A", "var1", 0L, "var2", 100L), + Java8Helpers.mapOf("id1", "B", "var1", 1L, "var2", 200L), + Java8Helpers.mapOf("id1", "C", "var1", 2L, "var2", 300L) + ), + Java8Helpers.mapOf("id1", String.class, "var1", Long.class, "var2", Long.class), + Java8Helpers.mapOf("id1", Dataset.Role.IDENTIFIER, "var1", Dataset.Role.MEASURE, "var2", Dataset.Role.MEASURE) + ); + InMemoryDataset ds2 = new InMemoryDataset( + Java8Helpers.listOf( + Java8Helpers.mapOf("id1", "A", "var1", 10L, "var2", 1L), + Java8Helpers.mapOf("id1", "B", "var1", 11L, "var2", 2L), + Java8Helpers.mapOf("id1", "D", "var1", 12L, "var2", 3L) + ), + Java8Helpers.mapOf("id1", String.class, "var1", Long.class, "var2", Long.class), + Java8Helpers.mapOf("id1", Dataset.Role.IDENTIFIER, "var1", Dataset.Role.MEASURE, "var2", Dataset.Role.MEASURE) + ); + + engine.put("ds1", ds1); + engine.put("ds2", ds2); + + String script = "ds1 := ds1[calc identifier id1 := id1, var1 := cast(var1, integer), var2 := cast(var2, integer)];\n" + + "ds2 := ds2[calc identifier id1 := id1, var1 := cast(var1, integer), var2 := cast(var2, integer)];\n" + + "ds_sum := ds1 + ds2;\n" + + "ds_mul := ds_sum * 3; \n" + + "ds_res <- ds_mul[filter mod(var1, 2) = 0][calc var_sum := var1 + var2];"; + + Program program = ProvenanceListener.runWithBindings(engine, script, "trevas-simple-test", "Simple test from Trevas tests"); + Model model = RDFUtils.buildModel(program); + String content = RDFUtils.serialize(model, "JSON-LD"); + assertThat(content).isNotEmpty(); + RDFUtils.loadModelWithCredentials(model, sparqlEndpoint, sparqlEndpointUser, sparlqEndpointPassword); + RDFUtils.writeJsonLdToFile(model, "src/test/resources/output/test-simple-with-bindings.json"); + assertThat(program.getProgramSteps()).hasSize(5); + } + @Test public void bpeTest() throws IOException {