Skip to content

Commit

Permalink
WIP: ssdeep discovery query
Browse files Browse the repository at this point in the history
  • Loading branch information
drewfarris committed Jan 11, 2024
1 parent 173cb27 commit a6a2b17
Show file tree
Hide file tree
Showing 4 changed files with 182 additions and 60 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@
import java.util.Collections;
import java.util.Set;

public class SSDeepDiscoveryQueryTable extends ChainedQueryTable<Entry<Key, Value>, DiscoveredThing> {
public class SSDeepDiscoveryQueryLogic extends ChainedQueryTable<Entry<Key, Value>, DiscoveredThing> {

private static final Logger log = Logger.getLogger(SSDeepDiscoveryQueryTable.class);
private static final Logger log = Logger.getLogger(SSDeepDiscoveryQueryLogic.class);

private Query q = null;

public SSDeepDiscoveryQueryTable() { super(); }
public SSDeepDiscoveryQueryLogic() { super(); }

@SuppressWarnings("CopyConstructorMissesField")
public SSDeepDiscoveryQueryTable(SSDeepDiscoveryQueryTable other) {
public SSDeepDiscoveryQueryLogic(SSDeepDiscoveryQueryLogic other) {
super(other);
}

Expand All @@ -47,7 +47,7 @@ public GenericQueryConfiguration initialize(AccumuloClient client, Query setting

public void setupQuery(GenericQueryConfiguration config) throws Exception {
if (null == this.getChainStrategy()) {
final String error = "No ChainStrategy provided for SSDeepDiscoveryQueryTable!";
final String error = "No ChainStrategy provided for SSDeepDiscoveryQueryLogic!";
log.error(error);
throw new RuntimeException(error);
}
Expand All @@ -67,8 +67,8 @@ public QueryLogicTransformer getTransformer(Query settings) {
}

@Override
public SSDeepDiscoveryQueryTable clone() throws CloneNotSupportedException {
return new SSDeepDiscoveryQueryTable(this);
public SSDeepDiscoveryQueryLogic clone() throws CloneNotSupportedException {
return new SSDeepDiscoveryQueryLogic(this);
}

public Set<String> getExampleQueries() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package datawave.query.tables.ssdeep;

import datawave.query.tables.chained.ChainedQueryTable;
import datawave.webservice.query.Query;
import datawave.webservice.query.configuration.GenericQueryConfiguration;
import datawave.webservice.query.logic.QueryLogicTransformer;
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.log4j.Logger;

import java.util.Collections;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.Set;

public class SSDeepEventQueryLogic extends ChainedQueryTable<Entry<Key, Value>, Entry<Key, Value>> {

private static final Logger log = Logger.getLogger(SSDeepEventQueryLogic.class);

private Query q = null;

public SSDeepEventQueryLogic() { super(); }

@SuppressWarnings("CopyConstructorMissesField")
public SSDeepEventQueryLogic(SSDeepEventQueryLogic other) {
super(other);
}

@Override
public void close() {
super.close();
}

public GenericQueryConfiguration initialize(AccumuloClient client, Query settings, Set<Authorizations> auths) throws Exception {
super.initialize(client, settings, auths);
this.q = settings.duplicate(settings.getQueryName() + "_discovery_query");

log.debug("Initial settings parameters: " + settings.getParameters().toString());
GenericQueryConfiguration config = this.logic1.initialize(client, settings, auths);
return config;
}

public void setupQuery(GenericQueryConfiguration config) throws Exception {
if (null == this.getChainStrategy()) {
final String error = "No ChainStrategy provided for SSDeepDiscoveryQueryLogic!";
log.error(error);
throw new RuntimeException(error);
}

log.info("Setting up ssdeep query using config");
this.logic1.setupQuery(config);

final Iterator<Entry<Key,Value>> iter1 = this.logic1.iterator();

log.info("Running chained discovery query");
this.iterator = this.getChainStrategy().runChainedQuery(config.getClient(), this.q, config.getAuthorizations(), iter1, this.logic2);
}

@Override
public QueryLogicTransformer getTransformer(Query settings) {
return this.logic2.getTransformer(settings);
}

@Override
public SSDeepEventQueryLogic clone() throws CloneNotSupportedException {
return new SSDeepEventQueryLogic(this);
}

public Set<String> getExampleQueries() {
return Collections.emptySet();
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package datawave.query.tables.ssdeep;

import datawave.query.tables.chained.strategy.FullChainStrategy;
import datawave.webservice.query.Query;
import datawave.webservice.query.QueryImpl;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;

import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

public class FullSSDeepEventChainStrategy extends FullChainStrategy<Map.Entry<Key, Value>, Map.Entry<Key, Value>> {
@Override
protected Query buildLatterQuery(Query initialQuery, Iterator<Map.Entry<Key, Value>> initialQueryResults, String latterLogicName) {
log.debug("buildLatterQuery() called...");
StringBuilder b = new StringBuilder();
Set<String> ssdeepSeen = new HashSet<>();
while (initialQueryResults.hasNext()) {
Map.Entry<Key, Value> result = initialQueryResults.next();
Key key = result.getKey();
String ssdeep = key.getColumnQualifier().toString();
if (ssdeepSeen.contains(ssdeep)) {
continue;
}
log.debug("Added new ssdeep " + ssdeep);
ssdeepSeen.add(ssdeep);
if (b.length() > 0) {
b.append(" OR ");
}
b.append("CHECKSUM_SSDEEP:\"").append(ssdeep).append("\"");
}

Query q = new QueryImpl(); // TODO, need to use a factory? don't hardcode this.
q.setQuery(b.toString());
q.setId(UUID.randomUUID());
q.setPagesize(Integer.MAX_VALUE); // TODO: choose something reasonable.
q.setQueryAuthorizations(initialQuery.getQueryAuthorizations());
q.setUserDN(initialQuery.getUserDN());
// TODO: set up a reasonable start and end date.
return q;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import datawave.query.RebuildingScannerTestHelper;
import datawave.query.discovery.DiscoveryLogic;
import datawave.query.discovery.DiscoveryTransformer;
import datawave.query.tables.ShardQueryLogic;
import datawave.query.testframework.AbstractFunctionalQuery;
import datawave.query.testframework.AccumuloSetup;
import datawave.query.testframework.DataTypeHadoopConfig;
Expand All @@ -20,6 +21,8 @@
import datawave.security.authorization.SubjectIssuerDNPair;
import datawave.webservice.common.connection.AccumuloConnectionFactory;
import datawave.webservice.query.QueryImpl;
import datawave.webservice.query.logic.AbstractQueryLogicTransformer;
import datawave.webservice.query.logic.QueryLogic;
import datawave.webservice.query.result.event.DefaultResponseObjectFactory;
import datawave.webservice.query.result.event.EventBase;
import datawave.webservice.query.result.event.FieldBase;
Expand Down Expand Up @@ -58,7 +61,11 @@ public class SSDeepDiscoveryQueryTest extends AbstractFunctionalQuery {

DiscoveryLogic discoveryQueryLogic;

SSDeepDiscoveryQueryTable similarityDiscoveryQueryLogic;
ShardQueryLogic eventQueryLogic;

SSDeepDiscoveryQueryLogic similarityDiscoveryQueryLogic;

SSDeepEventQueryLogic similarityEventQueryLogic;

@BeforeClass
public static void filterSetup() throws Exception {
Expand Down Expand Up @@ -98,20 +105,39 @@ public void setupQuery() {
discoveryQueryLogic.setMetadataHelperFactory(metadataHelperFactory);
discoveryQueryLogic.setResponseObjectFactory(responseFactory);

eventQueryLogic = new ShardQueryLogic();
eventQueryLogic.setTableName("shardIndex");
eventQueryLogic.setIndexTableName("shardIndex");
eventQueryLogic.setReverseIndexTableName("shardReverseIndex");
eventQueryLogic.setModelTableName("metadata");
eventQueryLogic.setMarkingFunctions(markingFunctions);
eventQueryLogic.setMetadataHelperFactory(metadataHelperFactory);
eventQueryLogic.setResponseObjectFactory(responseFactory);

//TODO: this implementation currently does not properly initialize the latter logic at the right time
// this means that the model is null when we attempt to get the transformer initially. For now,
// we'll develop using the full chain strategy.
StreamingSSDeepDiscoveryChainStrategy ssdeepStreamedChainStrategy = new StreamingSSDeepDiscoveryChainStrategy();
ssdeepStreamedChainStrategy.setMaxResultsToBuffer(1); // disable buffering for this test

//TODO: This implementation works for now, but will likely not scale.
FullSSDeepDiscoveryChainStrategy ssdeepFullChainStrategy = new FullSSDeepDiscoveryChainStrategy();
FullSSDeepDiscoveryChainStrategy ssdeepDiscoveryChainStrategy = new FullSSDeepDiscoveryChainStrategy();

similarityDiscoveryQueryLogic = new SSDeepDiscoveryQueryTable();
//TODO: eliminate duplication in SSDeepDiscoveryQueryLogic and SSDeepEventQueryLogic
// also eliminate duplication in FullSSDeepDiscoveryChainStrategy and FullSSDeepEventChainStrategy.
similarityDiscoveryQueryLogic = new SSDeepDiscoveryQueryLogic();
similarityDiscoveryQueryLogic.setTableName("ssdeepIndex");
similarityDiscoveryQueryLogic.setLogic1(similarityQueryLogic);
similarityDiscoveryQueryLogic.setLogic2(discoveryQueryLogic);
similarityDiscoveryQueryLogic.setChainStrategy(ssdeepFullChainStrategy);
similarityDiscoveryQueryLogic.setChainStrategy(ssdeepDiscoveryChainStrategy);

FullSSDeepEventChainStrategy ssdeepEventChainStrategy = new FullSSDeepEventChainStrategy();

similarityEventQueryLogic = new SSDeepEventQueryLogic();
similarityEventQueryLogic.setTableName("ssdeepIndex");
similarityEventQueryLogic.setLogic1(similarityQueryLogic);
similarityEventQueryLogic.setLogic2(eventQueryLogic);
similarityEventQueryLogic.setChainStrategy(ssdeepEventChainStrategy);

// init must set auths
testInit();
Expand All @@ -137,11 +163,11 @@ public void testSSDeepSimilarity() throws Exception {
log.info("------ testSSDeepSimilarity ------");
String testSSDeep = "384:nv/fP9FmWVMdRFj2aTgSO+u5QT4ZE1PIVS:nDmWOdRFNTTs504cQS";
String query = "CHECKSUM_SSDEEP:" + testSSDeep;
EventQueryResponseBase response = runSSDeepSimilarityQuery(query, 0);
EventQueryResponseBase response = runSSDeepQuery(query, similarityQueryLogic, 0);

List<EventBase> events = response.getEvents();
int eventCount = events.size();
Assert.assertEquals(1, events.size());
Map<String,Map<String,String>> observedEvents = extractObservedEvents(events);
Assert.assertEquals(1, eventCount);

SSDeepTestUtil.assertSSDeepSimilarityMatch(testSSDeep, testSSDeep, "38.0", "1", "100", observedEvents);
}
Expand All @@ -151,11 +177,13 @@ public void testDiscovery() throws Exception {
log.info("------ testDiscovery ------");
String testSSDeep = "384:nv/fP9FmWVMdRFj2aTgSO+u5QT4ZE1PIVS:nDmWOdRFNTTs504cQS";
String query = "CHECKSUM_SSDEEP:\"" + testSSDeep + "\"";
EventQueryResponseBase response = runDiscoveryQuery(query, 0);
EventQueryResponseBase response = runSSDeepQuery(query, discoveryQueryLogic, 0);

List<EventBase> events = response.getEvents();
int eventCount = events.size();
Assert.assertEquals(1, events.size());
Map<String,Map<String,String>> observedEvents = extractObservedEvents(events);
Assert.assertEquals(1, eventCount);

//TODO: add assertions
}

@Test
Expand All @@ -166,11 +194,11 @@ public void testChainedSSDeepDiscovery() throws Exception {
String testSSDeep = "384:nv/fP9FmWVMdRFj2aTgSO+u5QT4ZE1PIVS:nDmWOdRFNTTs504---";
String targetSSDeep = "384:nv/fP9FmWVMdRFj2aTgSO+u5QT4ZE1PIVS:nDmWOdRFNTTs504cQS";
String query = "CHECKSUM_SSDEEP:" + testSSDeep;
EventQueryResponseBase response = runChainedQuery(query, 0);
EventQueryResponseBase response = runSSDeepQuery(query, similarityDiscoveryQueryLogic, 0);

List<EventBase> events = response.getEvents();
int eventCount = events.size();
Assert.assertEquals(1, events.size());
Map<String,Map<String,String>> observedEvents = extractObservedEvents(events);
Assert.assertEquals(1, eventCount);

Map.Entry<String, Map<String,String>> result = observedEvents.entrySet().iterator().next();
Map<String, String> resultFields = result.getValue();
Expand All @@ -181,48 +209,22 @@ public void testChainedSSDeepDiscovery() throws Exception {
Assert.assertEquals("4", resultFields.get("RECORD COUNT"));
}

@Test
public void testChainedSSDeepEvent() throws Exception {
Logger.getLogger(StreamingSSDeepDiscoveryChainStrategy.SSDeepDiscoveryChainedIterator.class).setLevel(Level.DEBUG);

public EventQueryResponseBase runSSDeepSimilarityQuery(String query, int minScoreThreshold) throws Exception {
QueryImpl q = new QueryImpl();
q.setQuery(query);
q.setId(UUID.randomUUID());
q.setPagesize(Integer.MAX_VALUE);
q.setQueryAuthorizations(auths.toString());

if (minScoreThreshold > 0) {
q.addParameter(SSDeepSimilarityQueryTransformer.MIN_SSDEEP_SCORE_PARAMETER, String.valueOf(minScoreThreshold));
}

RunningQuery runner = new RunningQuery(client, AccumuloConnectionFactory.Priority.NORMAL, similarityQueryLogic, q, "", principal,
new QueryMetricFactoryImpl());
TransformIterator transformIterator = runner.getTransformIterator();
SSDeepSimilarityQueryTransformer transformer = (SSDeepSimilarityQueryTransformer) transformIterator.getTransformer();
EventQueryResponseBase response = (EventQueryResponseBase) transformer.createResponse(runner.next());

return response;
}

public EventQueryResponseBase runDiscoveryQuery(String query, int minScoreThreshold) throws Exception {
QueryImpl q = new QueryImpl();
q.setQuery(query);
q.setId(UUID.randomUUID());
q.setPagesize(Integer.MAX_VALUE);
q.setQueryAuthorizations(auths.toString());

if (minScoreThreshold > 0) {
q.addParameter(SSDeepSimilarityQueryTransformer.MIN_SSDEEP_SCORE_PARAMETER, String.valueOf(minScoreThreshold));
}

RunningQuery runner = new RunningQuery(client, AccumuloConnectionFactory.Priority.NORMAL, discoveryQueryLogic, q, "", principal,
new QueryMetricFactoryImpl());
TransformIterator transformIterator = runner.getTransformIterator();
DiscoveryTransformer transformer = (DiscoveryTransformer) transformIterator.getTransformer();
EventQueryResponseBase response = (EventQueryResponseBase) transformer.createResponse(runner.next());
log.info("------ testSSDeepDiscovery ------");
String testSSDeep = "384:nv/fP9FmWVMdRFj2aTgSO+u5QT4ZE1PIVS:nDmWOdRFNTTs504---";
String targetSSDeep = "384:nv/fP9FmWVMdRFj2aTgSO+u5QT4ZE1PIVS:nDmWOdRFNTTs504cQS";
String query = "CHECKSUM_SSDEEP:" + testSSDeep;
EventQueryResponseBase response = runSSDeepQuery(query, similarityEventQueryLogic, 0);

return response;
List<EventBase> events = response.getEvents();
Assert.assertEquals(1, events.size());
Map<String,Map<String,String>> observedEvents = extractObservedEvents(events);
}

public EventQueryResponseBase runChainedQuery(String query, int minScoreThreshold) throws Exception {
public EventQueryResponseBase runSSDeepQuery(String query, QueryLogic<?> queryLogic, int minScoreThreshold) throws Exception {
QueryImpl q = new QueryImpl();
q.setQuery(query);
q.setId(UUID.randomUUID());
Expand All @@ -233,10 +235,10 @@ public EventQueryResponseBase runChainedQuery(String query, int minScoreThreshol
q.addParameter(SSDeepSimilarityQueryTransformer.MIN_SSDEEP_SCORE_PARAMETER, String.valueOf(minScoreThreshold));
}

RunningQuery runner = new RunningQuery(client, AccumuloConnectionFactory.Priority.NORMAL, similarityDiscoveryQueryLogic, q, "", principal,
RunningQuery runner = new RunningQuery(client, AccumuloConnectionFactory.Priority.NORMAL, queryLogic, q, "", principal,
new QueryMetricFactoryImpl());
TransformIterator transformIterator = runner.getTransformIterator();
DiscoveryTransformer transformer = (DiscoveryTransformer) transformIterator.getTransformer();
AbstractQueryLogicTransformer<?,?> transformer = (AbstractQueryLogicTransformer<?,?>) transformIterator.getTransformer();
EventQueryResponseBase response = (EventQueryResponseBase) transformer.createResponse(runner.next());

return response;
Expand Down

0 comments on commit a6a2b17

Please sign in to comment.