From 432c9b253780c7edbe6bfa28734919c83c9b5475 Mon Sep 17 00:00:00 2001 From: Jason Wells Date: Thu, 13 Jun 2019 15:02:16 -0700 Subject: [PATCH 1/4] Initial support for markup set recording --- .../annotations/AnnotationRepository.java | 163 +++++++++++++ .../superutilities/misc/SQLiteBacked.java | 224 ++++++++++++++++++ .../superutilities/query/QueryHelper.java | 6 + 3 files changed, 393 insertions(+) create mode 100644 Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java create mode 100644 Java/src/main/java/com/nuix/superutilities/misc/SQLiteBacked.java diff --git a/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java b/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java new file mode 100644 index 0000000..d35c035 --- /dev/null +++ b/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java @@ -0,0 +1,163 @@ +package com.nuix.superutilities.annotations; + +import java.io.File; +import java.io.IOException; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.List; +import java.util.Set; +import java.util.function.Consumer; + +import com.google.common.collect.HashBiMap; +import com.nuix.superutilities.misc.FormatUtility; +import com.nuix.superutilities.misc.SQLiteBacked; +import com.nuix.superutilities.query.QueryHelper; + +import jxl.common.Logger; +import nuix.Case; +import nuix.Item; +import nuix.Markup; +import nuix.MarkupSet; +import nuix.MutablePrintedImage; +import nuix.MutablePrintedPage; +import nuix.PrintedPage; + +public class AnnotationRepository extends SQLiteBacked { + private static Logger logger = Logger.getLogger(AnnotationRepository.class); + private HashBiMap itemGuidIdLookup = HashBiMap.create(); + private HashBiMap markupSetIdLookup = HashBiMap.create(); + + public AnnotationRepository(String databaseFile) throws SQLException { + this(new File(databaseFile)); + } + + public AnnotationRepository(File databaseFile) 
throws SQLException { + super(databaseFile); + createTables(); + } + + private void createTables() throws SQLException { + // Create item table + String createTableItem = "CREATE TABLE IF NOT EXISTS Item ("+ + "ID INTEGER PRIMARY KEY AUTOINCREMENT, Name TEXT, GUID TEXT, MD5 TEXT)"; + executeUpdate(createTableItem); + + String createTableMarkupSet = "CREATE TABLE IF NOT EXISTS MarkupSet ("+ + "ID INTEGER PRIMARY KEY AUTOINCREMENT, Name TEXT, Description TEXT, RedactionReason TEXT)"; + executeUpdate(createTableMarkupSet); + + String createTableItemMarkup = "CREATE TABLE IF NOT EXISTS ItemMarkup ("+ + "ID INTEGER PRIMARY KEY AUTOINCREMENT, Item_ID INTEGER, MarkupSet_ID INTEGER, PageNumber INTEGER,"+ + "IsRedaction INTEGER, X REAL, Y REAL, Width REAL, Height REAL)"; + executeUpdate(createTableItemMarkup); + + rebuildXrefs(); + } + + private void rebuildXrefs() throws SQLException { + itemGuidIdLookup.clear(); + String sql = "SELECT GUID,ID FROM Item"; + executeQuery(sql, null, new Consumer() { + @Override + public void accept(ResultSet rs) { + try { + while(rs.next()) { + String guid = FormatUtility.bytesToHex(rs.getBytes(1)); + long id = rs.getLong(2); + itemGuidIdLookup.put(guid, id); + } + } catch (SQLException exc) { + logger.error("Error building GUID to ID XREF",exc); + } + } + }); + + markupSetIdLookup.clear(); + sql = "SELECT Name,ID FROM MarkupSet"; + executeQuery(sql,null, new Consumer() { + @Override + public void accept(ResultSet rs) { + try { + while(rs.next()) { + String name = rs.getString(1); + long id = rs.getLong(2); + markupSetIdLookup.put(name, id); + } + } catch (SQLException exc) { + logger.error("Error building Markup Set Name to ID XREF",exc); + } + } + }); + } + + public void storeAllMarkupSets(Case nuixCase) throws IOException, SQLException { + List markupSets = nuixCase.getMarkupSets(); + for(MarkupSet markupSet : markupSets) { + storeMarkupSet(nuixCase, markupSet); + } + } + + public void storeMarkupSet(Case nuixCase, MarkupSet markupSet) 
throws IOException, SQLException { + String insertItemMarkup = "INSERT INTO ItemMarkup (Item_ID,MarkupSet_ID,PageNumber,IsRedaction,X,Y,Width,Height) VALUES (?,?,?,?,?,?,?,?)"; + String itemQuery = QueryHelper.markupSetQuery(markupSet); + + long markupSetId = getMarkupSetId(markupSet); + Set markupSetItems = nuixCase.searchUnsorted(itemQuery); + for(Item item : markupSetItems) { + long itemId = getItemId(item); + MutablePrintedImage itemImage = item.getPrintedImage(); + List pages = itemImage.getPages(); + for (int i = 0; i < pages.size(); i++) { + MutablePrintedPage page = (MutablePrintedPage) pages.get(i); + Set pageMarkups = page.getMarkups(markupSet); + for(Markup pageMarkup : pageMarkups) { + executeInsert(insertItemMarkup, + itemId, + markupSetId, + i+1, + pageMarkup.isRedaction(), + pageMarkup.getX(), + pageMarkup.getY(), + pageMarkup.getWidth(), + pageMarkup.getHeight()); + } + } + } + } + + public long getItemId(Item item) throws SQLException { + String guid = item.getGuid().replaceAll("\\-", ""); + + if(itemGuidIdLookup.containsKey(guid)) { + return itemGuidIdLookup.get(guid); + } else { + String md5 = item.getDigests().getMd5(); + String name = item.getLocalisedName(); + + byte[] guidBytes = FormatUtility.hexToBytes(guid); + byte[] md5Bytes = FormatUtility.hexToBytes(md5); + + String sql = "INSERT INTO Item (GUID,MD5,Name) VALUES (?,?,?)"; + executeInsert(sql, guidBytes, md5Bytes, name); + long id = executeScalar("SELECT ID FROM Item WHERE GUID = ?", guidBytes); + itemGuidIdLookup.put(guid, id); + return id; + } + } + + public long getMarkupSetId(MarkupSet markupSet) throws SQLException { + String name = markupSet.getName(); + if(markupSetIdLookup.containsKey(name)) { + return markupSetIdLookup.get(name); + } else { + String description = markupSet.getDescription(); + String redactionReason = markupSet.getRedactionReason(); + + String sql = "INSERT INTO MarkupSet (Name,Description,RedactionReason) VALUES (?,?,?)"; + 
executeInsert(sql,name,description,redactionReason); + long id = executeScalar("SELECT ID FROM MarkupSet WHERE Name = ?", name); + markupSetIdLookup.put(name, id); + return id; + } + } +} diff --git a/Java/src/main/java/com/nuix/superutilities/misc/SQLiteBacked.java b/Java/src/main/java/com/nuix/superutilities/misc/SQLiteBacked.java new file mode 100644 index 0000000..eeae4c6 --- /dev/null +++ b/Java/src/main/java/com/nuix/superutilities/misc/SQLiteBacked.java @@ -0,0 +1,224 @@ +package com.nuix.superutilities.misc; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.List; +import java.util.Properties; +import java.util.function.Consumer; + +import org.sqlite.SQLiteConfig; +import org.sqlite.SQLiteConfig.JournalMode; +import org.sqlite.SQLiteConfig.LockingMode; +import org.sqlite.SQLiteConfig.SynchronousMode; +import org.sqlite.SQLiteConfig.TransactionMode; + +/*** + * Provides a wrapper around a SQLite database. Intended to be extended by other classes which intend to leverage a + * SQLite database in some way. 
+ * @author Jason Wells + * + */ +public class SQLiteBacked implements Closeable { + private File databaseFile = null; + private Properties connectionProperties = new Properties(); + private Connection persistentConnection = null; + + public SQLiteBacked(File databaseFile) { + SQLiteConfig config = new SQLiteConfig(); + config.setCacheSize(2000); + config.setPageSize(4096 * 10); + config.setJournalMode(JournalMode.WAL); + config.setLockingMode(LockingMode.EXCLUSIVE); + config.setTransactionMode(TransactionMode.EXCLUSIVE); + config.setSynchronous(SynchronousMode.OFF); + connectionProperties = config.toProperties(); + } + + private Connection getConnection() throws SQLException { + if(persistentConnection == null){ + String connectionString = String.format("jdbc:sqlite:%s", databaseFile); + persistentConnection = DriverManager.getConnection(connectionString, connectionProperties); + } + return persistentConnection; + } + + /*** + * Binds a list of objects to a prepared statement + * @param statement The prepared statement to bind data to + * @param data The data to bind + * @throws SQLException If the SQL bits throw an error + */ + private void bindData(PreparedStatement statement, List data) throws SQLException{ + if(data != null){ + for (int i = 0; i < data.size(); i++) { + Object value = data.get(i); + statement.setObject(i+1, value); + } + } + } + + private void bindData(PreparedStatement statement, Object[] data) throws SQLException{ + if(data != null){ + for (int i = 0; i < data.length; i++) { + Object value = data[i]; + statement.setObject(i+1, value); + } + } + } + + /*** + * Executes an update query against the SQLite database file + * @param sql The SQL to execute + * @param data Optional list of associated data, can be null + * @return Count of affected records + * @throws SQLException If the SQL bits throw an error + */ + public int executeUpdate(String sql, List data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement 
statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + return statement.executeUpdate(); + } + } + + public int executeUpdate(String sql, Object ...data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + return statement.executeUpdate(); + } + } + + public int executeUpdate(String sql) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + return statement.executeUpdate(); + } + } + + /*** + * Executes an insert query against the SQLite database file + * @param sql The SQL to execute + * @param data Optional list of associated data, can be null + * @throws SQLException If the SQL bits throw an error + */ + public void executeInsert(String sql, List data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + statement.executeUpdate(); + } + } + + public void executeInsert(String sql, Object ...data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + statement.executeUpdate(); + } + } + + public Long executeLongScalar(String sql, Object ...data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + try(ResultSet resultSet = statement.executeQuery()){ + return resultSet.getLong(1); + } + } + } + + public Long executeLongScalar(String sql, List data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + try(ResultSet resultSet = statement.executeQuery()){ + return 
resultSet.getLong(1); + } + } + } + + public Long executeLongScalar(String sql) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + try(ResultSet resultSet = statement.executeQuery()){ + return resultSet.getLong(1); + } + } + } + + public String executeStringScalar(String sql, Object ...data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + try(ResultSet resultSet = statement.executeQuery()){ + return resultSet.getString(1); + } + } + } + + public String executeStringScalar(String sql, List data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + try(ResultSet resultSet = statement.executeQuery()){ + return resultSet.getString(1); + } + } + } + + public String executeStringScalar(String sql) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + try(ResultSet resultSet = statement.executeQuery()){ + return resultSet.getString(1); + } + } + } + + @SuppressWarnings("unchecked") + public T executeScalar(String sql, Object ...data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + try(ResultSet resultSet = statement.executeQuery()){ + return (T)resultSet.getObject(1); + } + } + } + + /*** + * Executes a query which is expected to return row data, providing the result set to the provided callback. + * @param sql The SQL query to execute + * @param data Optional list of associated data, can be null + * @param resultConsumer Callback which will be provided the result set. This is where you provide code to make use of the results. 
+ * @throws SQLException If the SQL bits throw an error + */ + public void executeQuery(String sql, List data, Consumer resultConsumer) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + try(ResultSet resultSet = statement.executeQuery()){ + resultConsumer.accept(resultSet); + } + } + } + + @Override + public void close() throws IOException { + if(persistentConnection != null){ + try { + persistentConnection.close(); + } catch (SQLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + } +} diff --git a/Java/src/main/java/com/nuix/superutilities/query/QueryHelper.java b/Java/src/main/java/com/nuix/superutilities/query/QueryHelper.java index 348dd02..684b308 100644 --- a/Java/src/main/java/com/nuix/superutilities/query/QueryHelper.java +++ b/Java/src/main/java/com/nuix/superutilities/query/QueryHelper.java @@ -7,6 +7,8 @@ import org.joda.time.DateTime; +import nuix.MarkupSet; + public class QueryHelper { /*** * Builds an item-date Nuix range query for items with a date occurring within the specified year. 
@@ -125,4 +127,8 @@ public static String namedEntityQuery(Collection entityNames) { String query = String.format("named-entities:(%s)", String.join(" OR ",entityNameFragments)); return query; } + + public static String markupSetQuery(MarkupSet markupSet) { + return String.format("markup-set:\"%s\"", markupSet.getName()); + } } From 19ad08877beed1ba83e2648eb9eaadb3a08d920f Mon Sep 17 00:00:00 2001 From: Jason Wells Date: Thu, 13 Jun 2019 16:18:13 -0700 Subject: [PATCH 2/4] Update SQLiteBacked.java Fix to make sure database file is actually set :smile: --- .../src/main/java/com/nuix/superutilities/misc/SQLiteBacked.java | 1 + 1 file changed, 1 insertion(+) diff --git a/Java/src/main/java/com/nuix/superutilities/misc/SQLiteBacked.java b/Java/src/main/java/com/nuix/superutilities/misc/SQLiteBacked.java index eeae4c6..d39ed50 100644 --- a/Java/src/main/java/com/nuix/superutilities/misc/SQLiteBacked.java +++ b/Java/src/main/java/com/nuix/superutilities/misc/SQLiteBacked.java @@ -30,6 +30,7 @@ public class SQLiteBacked implements Closeable { private Connection persistentConnection = null; public SQLiteBacked(File databaseFile) { + this.databaseFile = databaseFile; SQLiteConfig config = new SQLiteConfig(); config.setCacheSize(2000); config.setPageSize(4096 * 10); From 92805f8d68d7a11e351ea62610b9c5fcf2eec879 Mon Sep 17 00:00:00 2001 From: Jason Wells Date: Thu, 13 Jun 2019 16:18:49 -0700 Subject: [PATCH 3/4] Building logic to record markups in database back to a case --- .../annotations/AnnotationRepository.java | 92 ++++++++++++++++++- RubyTests/Test_AnnotationRepository.rb | 15 +++ 2 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 RubyTests/Test_AnnotationRepository.rb diff --git a/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java b/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java index d35c035..35ad424 100644 --- 
a/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java +++ b/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java @@ -4,7 +4,10 @@ import java.io.IOException; import java.sql.ResultSet; import java.sql.SQLException; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.function.Consumer; @@ -139,7 +142,7 @@ public long getItemId(Item item) throws SQLException { String sql = "INSERT INTO Item (GUID,MD5,Name) VALUES (?,?,?)"; executeInsert(sql, guidBytes, md5Bytes, name); - long id = executeScalar("SELECT ID FROM Item WHERE GUID = ?", guidBytes); + long id = executeLongScalar("SELECT ID FROM Item WHERE GUID = ?", guidBytes); itemGuidIdLookup.put(guid, id); return id; } @@ -155,9 +158,94 @@ public long getMarkupSetId(MarkupSet markupSet) throws SQLException { String sql = "INSERT INTO MarkupSet (Name,Description,RedactionReason) VALUES (?,?,?)"; executeInsert(sql,name,description,redactionReason); - long id = executeScalar("SELECT ID FROM MarkupSet WHERE Name = ?", name); + long id = executeLongScalar("SELECT ID FROM MarkupSet WHERE Name = ?", name); markupSetIdLookup.put(name, id); return id; } } + + public void applyMarkupsFromDatabaseToCase(Case nuixCase, boolean addToExisting) throws SQLException { + Map markupSetLookup = new HashMap(); + for(MarkupSet existingMarkupSet : nuixCase.getMarkupSets()) { + markupSetLookup.put(existingMarkupSet.getName(), existingMarkupSet); + } + + List bindData = new ArrayList(); + + for(Map.Entry markupEntry : markupSetIdLookup.entrySet()) { + String markupSetName = markupEntry.getKey(); + long markupSetId = markupEntry.getValue(); + String markupSetDescription = executeStringScalar("SELECT Description FROM MarkupSet WHERE ID = ?",markupSetId); + String markupSetRedactionReason = executeStringScalar("SELECT RedactionReason FROM MarkupSet WHERE ID = ?",markupSetId); + + // We need to resolve 
the MarkupSet object, either by obtaining existing one in case or creating new one + MarkupSet markupSet = null; + if(markupSetLookup.containsKey(markupSetName)) { + if(addToExisting) { + markupSet = markupSetLookup.get(markupSetName); + } else { + // When addToExisting is false and we have a name collision, we will attempt to find a usable name + int nameSequence = 2; + String targetName = markupSetName+"_"+nameSequence; + while(markupSetLookup.containsKey(targetName)) { + nameSequence++; + targetName = markupSetName+"_"+nameSequence; + } + Map markupSetSettings = new HashMap(); + markupSetSettings.put("description", markupSetDescription); + markupSetSettings.put("redactionReason", markupSetRedactionReason); + markupSet = nuixCase.createMarkupSet(targetName, markupSetSettings); + } + } else { + // Markup set does not appear to already exist, so lets create it + Map markupSetSettings = new HashMap(); + markupSetSettings.put("description", markupSetDescription); + markupSetSettings.put("redactionReason", markupSetRedactionReason); + markupSet = nuixCase.createMarkupSet(markupSetName, markupSetSettings); + } + + final MarkupSet targetMarkupSet = markupSet; + + // Now that we have a MarkupSet, we need to get ItemMarkup records from DB + String itemMarkupSql = "SELECT i.GUID,im.PageNumber,im.IsRedaction,im.X,im.Y,im.Width,im.Height FROM ItemMarkup AS im " + + "INNER JOIN Item AS i ON im.Item_ID = i.ID " + + "WHERE im.MarkupSet_ID = ? 
" + + "ORDER BY GUID, PageNumber"; + bindData.clear(); + bindData.add(markupSetId); + executeQuery(itemMarkupSql,bindData,new Consumer() { + @Override + public void accept(ResultSet rs) { + try { + while(rs.next()) { + String guid = FormatUtility.bytesToHex(rs.getBytes(1)); + long pageNumber = rs.getLong(2); + boolean isRedaction = rs.getBoolean(3); + double x = rs.getDouble(4); + double y = rs.getDouble(5); + double width = rs.getDouble(6); + double height = rs.getDouble(7); + + Set items = nuixCase.searchUnsorted("guid:"+guid); + + for(Item item : items) { + MutablePrintedImage itemImage = item.getPrintedImage(); + List pages = itemImage.getPages(); + MutablePrintedPage page = (MutablePrintedPage)pages.get((int) (pageNumber-1)); + if(isRedaction) { + page.createRedaction(targetMarkupSet, x, y, width, height); + } else { + page.createHighlight(targetMarkupSet, x, y, width, height); + } + } + } + } catch (SQLException exc) { + logger.error("Error retrieving ItemMarkup data from database", exc); + } catch (IOException exc2) { + logger.error("Error retrieving item from case", exc2); + } + } + }); + } + } } diff --git a/RubyTests/Test_AnnotationRepository.rb b/RubyTests/Test_AnnotationRepository.rb new file mode 100644 index 0000000..a6446f8 --- /dev/null +++ b/RubyTests/Test_AnnotationRepository.rb @@ -0,0 +1,15 @@ +script_directory = File.dirname(__FILE__) +require File.join(script_directory,"SuperUtilities.jar") +java_import com.nuix.superutilities.SuperUtilities +$su = SuperUtilities.init($utilities,NUIX_VERSION) +java_import com.nuix.superutilities.annotations.AnnotationRepository + +$current_case = $utilities.getCaseFactory.open('D:\cases\FakeData_7.8') + +db_file = "D:\\Temp\\Annotations_#{Time.now.to_i}.db" +puts "DB File: #{db_file}" +repo = AnnotationRepository.new(db_file) +repo.storeAllMarkupSets($current_case) +repo.applyMarkupsFromDatabaseToCase($current_case,false) +repo.close +$current_case.close \ No newline at end of file From 
ac2f38049839465300bf7fb1f381c86fd578dcac Mon Sep 17 00:00:00 2001 From: Jason Wells Date: Mon, 17 Jun 2019 10:55:02 -0700 Subject: [PATCH 4/4] Further annotation repo functionality - Message logging and progress reporting - Support for aborting - Support for choosing match by GUID or MD5 - During import, using a loading cache for item retrieval so that if multiple markups in a series are all added to the same item, the search for the item occurs just the first time and then the cache offers up that item on subsequent requests. --- .../annotations/AnnotationMatchingMethod.java | 6 + .../annotations/AnnotationRepository.java | 151 ++++++++++++++++-- RubyTests/Test_AnnotationRepository.rb | 15 +- 3 files changed, 160 insertions(+), 12 deletions(-) create mode 100644 Java/src/main/java/com/nuix/superutilities/annotations/AnnotationMatchingMethod.java diff --git a/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationMatchingMethod.java b/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationMatchingMethod.java new file mode 100644 index 0000000..abd2c28 --- /dev/null +++ b/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationMatchingMethod.java @@ -0,0 +1,6 @@ +package com.nuix.superutilities.annotations; + +public enum AnnotationMatchingMethod { + GUID, + MD5 +} diff --git a/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java b/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java index 35ad424..7fe42ef 100644 --- a/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java +++ b/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java @@ -6,11 +6,17 @@ import java.sql.SQLException; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.function.BiConsumer; import 
java.util.function.Consumer; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; import com.google.common.collect.HashBiMap; import com.nuix.superutilities.misc.FormatUtility; import com.nuix.superutilities.misc.SQLiteBacked; @@ -27,6 +33,46 @@ public class AnnotationRepository extends SQLiteBacked { private static Logger logger = Logger.getLogger(AnnotationRepository.class); + + private boolean abortWasRequested = false; + private Consumer messageLoggedCallback = null; + + /*** + * Allows you to provide a callback which will be invoked when this instance emits a log message. + * @param callback The callback to receive logged messages + */ + public void whenMessageLogged(Consumer callback) { + messageLoggedCallback = callback; + } + + private void logMessage(String message) { + if(messageLoggedCallback != null) { + messageLoggedCallback.accept(message); + } else { + logger.info(message); + } + } + + private void logMessage(String format, Object... params) { + logMessage(String.format(format, params)); + } + + private BiConsumer progressUpdatedCallback = null; + + /*** + * Allows you to provide a callback which will be invoked when this instance emits a progress update. 
+ * @param callback The callback to invoke when progress is updated + */ + public void whenProgressUpdated(BiConsumer callback) { + progressUpdatedCallback = callback; + } + + private void fireProgressUpdated(int current, int total) { + if(progressUpdatedCallback != null) { + progressUpdatedCallback.accept(current,total); + } + } + private HashBiMap itemGuidIdLookup = HashBiMap.create(); private HashBiMap markupSetIdLookup = HashBiMap.create(); @@ -58,6 +104,7 @@ private void createTables() throws SQLException { } private void rebuildXrefs() throws SQLException { + logMessage("Building GUID lookup from any existing entries in DB..."); itemGuidIdLookup.clear(); String sql = "SELECT GUID,ID FROM Item"; executeQuery(sql, null, new Consumer() { @@ -75,6 +122,7 @@ public void accept(ResultSet rs) { } }); + logMessage("Building MarkupSet name lookup from any existing entries in DB..."); markupSetIdLookup.clear(); sql = "SELECT Name,ID FROM MarkupSet"; executeQuery(sql,null, new Consumer() { @@ -94,19 +142,28 @@ public void accept(ResultSet rs) { } public void storeAllMarkupSets(Case nuixCase) throws IOException, SQLException { + abortWasRequested = false; List markupSets = nuixCase.getMarkupSets(); for(MarkupSet markupSet : markupSets) { + if(abortWasRequested) { break; } storeMarkupSet(nuixCase, markupSet); } } public void storeMarkupSet(Case nuixCase, MarkupSet markupSet) throws IOException, SQLException { + abortWasRequested = false; + logMessage("Storing markups from MarkupSet: "+markupSet.getName()); + long itemMarkupCountBefore = getItemMarkupCount(); + logMessage("Item Markup Count Before: %s", itemMarkupCountBefore); String insertItemMarkup = "INSERT INTO ItemMarkup (Item_ID,MarkupSet_ID,PageNumber,IsRedaction,X,Y,Width,Height) VALUES (?,?,?,?,?,?,?,?)"; String itemQuery = QueryHelper.markupSetQuery(markupSet); long markupSetId = getMarkupSetId(markupSet); Set markupSetItems = nuixCase.searchUnsorted(itemQuery); + int currentItemIndex = 1; for(Item item : 
markupSetItems) { + if(abortWasRequested) { break; } + fireProgressUpdated(currentItemIndex, markupSetItems.size()); long itemId = getItemId(item); MutablePrintedImage itemImage = item.getPrintedImage(); List pages = itemImage.getPages(); @@ -125,7 +182,12 @@ public void storeMarkupSet(Case nuixCase, MarkupSet markupSet) throws IOExceptio pageMarkup.getHeight()); } } + currentItemIndex++; } + + long itemMarkupCountAfter = getItemMarkupCount(); + logMessage("Item Markup Count After: %s",itemMarkupCountAfter); + logMessage("Difference: +%s",(itemMarkupCountAfter - itemMarkupCountBefore)); } public long getItemId(Item item) throws SQLException { @@ -164,7 +226,8 @@ public long getMarkupSetId(MarkupSet markupSet) throws SQLException { } } - public void applyMarkupsFromDatabaseToCase(Case nuixCase, boolean addToExisting) throws SQLException { + public void applyMarkupsFromDatabaseToCase(Case nuixCase, boolean addToExistingMarkupSet, AnnotationMatchingMethod matchingMethod) throws SQLException { + abortWasRequested = false; Map markupSetLookup = new HashMap(); for(MarkupSet existingMarkupSet : nuixCase.getMarkupSets()) { markupSetLookup.put(existingMarkupSet.getName(), existingMarkupSet); @@ -173,15 +236,24 @@ public void applyMarkupsFromDatabaseToCase(Case nuixCase, boolean addToExisting) List bindData = new ArrayList(); for(Map.Entry markupEntry : markupSetIdLookup.entrySet()) { + if(abortWasRequested) { break; } String markupSetName = markupEntry.getKey(); long markupSetId = markupEntry.getValue(); String markupSetDescription = executeStringScalar("SELECT Description FROM MarkupSet WHERE ID = ?",markupSetId); String markupSetRedactionReason = executeStringScalar("SELECT RedactionReason FROM MarkupSet WHERE ID = ?",markupSetId); + logMessage("Applying markups to case from MarkupSet: %s",markupSetName); + if(matchingMethod == AnnotationMatchingMethod.GUID) { + logMessage("Matching DB entries to case items using: GUID"); + } else if(matchingMethod == 
AnnotationMatchingMethod.MD5) { + logMessage("Matching DB entries to case items using: MD5"); + } + // We need to resolve the MarkupSet object, either by obtaining existing one in case or creating new one MarkupSet markupSet = null; if(markupSetLookup.containsKey(markupSetName)) { - if(addToExisting) { + if(addToExistingMarkupSet) { + logMessage("Applying markups in destination case to existing markup set: %s",markupSetName); markupSet = markupSetLookup.get(markupSetName); } else { // When addToExisting is false and we have a name collision, we will attempt to find a usable name @@ -191,12 +263,16 @@ public void applyMarkupsFromDatabaseToCase(Case nuixCase, boolean addToExisting) nameSequence++; targetName = markupSetName+"_"+nameSequence; } + + logMessage("Applying markups in DB to new markup set: %s",targetName); + Map markupSetSettings = new HashMap(); markupSetSettings.put("description", markupSetDescription); markupSetSettings.put("redactionReason", markupSetRedactionReason); markupSet = nuixCase.createMarkupSet(targetName, markupSetSettings); } } else { + logMessage("Applying markups in DB to new markup set: %s",markupSetName); // Markup set does not appear to already exist, so lets create it Map markupSetSettings = new HashMap(); markupSetSettings.put("description", markupSetDescription); @@ -207,27 +283,66 @@ public void applyMarkupsFromDatabaseToCase(Case nuixCase, boolean addToExisting) final MarkupSet targetMarkupSet = markupSet; // Now that we have a MarkupSet, we need to get ItemMarkup records from DB - String itemMarkupSql = "SELECT i.GUID,im.PageNumber,im.IsRedaction,im.X,im.Y,im.Width,im.Height FROM ItemMarkup AS im " + + String itemMarkupSql = "SELECT i.GUID,i.MD5,im.PageNumber,im.IsRedaction,im.X,im.Y,im.Width,im.Height FROM ItemMarkup AS im " + "INNER JOIN Item AS i ON im.Item_ID = i.ID " + "WHERE im.MarkupSet_ID = ? 
" + - "ORDER BY GUID, PageNumber"; + "ORDER BY MD5,GUID,PageNumber"; + String itemMarkupTotalCountSql = "SELECT COUNT(*) FROM ItemMarkup AS im " + + "INNER JOIN Item AS i ON im.Item_ID = i.ID " + + "WHERE im.MarkupSet_ID = ? " + + "ORDER BY MD5,GUID,PageNumber"; + bindData.clear(); bindData.add(markupSetId); + + int totalItemMarkups = executeLongScalar(itemMarkupTotalCountSql,bindData).intValue(); + LoadingCache> itemCache = CacheBuilder.newBuilder() + .maximumSize(1000) + .build(new CacheLoader>(){ + + @Override + public Set load(String guidOrMd5) throws Exception { + Set items = new HashSet(); + if(matchingMethod == AnnotationMatchingMethod.GUID) { + items = nuixCase.searchUnsorted("guid:"+guidOrMd5); + if(items.size() < 1) { + logMessage("No items in case found to match GUID: %s",guidOrMd5); + } + } else if(matchingMethod == AnnotationMatchingMethod.MD5) { + items = nuixCase.searchUnsorted("md5:"+guidOrMd5); + if(items.size() < 1) { + logMessage("No items in case found to match MD5: %s",guidOrMd5); + } + } + return items; + } + }); + executeQuery(itemMarkupSql,bindData,new Consumer() { + int currentIndex = 1; @Override public void accept(ResultSet rs) { try { while(rs.next()) { + if(abortWasRequested) { break; } + fireProgressUpdated(currentIndex,totalItemMarkups); String guid = FormatUtility.bytesToHex(rs.getBytes(1)); - long pageNumber = rs.getLong(2); - boolean isRedaction = rs.getBoolean(3); - double x = rs.getDouble(4); - double y = rs.getDouble(5); - double width = rs.getDouble(6); - double height = rs.getDouble(7); + String md5 = FormatUtility.bytesToHex(rs.getBytes(2)); + long pageNumber = rs.getLong(3); + boolean isRedaction = rs.getBoolean(4); + double x = rs.getDouble(5); + double y = rs.getDouble(6); + double width = rs.getDouble(7); + double height = rs.getDouble(8); - Set items = nuixCase.searchUnsorted("guid:"+guid); + Set items = null; + if(matchingMethod == AnnotationMatchingMethod.GUID) { + items = itemCache.get(guid); + } else 
if(matchingMethod == AnnotationMatchingMethod.MD5) { + items = itemCache.get(md5); + } + for(Item item : items) { MutablePrintedImage itemImage = item.getPrintedImage(); List pages = itemImage.getPages(); @@ -238,14 +353,28 @@ public void accept(ResultSet rs) { page.createHighlight(targetMarkupSet, x, y, width, height); } } + currentIndex++; } } catch (SQLException exc) { logger.error("Error retrieving ItemMarkup data from database", exc); + logMessage("Error retrieving ItemMarkup data from database: %s",exc.getMessage()); } catch (IOException exc2) { logger.error("Error retrieving item from case", exc2); + logMessage("Error retrieving item from case: ",exc2.getMessage()); + } catch (ExecutionException e) { + logger.error(e); } } }); } } + + public long getItemMarkupCount() throws SQLException { + return executeLongScalar("SELECT COUNT(*) FROM ItemMarkup"); + } + + public void abort() { + logMessage("Signalling abort..."); + abortWasRequested = true; + } } diff --git a/RubyTests/Test_AnnotationRepository.rb b/RubyTests/Test_AnnotationRepository.rb index a6446f8..b21243a 100644 --- a/RubyTests/Test_AnnotationRepository.rb +++ b/RubyTests/Test_AnnotationRepository.rb @@ -3,13 +3,26 @@ java_import com.nuix.superutilities.SuperUtilities $su = SuperUtilities.init($utilities,NUIX_VERSION) java_import com.nuix.superutilities.annotations.AnnotationRepository +java_import com.nuix.superutilities.annotations.AnnotationMatchingMethod $current_case = $utilities.getCaseFactory.open('D:\cases\FakeData_7.8') db_file = "D:\\Temp\\Annotations_#{Time.now.to_i}.db" puts "DB File: #{db_file}" + +last_progress = Time.now repo = AnnotationRepository.new(db_file) +repo.whenMessageLogged do |message| + puts message +end +repo.whenProgressUpdated do |current,total| + if (Time.now - last_progress > 1) || current == total + puts "#{current}/#{total}" + last_progress = Time.now + end +end repo.storeAllMarkupSets($current_case) -repo.applyMarkupsFromDatabaseToCase($current_case,false) 
+repo.applyMarkupsFromDatabaseToCase($current_case,false,AnnotationMatchingMethod::GUID) repo.close + $current_case.close \ No newline at end of file