diff --git a/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationMatchingMethod.java b/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationMatchingMethod.java new file mode 100644 index 0000000..abd2c28 --- /dev/null +++ b/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationMatchingMethod.java @@ -0,0 +1,6 @@ +package com.nuix.superutilities.annotations; + +public enum AnnotationMatchingMethod { + GUID, + MD5 +} diff --git a/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java b/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java new file mode 100644 index 0000000..7fe42ef --- /dev/null +++ b/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java @@ -0,0 +1,380 @@ +package com.nuix.superutilities.annotations; + +import java.io.File; +import java.io.IOException; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.function.BiConsumer; +import java.util.function.Consumer; + +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import com.google.common.collect.HashBiMap; +import com.nuix.superutilities.misc.FormatUtility; +import com.nuix.superutilities.misc.SQLiteBacked; +import com.nuix.superutilities.query.QueryHelper; + +import jxl.common.Logger; +import nuix.Case; +import nuix.Item; +import nuix.Markup; +import nuix.MarkupSet; +import nuix.MutablePrintedImage; +import nuix.MutablePrintedPage; +import nuix.PrintedPage; + +public class AnnotationRepository extends SQLiteBacked { + private static Logger logger = Logger.getLogger(AnnotationRepository.class); + + private boolean abortWasRequested = false; + private Consumer messageLoggedCallback = null; + + /*** + * Allows you to provide a callback which will be invoked when this instance emits a log message. + * @param callback The callback to receive logged messages + */ + public void whenMessageLogged(Consumer callback) { + messageLoggedCallback = callback; + } + + private void logMessage(String message) { + if(messageLoggedCallback != null) { + messageLoggedCallback.accept(message); + } else { + logger.info(message); + } + } + + private void logMessage(String format, Object... params) { + logMessage(String.format(format, params)); + } + + private BiConsumer progressUpdatedCallback = null; + + /*** + * Allows you to provide a callback which will be invoked when this instance emits a progress update. + * @param callback The callback to invoke when progress is updated + */ + public void whenProgressUpdated(BiConsumer callback) { + progressUpdatedCallback = callback; + } + + private void fireProgressUpdated(int current, int total) { + if(progressUpdatedCallback != null) { + progressUpdatedCallback.accept(current,total); + } + } + + private HashBiMap itemGuidIdLookup = HashBiMap.create(); + private HashBiMap markupSetIdLookup = HashBiMap.create(); + + public AnnotationRepository(String databaseFile) throws SQLException { + this(new File(databaseFile)); + } + + public AnnotationRepository(File databaseFile) throws SQLException { + super(databaseFile); + createTables(); + } + + private void createTables() throws SQLException { + // Create item table + String createTableItem = "CREATE TABLE IF NOT EXISTS Item ("+ + "ID INTEGER PRIMARY KEY AUTOINCREMENT, Name TEXT, GUID TEXT, MD5 TEXT)"; + executeUpdate(createTableItem); + + String createTableMarkupSet = "CREATE TABLE IF NOT EXISTS MarkupSet ("+ + "ID INTEGER PRIMARY KEY AUTOINCREMENT, Name TEXT, Description TEXT, RedactionReason TEXT)"; + executeUpdate(createTableMarkupSet); + + String createTableItemMarkup = "CREATE TABLE IF NOT EXISTS ItemMarkup ("+ + "ID INTEGER PRIMARY KEY AUTOINCREMENT, Item_ID INTEGER, MarkupSet_ID INTEGER, PageNumber INTEGER,"+ + "IsRedaction INTEGER, X REAL, Y REAL, Width REAL, Height REAL)"; + executeUpdate(createTableItemMarkup); + + rebuildXrefs(); + } + + private void rebuildXrefs() throws SQLException { + logMessage("Building GUID lookup from any existing entries in DB..."); + itemGuidIdLookup.clear(); + String sql = "SELECT GUID,ID FROM Item"; + executeQuery(sql, null, new Consumer() { + @Override + public void accept(ResultSet rs) { + try { + while(rs.next()) { + String guid = FormatUtility.bytesToHex(rs.getBytes(1)); + long id = rs.getLong(2); + itemGuidIdLookup.put(guid, id); + } + } catch (SQLException exc) { + logger.error("Error building GUID to ID XREF",exc); + } + } + }); + + logMessage("Building MarkupSet name lookup from any existing entries in DB..."); + markupSetIdLookup.clear(); + sql = "SELECT Name,ID FROM MarkupSet"; + executeQuery(sql,null, new Consumer() { + @Override + public void accept(ResultSet rs) { + try { + while(rs.next()) { + String name = rs.getString(1); + long id = rs.getLong(2); + markupSetIdLookup.put(name, id); + } + } catch (SQLException exc) { + logger.error("Error building Markup Set Name to ID XREF",exc); + } + } + }); + } + + public void storeAllMarkupSets(Case nuixCase) throws IOException, SQLException { + abortWasRequested = false; + List markupSets = nuixCase.getMarkupSets(); + for(MarkupSet markupSet : markupSets) { + if(abortWasRequested) { break; } + storeMarkupSet(nuixCase, markupSet); + } + } + + public void storeMarkupSet(Case nuixCase, MarkupSet markupSet) throws IOException, SQLException { + abortWasRequested = false; + logMessage("Storing markups from MarkupSet: "+markupSet.getName()); + long itemMarkupCountBefore = getItemMarkupCount(); + logMessage("Item Markup Count Before: %s", itemMarkupCountBefore); + String insertItemMarkup = "INSERT INTO ItemMarkup (Item_ID,MarkupSet_ID,PageNumber,IsRedaction,X,Y,Width,Height) VALUES (?,?,?,?,?,?,?,?)"; + String itemQuery = QueryHelper.markupSetQuery(markupSet); + + long markupSetId = getMarkupSetId(markupSet); + Set markupSetItems = nuixCase.searchUnsorted(itemQuery); + int currentItemIndex = 1; + for(Item item : markupSetItems) { + if(abortWasRequested) { break; } + fireProgressUpdated(currentItemIndex, markupSetItems.size()); + long itemId = getItemId(item); + MutablePrintedImage itemImage = item.getPrintedImage(); + List pages = itemImage.getPages(); + for (int i = 0; i < pages.size(); i++) { + MutablePrintedPage page = (MutablePrintedPage) pages.get(i); + Set pageMarkups = page.getMarkups(markupSet); + for(Markup pageMarkup : pageMarkups) { + executeInsert(insertItemMarkup, + itemId, + markupSetId, + i+1, + pageMarkup.isRedaction(), + pageMarkup.getX(), + pageMarkup.getY(), + pageMarkup.getWidth(), + pageMarkup.getHeight()); + } + } + currentItemIndex++; + } + + long itemMarkupCountAfter = getItemMarkupCount(); + logMessage("Item Markup Count After: %s",itemMarkupCountAfter); + logMessage("Difference: +%s",(itemMarkupCountAfter - itemMarkupCountBefore)); + } + + public long getItemId(Item item) throws SQLException { + String guid = item.getGuid().replaceAll("\\-", ""); + + if(itemGuidIdLookup.containsKey(guid)) { + return itemGuidIdLookup.get(guid); + } else { + String md5 = item.getDigests().getMd5(); + String name = item.getLocalisedName(); + + byte[] guidBytes = FormatUtility.hexToBytes(guid); + byte[] md5Bytes = FormatUtility.hexToBytes(md5); + + String sql = "INSERT INTO Item (GUID,MD5,Name) VALUES (?,?,?)"; + executeInsert(sql, guidBytes, md5Bytes, name); + long id = executeLongScalar("SELECT ID FROM Item WHERE GUID = ?", guidBytes); + itemGuidIdLookup.put(guid, id); + return id; + } + } + + public long getMarkupSetId(MarkupSet markupSet) throws SQLException { + String name = markupSet.getName(); + if(markupSetIdLookup.containsKey(name)) { + return markupSetIdLookup.get(name); + } else { + String description = markupSet.getDescription(); + String redactionReason = markupSet.getRedactionReason(); + + String sql = "INSERT INTO MarkupSet (Name,Description,RedactionReason) VALUES (?,?,?)"; + executeInsert(sql,name,description,redactionReason); + long id = executeLongScalar("SELECT ID FROM MarkupSet WHERE Name = ?", name); + markupSetIdLookup.put(name, id); + return id; + } + } + + public void applyMarkupsFromDatabaseToCase(Case nuixCase, boolean addToExistingMarkupSet, AnnotationMatchingMethod matchingMethod) throws SQLException { + abortWasRequested = false; + Map markupSetLookup = new HashMap(); + for(MarkupSet existingMarkupSet : nuixCase.getMarkupSets()) { + markupSetLookup.put(existingMarkupSet.getName(), existingMarkupSet); + } + + List bindData = new ArrayList(); + + for(Map.Entry markupEntry : markupSetIdLookup.entrySet()) { + if(abortWasRequested) { break; } + String markupSetName = markupEntry.getKey(); + long markupSetId = markupEntry.getValue(); + String markupSetDescription = executeStringScalar("SELECT Description FROM MarkupSet WHERE ID = ?",markupSetId); + String markupSetRedactionReason = executeStringScalar("SELECT RedactionReason FROM MarkupSet WHERE ID = ?",markupSetId); + + logMessage("Applying markups to case from MarkupSet: %s",markupSetName); + if(matchingMethod == AnnotationMatchingMethod.GUID) { + logMessage("Matching DB entries to case items using: GUID"); + } else if(matchingMethod == AnnotationMatchingMethod.MD5) { + logMessage("Matching DB entries to case items using: MD5"); + } + + // We need to resolve the MarkupSet object, either by obtaining existing one in case or creating new one + MarkupSet markupSet = null; + if(markupSetLookup.containsKey(markupSetName)) { + if(addToExistingMarkupSet) { + logMessage("Applying markups in destination case to existing markup set: %s",markupSetName); + markupSet = markupSetLookup.get(markupSetName); + } else { + // When addToExisting is false and we have a name collision, we will attempt to find a usable name + int nameSequence = 2; + String targetName = markupSetName+"_"+nameSequence; + while(markupSetLookup.containsKey(targetName)) { + nameSequence++; + targetName = markupSetName+"_"+nameSequence; + } + + logMessage("Applying markups in DB to new markup set: %s",targetName); + + Map markupSetSettings = new HashMap(); + markupSetSettings.put("description", markupSetDescription); + markupSetSettings.put("redactionReason", markupSetRedactionReason); + markupSet = nuixCase.createMarkupSet(targetName, markupSetSettings); + } + } else { + logMessage("Applying markups in DB to new markup set: %s",markupSetName); + // Markup set does not appear to already exist, so lets create it + Map markupSetSettings = new HashMap(); + markupSetSettings.put("description", markupSetDescription); + markupSetSettings.put("redactionReason", markupSetRedactionReason); + markupSet = nuixCase.createMarkupSet(markupSetName, markupSetSettings); + } + + final MarkupSet targetMarkupSet = markupSet; + + // Now that we have a MarkupSet, we need to get ItemMarkup records from DB + String itemMarkupSql = "SELECT i.GUID,i.MD5,im.PageNumber,im.IsRedaction,im.X,im.Y,im.Width,im.Height FROM ItemMarkup AS im " + + "INNER JOIN Item AS i ON im.Item_ID = i.ID " + + "WHERE im.MarkupSet_ID = ? " + + "ORDER BY MD5,GUID,PageNumber"; + String itemMarkupTotalCountSql = "SELECT COUNT(*) FROM ItemMarkup AS im " + + "INNER JOIN Item AS i ON im.Item_ID = i.ID " + + "WHERE im.MarkupSet_ID = ? " + + "ORDER BY MD5,GUID,PageNumber"; + + bindData.clear(); + bindData.add(markupSetId); + + int totalItemMarkups = executeLongScalar(itemMarkupTotalCountSql,bindData).intValue(); + LoadingCache> itemCache = CacheBuilder.newBuilder() + .maximumSize(1000) + .build(new CacheLoader>(){ + + @Override + public Set load(String guidOrMd5) throws Exception { + Set items = new HashSet(); + if(matchingMethod == AnnotationMatchingMethod.GUID) { + items = nuixCase.searchUnsorted("guid:"+guidOrMd5); + if(items.size() < 1) { + logMessage("No items in case found to match GUID: %s",guidOrMd5); + } + } else if(matchingMethod == AnnotationMatchingMethod.MD5) { + items = nuixCase.searchUnsorted("md5:"+guidOrMd5); + if(items.size() < 1) { + logMessage("No items in case found to match MD5: %s",guidOrMd5); + } + } + return items; + } + }); + + executeQuery(itemMarkupSql,bindData,new Consumer() { + int currentIndex = 1; + @Override + public void accept(ResultSet rs) { + try { + while(rs.next()) { + if(abortWasRequested) { break; } + fireProgressUpdated(currentIndex,totalItemMarkups); + String guid = FormatUtility.bytesToHex(rs.getBytes(1)); + String md5 = FormatUtility.bytesToHex(rs.getBytes(2)); + long pageNumber = rs.getLong(3); + boolean isRedaction = rs.getBoolean(4); + double x = rs.getDouble(5); + double y = rs.getDouble(6); + double width = rs.getDouble(7); + double height = rs.getDouble(8); + + Set items = null; + + if(matchingMethod == AnnotationMatchingMethod.GUID) { + items = itemCache.get(guid); + } else if(matchingMethod == AnnotationMatchingMethod.MD5) { + items = itemCache.get(md5); + } + + for(Item item : items) { + MutablePrintedImage itemImage = item.getPrintedImage(); + List pages = itemImage.getPages(); + MutablePrintedPage page = (MutablePrintedPage)pages.get((int) (pageNumber-1)); + if(isRedaction) { + page.createRedaction(targetMarkupSet, x, y, width, height); + } else { + page.createHighlight(targetMarkupSet, x, y, width, height); + } + } + currentIndex++; + } + } catch (SQLException exc) { + logger.error("Error retrieving ItemMarkup data from database", exc); + logMessage("Error retrieving ItemMarkup data from database: %s",exc.getMessage()); + } catch (IOException exc2) { + logger.error("Error retrieving item from case", exc2); + logMessage("Error retrieving item from case: ",exc2.getMessage()); + } catch (ExecutionException e) { + logger.error(e); + } + } + }); + } + } + + public long getItemMarkupCount() throws SQLException { + return executeLongScalar("SELECT COUNT(*) FROM ItemMarkup"); + } + + public void abort() { + logMessage("Signalling abort..."); + abortWasRequested = true; + } +} diff --git a/Java/src/main/java/com/nuix/superutilities/misc/SQLiteBacked.java b/Java/src/main/java/com/nuix/superutilities/misc/SQLiteBacked.java new file mode 100644 index 0000000..d39ed50 --- /dev/null +++ b/Java/src/main/java/com/nuix/superutilities/misc/SQLiteBacked.java @@ -0,0 +1,225 @@ +package com.nuix.superutilities.misc; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.List; +import java.util.Properties; +import java.util.function.Consumer; + +import org.sqlite.SQLiteConfig; +import org.sqlite.SQLiteConfig.JournalMode; +import org.sqlite.SQLiteConfig.LockingMode; +import org.sqlite.SQLiteConfig.SynchronousMode; +import org.sqlite.SQLiteConfig.TransactionMode; + +/*** + * Provides a wrapper around a SQLite database. Intended to be extended by other classes which intended to leverage a + * SQLite database is some way. + * @author Jason Wells + * + */ +public class SQLiteBacked implements Closeable { + private File databaseFile = null; + private Properties connectionProperties = new Properties(); + private Connection persistentConnection = null; + + public SQLiteBacked(File databaseFile) { + this.databaseFile = databaseFile; + SQLiteConfig config = new SQLiteConfig(); + config.setCacheSize(2000); + config.setPageSize(4096 * 10); + config.setJournalMode(JournalMode.WAL); + config.setLockingMode(LockingMode.EXCLUSIVE); + config.setTransactionMode(TransactionMode.EXCLUSIVE); + config.setSynchronous(SynchronousMode.OFF); + connectionProperties = config.toProperties(); + } + + private Connection getConnection() throws SQLException { + if(persistentConnection == null){ + String connectionString = String.format("jdbc:sqlite:%s", databaseFile); + persistentConnection = DriverManager.getConnection(connectionString, connectionProperties); + } + return persistentConnection; + } + + /*** + * Binds a list of objects to a prepared statement + * @param statement The prepared statement to bind data to + * @param data The data to bind + * @throws SQLException If the SQL bits throw an error + */ + private void bindData(PreparedStatement statement, List data) throws SQLException{ + if(data != null){ + for (int i = 0; i < data.size(); i++) { + Object value = data.get(i); + statement.setObject(i+1, value); + } + } + } + + private void bindData(PreparedStatement statement, Object[] data) throws SQLException{ + if(data != null){ + for (int i = 0; i < data.length; i++) { + Object value = data[i]; + statement.setObject(i+1, value); + } + } + } + + /*** + * Executes an update query against the SQLite database file + * @param sql The SQL to execute + * @param data Optional list of associated data, can be null + * @return Count of affected records + * @throws SQLException If the SQL bits throw an error + */ + public int executeUpdate(String sql, List data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + return statement.executeUpdate(); + } + } + + public int executeUpdate(String sql, Object ...data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + return statement.executeUpdate(); + } + } + + public int executeUpdate(String sql) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + return statement.executeUpdate(); + } + } + + /*** + * Executes an insert query against the SQLite database file + * @param sql The SQL to execute + * @param data Optional list of associated data, can be null + * @throws SQLException If the SQL bits throw an error + */ + public void executeInsert(String sql, List data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + statement.executeUpdate(); + } + } + + public void executeInsert(String sql, Object ...data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + statement.executeUpdate(); + } + } + + public Long executeLongScalar(String sql, Object ...data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + try(ResultSet resultSet = statement.executeQuery()){ + return resultSet.getLong(1); + } + } + } + + public Long executeLongScalar(String sql, List data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + try(ResultSet resultSet = statement.executeQuery()){ + return resultSet.getLong(1); + } + } + } + + public Long executeLongScalar(String sql) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + try(ResultSet resultSet = statement.executeQuery()){ + return resultSet.getLong(1); + } + } + } + + public String executeStringScalar(String sql, Object ...data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + try(ResultSet resultSet = statement.executeQuery()){ + return resultSet.getString(1); + } + } + } + + public String executeStringScalar(String sql, List data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + try(ResultSet resultSet = statement.executeQuery()){ + return resultSet.getString(1); + } + } + } + + public String executeStringScalar(String sql) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + try(ResultSet resultSet = statement.executeQuery()){ + return resultSet.getString(1); + } + } + } + + @SuppressWarnings("unchecked") + public T executeScalar(String sql, Object ...data) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + try(ResultSet resultSet = statement.executeQuery()){ + return (T)resultSet.getObject(1); + } + } + } + + /*** + * Executes a query which is expected to return row data, providing the result set to the provided callback. + * @param sql The SQL query to execute + * @param data Optional list of associated data, can be null + * @param resultConsumer Callback which will be provided the result set. This is where you provide code to make use of the results. + * @throws SQLException If the SQL bits throw an error + */ + public void executeQuery(String sql, List data, Consumer resultConsumer) throws SQLException{ + Connection conn = getConnection(); + try(PreparedStatement statement = conn.prepareStatement(sql)){ + if(data != null){ bindData(statement,data); } + try(ResultSet resultSet = statement.executeQuery()){ + resultConsumer.accept(resultSet); + } + } + } + + @Override + public void close() throws IOException { + if(persistentConnection != null){ + try { + persistentConnection.close(); + } catch (SQLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + } +} diff --git a/Java/src/main/java/com/nuix/superutilities/query/QueryHelper.java b/Java/src/main/java/com/nuix/superutilities/query/QueryHelper.java index 348dd02..684b308 100644 --- a/Java/src/main/java/com/nuix/superutilities/query/QueryHelper.java +++ b/Java/src/main/java/com/nuix/superutilities/query/QueryHelper.java @@ -7,6 +7,8 @@ import org.joda.time.DateTime; +import nuix.MarkupSet; + public class QueryHelper { /*** * Builds an item-date Nuix range query for items with a date occurring within the specified year. @@ -125,4 +127,8 @@ public static String namedEntityQuery(Collection entityNames) { String query = String.format("named-entities:(%s)", String.join(" OR ",entityNameFragments)); return query; } + + public static String markupSetQuery(MarkupSet markupSet) { + return String.format("markup-set:\"%s\"", markupSet.getName()); + } } diff --git a/RubyTests/Test_AnnotationRepository.rb b/RubyTests/Test_AnnotationRepository.rb new file mode 100644 index 0000000..b21243a --- /dev/null +++ b/RubyTests/Test_AnnotationRepository.rb @@ -0,0 +1,28 @@ +script_directory = File.dirname(__FILE__) +require File.join(script_directory,"SuperUtilities.jar") +java_import com.nuix.superutilities.SuperUtilities +$su = SuperUtilities.init($utilities,NUIX_VERSION) +java_import com.nuix.superutilities.annotations.AnnotationRepository +java_import com.nuix.superutilities.annotations.AnnotationMatchingMethod + +$current_case = $utilities.getCaseFactory.open('D:\cases\FakeData_7.8') + +db_file = "D:\\Temp\\Annotations_#{Time.now.to_i}.db" +puts "DB File: #{db_file}" + +last_progress = Time.now +repo = AnnotationRepository.new(db_file) +repo.whenMessageLogged do |message| + puts message +end +repo.whenProgressUpdated do |current,total| + if (Time.now - last_progress > 1) || current == total + puts "#{current}/#{total}" + last_progress = Time.now + end +end +repo.storeAllMarkupSets($current_case) +repo.applyMarkupsFromDatabaseToCase($current_case,false,AnnotationMatchingMethod::GUID) +repo.close + +$current_case.close \ No newline at end of file