From 4223969406950747bf24369858d5d61ece3953ab Mon Sep 17 00:00:00 2001 From: Jason Wells Date: Tue, 18 Jun 2019 16:00:12 -0700 Subject: [PATCH 1/3] Update QueryHelper.java Added code for generating tag OR query --- .../superutilities/query/QueryHelper.java | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/Java/src/main/java/com/nuix/superutilities/query/QueryHelper.java b/Java/src/main/java/com/nuix/superutilities/query/QueryHelper.java index 684b308..1381fd0 100644 --- a/Java/src/main/java/com/nuix/superutilities/query/QueryHelper.java +++ b/Java/src/main/java/com/nuix/superutilities/query/QueryHelper.java @@ -1,5 +1,6 @@ package com.nuix.superutilities.query; +import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Set; @@ -131,4 +132,33 @@ public static String namedEntityQuery(Collection entityNames) { public static String markupSetQuery(MarkupSet markupSet) { return String.format("markup-set:\"%s\"", markupSet.getName()); } + + public static String escapeForSearch(String value) { + String result = value.replaceAll("\\", "\\\\"); + result = result.replaceAll("\\?", "\\\\?"); + result = result.replaceAll("\\*", "\\\\*"); + result = result.replaceAll("\"", "\\\""); + result = result.replaceAll("\u201C", "\\\u201C"); + result = result.replaceAll("\u201D", "\\\u201D"); + result = result.replaceAll("'", "\\'"); + result = result.replaceAll("{", "\\{"); + result = result.replaceAll("}", "\\}"); + return result; + } + + public static String orTagQuery(Collection tags) { + List escapedTags = tags.stream() + .map(tag -> escapeForSearch(tag)) + .map(tag -> "\""+tag+"\"") + .collect(Collectors.toList()); + return String.format("tag:(%s)", String.join(" OR ", escapedTags)); + } + + public static String orTagQuery(String... tags) { + List tagsList = new ArrayList(); + for (int i = 0; i < tags.length; i++) { + tagsList.add(tags[i]); + } + return orTagQuery(tagsList); + } } From f4656e1c5f475c20158d1332815acb09744e7f76 Mon Sep 17 00:00:00 2001 From: Jason Wells Date: Tue, 18 Jun 2019 16:00:34 -0700 Subject: [PATCH 2/3] Update AnnotationRepository.java Added functionality for tags --- .../annotations/AnnotationRepository.java | 274 ++++++++++++++---- 1 file changed, 217 insertions(+), 57 deletions(-) diff --git a/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java b/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java index b5ee666..c143cdf 100644 --- a/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java +++ b/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java @@ -18,11 +18,13 @@ import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; import com.google.common.collect.HashBiMap; +import com.nuix.superutilities.SuperUtilities; import com.nuix.superutilities.misc.FormatUtility; import com.nuix.superutilities.misc.SQLiteBacked; import com.nuix.superutilities.query.QueryHelper; import jxl.common.Logger; +import nuix.BulkAnnotater; import nuix.Case; import nuix.Item; import nuix.Markup; @@ -80,6 +82,7 @@ private void fireProgressUpdated(int current, int total) { private HashBiMap itemGuidIdLookup = HashBiMap.create(); private HashBiMap markupSetIdLookup = HashBiMap.create(); + private HashBiMap tagIdLookup = HashBiMap.create(); /*** * Creates a new instance associated to the specified SQLite DB file. File will be created if it does not already exist. @@ -105,20 +108,32 @@ public AnnotationRepository(File databaseFile) throws SQLException { * @throws SQLException Thrown if there are errors while interacting with the SQLite DB file. */ private void createTables() throws SQLException { - // Create item table + // Create table with item info String createTableItem = "CREATE TABLE IF NOT EXISTS Item ("+ "ID INTEGER PRIMARY KEY AUTOINCREMENT, Name TEXT, GUID TEXT, MD5 TEXT)"; executeUpdate(createTableItem); + // Create table with markup set info String createTableMarkupSet = "CREATE TABLE IF NOT EXISTS MarkupSet ("+ "ID INTEGER PRIMARY KEY AUTOINCREMENT, Name TEXT, Description TEXT, RedactionReason TEXT)"; executeUpdate(createTableMarkupSet); + // Create table with markup information String createTableItemMarkup = "CREATE TABLE IF NOT EXISTS ItemMarkup ("+ "ID INTEGER PRIMARY KEY AUTOINCREMENT, Item_ID INTEGER, MarkupSet_ID INTEGER, PageNumber INTEGER,"+ "IsRedaction INTEGER, X REAL, Y REAL, Width REAL, Height REAL)"; executeUpdate(createTableItemMarkup); + // Create table with tag info + String createTableTag = "CREATE TABLE IF NOT EXISTS Tag ("+ + "ID INTEGER PRIMARY KEY AUTOINCREMENT, Name TEXT)"; + executeUpdate(createTableTag); + + // Create table with tag to item associations + String createTableItemTag = "CREATE TABLE IF NOT EXISTS ItemTag ("+ + "ID INTEGER PRIMARY KEY AUTOINCREMENT, Item_ID INTEGER, Tag_ID INTEGER)"; + executeUpdate(createTableItemTag); + rebuildXrefs(); } @@ -162,6 +177,24 @@ public void accept(ResultSet rs) { } } }); + + logMessage("Building Tag name lookup from any existing entries in DB..."); + markupSetIdLookup.clear(); + sql = "SELECT Name,ID FROM Tag"; + executeQuery(sql,null, new Consumer() { + @Override + public void accept(ResultSet rs) { + try { + while(rs.next()) { + String name = rs.getString(1); + long id = rs.getLong(2); + markupSetIdLookup.put(name, id); + } + } catch (SQLException exc) { + logger.error("Error building Tag Name to ID XREF",exc); + } + } + }); } /*** @@ -171,7 +204,6 @@ public void accept(ResultSet rs) { * @throws SQLException Thrown if anything goes wrong interacting with the SQLite database file. */ public void storeAllMarkupSets(Case nuixCase) throws IOException, SQLException { - abortWasRequested = false; List markupSets = nuixCase.getMarkupSets(); for(MarkupSet markupSet : markupSets) { // Support aborting @@ -181,55 +213,6 @@ public void storeAllMarkupSets(Case nuixCase) throws IOException, SQLException { } } - /*** - * Stores a particular markup set living in the provided Nuix case. - * @param nuixCase The Nuix case containing the provided markup set. - * @param markupSet The specific markup set to store. - * @throws IOException Thrown most likely if there was an issue searching or retrieving printed pages of and item. - * @throws SQLException Thrown if anything goes wrong interacting with the SQLite database file. - */ - public void storeMarkupSet(Case nuixCase, MarkupSet markupSet) throws IOException, SQLException { - abortWasRequested = false; - logMessage("Storing markups from MarkupSet: "+markupSet.getName()); - long itemMarkupCountBefore = getItemMarkupCount(); - logMessage("Item Markup Count Before: %s", itemMarkupCountBefore); - String insertItemMarkup = "INSERT INTO ItemMarkup (Item_ID,MarkupSet_ID,PageNumber,IsRedaction,X,Y,Width,Height) VALUES (?,?,?,?,?,?,?,?)"; - String itemQuery = QueryHelper.markupSetQuery(markupSet); - - long markupSetId = getMarkupSetId(markupSet); - Set markupSetItems = nuixCase.searchUnsorted(itemQuery); - int currentItemIndex = 1; - for(Item item : markupSetItems) { - // Support aborting - if(abortWasRequested) { break; } - - fireProgressUpdated(currentItemIndex, markupSetItems.size()); - long itemId = getItemId(item); - MutablePrintedImage itemImage = item.getPrintedImage(); - List pages = itemImage.getPages(); - for (int i = 0; i < pages.size(); i++) { - MutablePrintedPage page = (MutablePrintedPage) pages.get(i); - Set pageMarkups = page.getMarkups(markupSet); - for(Markup pageMarkup : pageMarkups) { - executeInsert(insertItemMarkup, - itemId, - markupSetId, - i+1, - pageMarkup.isRedaction(), - pageMarkup.getX(), - pageMarkup.getY(), - pageMarkup.getWidth(), - pageMarkup.getHeight()); - } - } - currentItemIndex++; - } - - long itemMarkupCountAfter = getItemMarkupCount(); - logMessage("Item Markup Count After: %s",itemMarkupCountAfter); - logMessage("Difference: +%s",(itemMarkupCountAfter - itemMarkupCountBefore)); - } - /*** * Gets the sequentially assigned ID value from the Item table for a given item based on its GUID. Will attempt to get this from a cache first. * @param item The item to retrieve the DB ID number for. @@ -278,6 +261,184 @@ public long getMarkupSetId(MarkupSet markupSet) throws SQLException { } } + /*** + * Gets the sequentially assigned ID value from the Tag table for a given tag name. Will attempt to get this from a cache first. + * @param tagName Name of the tag you wish to get the ID of. + * @return The DB ID number for the given Tag, based on finding a record in the Tag table with a matching name. + * @throws SQLException Thrown if there are errors while interacting with the SQLite DB file. + */ + public long getTagId(String tagName) throws SQLException { + if(tagIdLookup.containsKey(tagName)) { + return tagIdLookup.get(tagName); + } else { + String sql = "INSERT INTO Tag (Name) VALUES (?)"; + executeInsert(sql,tagName); + long id = executeLongScalar("SELECT ID FROM Tag WHERE Name = ?", tagName); + tagIdLookup.put(tagName, id); + return id; + } + } + + public void storeTag(Case nuixCase, String tagName) throws IOException, SQLException { + logMessage("Storing tag: %s",tagName); + String insertItemTag = "INSERT INTO Tag (Item_ID,Tag_ID) VALUES (?,?)"; + String itemQuery = QueryHelper.orTagQuery(tagName); + Set tagItems = nuixCase.searchUnsorted(itemQuery); + long tagId = getTagId(tagName); + int currentItemIndex = 1; + for(Item item : tagItems) { + // Support aborting + if(abortWasRequested) { break; } + fireProgressUpdated(currentItemIndex, tagItems.size()); + long itemId = getItemId(item); + executeInsert(insertItemTag,itemId,tagId); + } + } + + public void storeAllTags(Case nuixCase) throws IOException, SQLException { + Set tags = nuixCase.getAllTags(); + for(String tag : tags) { + // Support aborting + if(abortWasRequested) { break; } + storeTag(nuixCase,tag); + } + } + + public void applyTagsFromDatabaseToCase(Case nuixCase, AnnotationMatchingMethod matchingMethod) throws SQLException { + List bindData = new ArrayList(); + BulkAnnotater annotater = SuperUtilities.getInstance().getBulkAnnotater(); + String itemTagSql = "SELECT i.GUID,i.MD5 FROM Tag AS t " + + "INNER JOIN Item AS i ON t.Item_ID = i.ID " + + "WHERE t.ID = ? " + + "ORDER BY MD5,GUID"; + + String itemTagCountSql = "SELECT COUNT(*) FROM Tag AS t " + + "INNER JOIN Item AS i ON t.Item_ID = i.ID " + + "WHERE t.ID = ? " + + "ORDER BY MD5,GUID"; + + logMessage("Applying tags to case..."); + if(matchingMethod == AnnotationMatchingMethod.GUID) { + logMessage("Matching DB entries to case items using: GUID"); + } else if(matchingMethod == AnnotationMatchingMethod.MD5) { + logMessage("Matching DB entries to case items using: MD5"); + } + + // We use a cache for item retrieval, running a serach for the item by GUID or MD5 if requested but + // not currently present in the cache. + LoadingCache> itemCache = CacheBuilder.newBuilder() + .maximumSize(1000) + .build(new CacheLoader>(){ + @Override + public Set load(String guidOrMd5) throws Exception { + // When a given GUID or MD5 is found to note already be present in our cache + // we will need to go find it in our case, cache it and return it. + Set items = new HashSet(); + if(matchingMethod == AnnotationMatchingMethod.GUID) { + items = nuixCase.searchUnsorted("guid:"+guidOrMd5); + if(items.size() < 1) { + logMessage("No items in case found to match GUID: %s",guidOrMd5); + } + } else if(matchingMethod == AnnotationMatchingMethod.MD5) { + items = nuixCase.searchUnsorted("md5:"+guidOrMd5); + if(items.size() < 1) { + logMessage("No items in case found to match MD5: %s",guidOrMd5); + } + } + return items; + } + }); + + for(Map.Entry tagEntry : tagIdLookup.entrySet()) { + // Support aborting + if(abortWasRequested) { break; } + String tagName = tagEntry.getKey(); + long tagId = tagEntry.getValue(); + bindData.clear(); + bindData.add(tagId); + int totalItemTags = executeLongScalar(itemTagCountSql,bindData).intValue(); + executeQuery(itemTagSql,bindData, rs ->{ + int currentIndex = 1; + try { + while(rs.next()) { + fireProgressUpdated(currentIndex,totalItemTags); + String guid = rs.getString(1); + String md5 = rs.getString(2); + + Set items = null; + + // Leverage our cache to minimize unnecessary searching for the same item or items repeatedly + if(matchingMethod == AnnotationMatchingMethod.GUID) { + items = itemCache.get(guid); + } else if(matchingMethod == AnnotationMatchingMethod.MD5) { + items = itemCache.get(md5); + } + + // Apply tag to relevant items in the destination case + annotater.addTag(tagName, items); + currentIndex++; + } + } catch (SQLException e) { + logger.error("Error retrieving ItemTag data from database", e); + logMessage("Error retrieving ItemTag data from database: %s",e.getMessage()); + } catch (IOException e) { + logger.error("Error retrieving item from case", e); + logMessage("Error retrieving item from case: ",e.getMessage()); + } catch (ExecutionException e) { + logger.error(e); + } + }); + } + } + + /*** + * Stores a particular markup set living in the provided Nuix case. + * @param nuixCase The Nuix case containing the provided markup set. + * @param markupSet The specific markup set to store. + * @throws IOException Thrown most likely if there was an issue searching or retrieving printed pages of and item. + * @throws SQLException Thrown if anything goes wrong interacting with the SQLite database file. + */ + public void storeMarkupSet(Case nuixCase, MarkupSet markupSet) throws IOException, SQLException { + logMessage("Storing markups from MarkupSet: "+markupSet.getName()); + long itemMarkupCountBefore = getItemMarkupCount(); + logMessage("Item Markup Count Before: %s", itemMarkupCountBefore); + String insertItemMarkup = "INSERT INTO ItemMarkup (Item_ID,MarkupSet_ID,PageNumber,IsRedaction,X,Y,Width,Height) VALUES (?,?,?,?,?,?,?,?)"; + String itemQuery = QueryHelper.markupSetQuery(markupSet); + + long markupSetId = getMarkupSetId(markupSet); + Set markupSetItems = nuixCase.searchUnsorted(itemQuery); + int currentItemIndex = 1; + for(Item item : markupSetItems) { + // Support aborting + if(abortWasRequested) { break; } + + fireProgressUpdated(currentItemIndex, markupSetItems.size()); + long itemId = getItemId(item); + MutablePrintedImage itemImage = item.getPrintedImage(); + List pages = itemImage.getPages(); + for (int i = 0; i < pages.size(); i++) { + MutablePrintedPage page = (MutablePrintedPage) pages.get(i); + Set pageMarkups = page.getMarkups(markupSet); + for(Markup pageMarkup : pageMarkups) { + executeInsert(insertItemMarkup, + itemId, + markupSetId, + i+1, + pageMarkup.isRedaction(), + pageMarkup.getX(), + pageMarkup.getY(), + pageMarkup.getWidth(), + pageMarkup.getHeight()); + } + } + currentItemIndex++; + } + + long itemMarkupCountAfter = getItemMarkupCount(); + logMessage("Item Markup Count After: %s",itemMarkupCountAfter); + logMessage("Difference: +%s",(itemMarkupCountAfter - itemMarkupCountBefore)); + } + /*** * Applies markups present in the SQLite DB file associated to this instance to the provided Nuix case. * @param nuixCase The Nuix case to apply the DB file markups to. @@ -286,10 +447,9 @@ public long getMarkupSetId(MarkupSet markupSet) throws SQLException { * @throws SQLException Thrown if there are errors while interacting with the SQLite DB file. */ public void applyMarkupsFromDatabaseToCase(Case nuixCase, boolean addToExistingMarkupSet, AnnotationMatchingMethod matchingMethod) throws SQLException { - abortWasRequested = false; - Map markupSetLookup = new HashMap(); + Map existingMarkupSetLookup = new HashMap(); for(MarkupSet existingMarkupSet : nuixCase.getMarkupSets()) { - markupSetLookup.put(existingMarkupSet.getName(), existingMarkupSet); + existingMarkupSetLookup.put(existingMarkupSet.getName(), existingMarkupSet); } List bindData = new ArrayList(); @@ -312,15 +472,15 @@ public void applyMarkupsFromDatabaseToCase(Case nuixCase, boolean addToExistingM // We need to resolve the MarkupSet object, either by obtaining existing one in case or creating new one MarkupSet markupSet = null; - if(markupSetLookup.containsKey(markupSetName)) { + if(existingMarkupSetLookup.containsKey(markupSetName)) { if(addToExistingMarkupSet) { logMessage("Applying markups in destination case to existing markup set: %s",markupSetName); - markupSet = markupSetLookup.get(markupSetName); + markupSet = existingMarkupSetLookup.get(markupSetName); } else { // When addToExisting is false and we have a name collision, we will attempt to find a usable name int nameSequence = 2; String targetName = markupSetName+"_"+nameSequence; - while(markupSetLookup.containsKey(targetName)) { + while(existingMarkupSetLookup.containsKey(targetName)) { nameSequence++; targetName = markupSetName+"_"+nameSequence; } From f16caa83c9b17518b9c6e360a03b5580be482230 Mon Sep 17 00:00:00 2001 From: Jason Wells Date: Wed, 19 Jun 2019 17:03:53 -0700 Subject: [PATCH 3/3] Continued development of Tag support in AnnotationRepository --- .../annotations/AnnotationRepository.java | 206 +++++++++++++----- .../superutilities/misc/FormatUtility.java | 2 + .../superutilities/query/QueryHelper.java | 6 +- 3 files changed, 159 insertions(+), 55 deletions(-) diff --git a/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java b/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java index c143cdf..ff3b669 100644 --- a/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java +++ b/Java/src/main/java/com/nuix/superutilities/annotations/AnnotationRepository.java @@ -52,6 +52,11 @@ public void whenMessageLogged(Consumer callback) { messageLoggedCallback = callback; } + /*** + * Logs a message, either providing it to the callback supplied in a call to {@link #whenMessageLogged(Consumer)} or in + * absence of that callback, to log4j. + * @param message + */ private void logMessage(String message) { if(messageLoggedCallback != null) { messageLoggedCallback.accept(message); @@ -60,6 +65,13 @@ private void logMessage(String message) { } } + /*** + * Logs a message, either providing it to the callback supplied in a call to {@link #whenMessageLogged(Consumer)} or in + * absence of that callback, to log4j. This method passes the message through a call to String.format with the message + * being provided as the format and the params provided as the params to String.format. + * @param format The message format string, formatted as accepted by String.format. + * @param params Parameters to be inserted into the formatted string, as accepted by String.format. + */ private void logMessage(String format, Object... params) { logMessage(String.format(format, params)); } @@ -74,6 +86,11 @@ public void whenProgressUpdated(BiConsumer callback) { progressUpdatedCallback = callback; } + /*** + * Invokes callback previously provided in a call to {@link #whenProgressUpdated(BiConsumer)}, if one has been provided. + * @param current The current progress amount. + * @param total The total amount of work. + */ private void fireProgressUpdated(int current, int total) { if(progressUpdatedCallback != null) { progressUpdatedCallback.accept(current,total); @@ -138,7 +155,8 @@ private void createTables() throws SQLException { } /*** - * Rebuilds in memory look ups for GUID/MD5 => database record IDs + * Rebuilds in memory look ups for GUID/MD5 => database record IDs. Later as new records are added + * to the database file, these lookup will be updated in tandem as in memory caches. * @throws SQLException Thrown if there are errors while interacting with the SQLite DB file. */ private void rebuildXrefs() throws SQLException { @@ -179,7 +197,7 @@ public void accept(ResultSet rs) { }); logMessage("Building Tag name lookup from any existing entries in DB..."); - markupSetIdLookup.clear(); + tagIdLookup.clear(); sql = "SELECT Name,ID FROM Tag"; executeQuery(sql,null, new Consumer() { @Override @@ -188,7 +206,7 @@ public void accept(ResultSet rs) { while(rs.next()) { String name = rs.getString(1); long id = rs.getLong(2); - markupSetIdLookup.put(name, id); + tagIdLookup.put(name, id); } } catch (SQLException exc) { logger.error("Error building Tag Name to ID XREF",exc); @@ -229,7 +247,12 @@ public long getItemId(Item item) throws SQLException { String name = item.getLocalisedName(); byte[] guidBytes = FormatUtility.hexToBytes(guid); - byte[] md5Bytes = FormatUtility.hexToBytes(md5); + byte[] md5Bytes = null; + if(md5 == null) { + logMessage("Item with GUID %s has no MD5",guid); + } else { + md5Bytes = FormatUtility.hexToBytes(md5); + } String sql = "INSERT INTO Item (GUID,MD5,Name) VALUES (?,?,?)"; executeInsert(sql, guidBytes, md5Bytes, name); @@ -279,9 +302,16 @@ public long getTagId(String tagName) throws SQLException { } } + /*** + * Stores a specific tag present in the provided case as records in the DB file. + * @param nuixCase The Nuix case that the specified tag is present in. + * @param tagName The name of the tag in the specified case to store in the DB file. + * @throws IOException Thrown if a search error occurs. + * @throws SQLException Thrown if there are errors while interacting with the SQLite DB file. + */ public void storeTag(Case nuixCase, String tagName) throws IOException, SQLException { logMessage("Storing tag: %s",tagName); - String insertItemTag = "INSERT INTO Tag (Item_ID,Tag_ID) VALUES (?,?)"; + String insertItemTag = "INSERT INTO ItemTag (Item_ID,Tag_ID) VALUES (?,?)"; String itemQuery = QueryHelper.orTagQuery(tagName); Set tagItems = nuixCase.searchUnsorted(itemQuery); long tagId = getTagId(tagName); @@ -295,6 +325,12 @@ public void storeTag(Case nuixCase, String tagName) throws IOException, SQLExcep } } + /*** + * Stores all tags present in the provided case as records in the DB file. + * @param nuixCase The Nuix case to record tags from. + * @throws IOException Thrown if a search error occurs. + * @throws SQLException Thrown if there are errors while interacting with the SQLite DB file. + */ public void storeAllTags(Case nuixCase) throws IOException, SQLException { Set tags = nuixCase.getAllTags(); for(String tag : tags) { @@ -304,19 +340,31 @@ public void storeAllTags(Case nuixCase) throws IOException, SQLException { } } + /*** + * Applies tags to items in the provided case based on tag records in the DB file associated to this instance. + * @param nuixCase The case in which items will be tagged. + * @param matchingMethod Determines how a record in the DB file is associated to an item in the case to apply tags to it. + * @throws SQLException Thrown if there are errors while interacting with the SQLite DB file. + */ public void applyTagsFromDatabaseToCase(Case nuixCase, AnnotationMatchingMethod matchingMethod) throws SQLException { + // Will reuse this multiple times to provide values to be bound to prepared SQL statements later List bindData = new ArrayList(); - BulkAnnotater annotater = SuperUtilities.getInstance().getBulkAnnotater(); - String itemTagSql = "SELECT i.GUID,i.MD5 FROM Tag AS t " + + // Will use this to apply tags later + BulkAnnotater annotater = SuperUtilities.getBulkAnnotater(); + + // SQL query to get information about each item a given tag is to be applied to + String itemTagSql = "SELECT i.GUID,i.MD5,i.Name FROM ItemTag AS t " + "INNER JOIN Item AS i ON t.Item_ID = i.ID " + - "WHERE t.ID = ? " + + "WHERE t.Tag_ID = ? " + "ORDER BY MD5,GUID"; - String itemTagCountSql = "SELECT COUNT(*) FROM Tag AS t " + + // SQL query to get count of items a given tag should be applied to + String itemTagCountSql = "SELECT COUNT(*) FROM ItemTag AS t " + "INNER JOIN Item AS i ON t.Item_ID = i.ID " + - "WHERE t.ID = ? " + + "WHERE t.Tag_ID = ? " + "ORDER BY MD5,GUID"; + // Always good to tell the user what you're doing and create a record of the settings in use logMessage("Applying tags to case..."); if(matchingMethod == AnnotationMatchingMethod.GUID) { logMessage("Matching DB entries to case items using: GUID"); @@ -324,75 +372,97 @@ public void applyTagsFromDatabaseToCase(Case nuixCase, AnnotationMatchingMethod logMessage("Matching DB entries to case items using: MD5"); } - // We use a cache for item retrieval, running a serach for the item by GUID or MD5 if requested but - // not currently present in the cache. - LoadingCache> itemCache = CacheBuilder.newBuilder() - .maximumSize(1000) - .build(new CacheLoader>(){ - @Override - public Set load(String guidOrMd5) throws Exception { - // When a given GUID or MD5 is found to note already be present in our cache - // we will need to go find it in our case, cache it and return it. - Set items = new HashSet(); - if(matchingMethod == AnnotationMatchingMethod.GUID) { - items = nuixCase.searchUnsorted("guid:"+guidOrMd5); - if(items.size() < 1) { - logMessage("No items in case found to match GUID: %s",guidOrMd5); - } - } else if(matchingMethod == AnnotationMatchingMethod.MD5) { - items = nuixCase.searchUnsorted("md5:"+guidOrMd5); - if(items.size() < 1) { - logMessage("No items in case found to match MD5: %s",guidOrMd5); - } - } - return items; - } - }); + // Since we can apply any given tag to multiple items at once and this is a more efficient approach, + // we gather up all the items a given tag will be applied to and then apply that tag in batches. Periodically + // we will apply a batch of tags and clear this collection so we don't need to hold on to all the items receiving + // a given tag at once. + Set tagGroupedItems = new HashSet(); + // Use our in memory cache of Name->ID to drive application of each tag since it should + // already be in memory and synced to the state of the database. for(Map.Entry tagEntry : tagIdLookup.entrySet()) { // Support aborting if(abortWasRequested) { break; } + String tagName = tagEntry.getKey(); long tagId = tagEntry.getValue(); + bindData.clear(); bindData.add(tagId); + + // Determine how many items this tag should be applied to based on the number + // of ItemTag records associated to this tag. int totalItemTags = executeLongScalar(itemTagCountSql,bindData).intValue(); + + // Here we run the query for ItemTag records associated with the tag we are currently + // processing. We will then collect up the relevant items. When we get a good batch of + // items, we tag them, clear the collection and continue on. + logMessage("Applying tag '%s' to %s items",tagName,totalItemTags); executeQuery(itemTagSql,bindData, rs ->{ int currentIndex = 1; try { while(rs.next()) { fireProgressUpdated(currentIndex,totalItemTags); - String guid = rs.getString(1); - String md5 = rs.getString(2); + + // GUID and MD5 are hex strings. We store them in the database as the byte arrays those hex + // strings represent which reduces the storage footprint for these values. Nuix needs them as + // hex strings for use in queries, so we need to convert them back to hex strings here. + String guid = FormatUtility.bytesToHex(rs.getBytes(1)); + String md5 = FormatUtility.bytesToHex(rs.getBytes(2)); + String itemName = rs.getString(3); + + // If a given record does not have an MD5 (likely because the source item had no Md5) we can't really + // use MD5 matching from database record to destination case item, so we report the issue to the user + // and skip this record. + if(md5 == null && matchingMethod == AnnotationMatchingMethod.MD5) { + logMessage("Record for item named '%s' with GUID %s does not have an MD5 value",itemName,guid); + continue; + } Set items = null; - // Leverage our cache to minimize unnecessary searching for the same item or items repeatedly + // Obtain the relevant item or items depending on the matching method specified if(matchingMethod == AnnotationMatchingMethod.GUID) { - items = itemCache.get(guid); + items = nuixCase.searchUnsorted("guid:"+guid); } else if(matchingMethod == AnnotationMatchingMethod.MD5) { - items = itemCache.get(md5); + items = nuixCase.searchUnsorted("md5:"+md5); + } + + // Add all the items we found to our collection + tagGroupedItems.addAll(items); + + // If our collection has 5000 items or more in it now, lets tag those items and then + // clear the collection so we aren't holding on to all of the items at once. + if(tagGroupedItems.size() > 5000) { + logMessage(" Apply tag '%s' to 5000 items",tagName); + annotater.addTag(tagName, tagGroupedItems); + tagGroupedItems.clear(); } - // Apply tag to relevant items in the destination case - annotater.addTag(tagName, items); currentIndex++; } + + // If there are any items left in our collection that still need a tag applied, we check and + // tag them here. + if(tagGroupedItems.size() > 0) { + logMessage(" Apply tag '%s' to %s items",tagName,tagGroupedItems.size()); + annotater.addTag(tagName, tagGroupedItems); + tagGroupedItems.clear(); + } + } catch (SQLException e) { logger.error("Error retrieving ItemTag data from database", e); logMessage("Error retrieving ItemTag data from database: %s",e.getMessage()); } catch (IOException e) { logger.error("Error retrieving item from case", e); logMessage("Error retrieving item from case: ",e.getMessage()); - } catch (ExecutionException e) { - logger.error(e); } }); } } /*** - * Stores a particular markup set living in the provided Nuix case. + * Stores a particular markup set present in the provided Nuix case. * @param nuixCase The Nuix case containing the provided markup set. * @param markupSet The specific markup set to store. * @throws IOException Thrown most likely if there was an issue searching or retrieving printed pages of and item. @@ -454,15 +524,21 @@ public void applyMarkupsFromDatabaseToCase(Case nuixCase, boolean addToExistingM List bindData = new ArrayList(); + // Use our in memory cache of Name->ID to drive application of each markup set since it should + // already be in memory and synced to the state of the database. for(Map.Entry markupEntry : markupSetIdLookup.entrySet()) { // Support aborting if(abortWasRequested) { break; } String markupSetName = markupEntry.getKey(); long markupSetId = markupEntry.getValue(); + + // SQL to get description for this markup set String markupSetDescription = executeStringScalar("SELECT Description FROM MarkupSet WHERE ID = ?",markupSetId); + // SQL to get reason for this markup set String markupSetRedactionReason = executeStringScalar("SELECT RedactionReason FROM MarkupSet WHERE ID = ?",markupSetId); + // Always good to echo back to user the settings they are using logMessage("Applying markups to case from MarkupSet: %s",markupSetName); if(matchingMethod == AnnotationMatchingMethod.GUID) { logMessage("Matching DB entries to case items using: GUID"); @@ -470,10 +546,11 @@ public void applyMarkupsFromDatabaseToCase(Case nuixCase, boolean addToExistingM logMessage("Matching DB entries to case items using: MD5"); } - // We need to resolve the MarkupSet object, either by obtaining existing one in case or creating new one + // We need to resolve the MarkupSet object, either by obtaining an existing one in destination case or creating a new one. MarkupSet markupSet = null; if(existingMarkupSetLookup.containsKey(markupSetName)) { if(addToExistingMarkupSet) { + // We can just add more annotations the the existing markup set with the same name logMessage("Applying markups in destination case to existing markup set: %s",markupSetName); markupSet = existingMarkupSetLookup.get(markupSetName); } else { @@ -506,7 +583,7 @@ public void applyMarkupsFromDatabaseToCase(Case nuixCase, boolean addToExistingM // SQL for info needed to apply markups. Sorted by MD5/GUID/PageNumber so that we should get markups for the same item // one after another, making our cache defined below more efficiently leveraged. - String itemMarkupSql = "SELECT i.GUID,i.MD5,im.PageNumber,im.IsRedaction,im.X,im.Y,im.Width,im.Height FROM ItemMarkup AS im " + + String itemMarkupSql = "SELECT i.GUID,i.MD5,i.Name,im.PageNumber,im.IsRedaction,im.X,im.Y,im.Width,im.Height FROM ItemMarkup AS im " + "INNER JOIN Item AS i ON im.Item_ID = i.ID " + "WHERE im.MarkupSet_ID = ? " + "ORDER BY MD5,GUID,PageNumber"; @@ -559,14 +636,27 @@ public void accept(ResultSet rs) { if(abortWasRequested) { break; } fireProgressUpdated(currentIndex,totalItemMarkups); + + // GUID and MD5 are stored in database as byte arrays to save space, but we need them as hex strings + // for Nuix searching, so we need to convert them back to strings here. String guid = FormatUtility.bytesToHex(rs.getBytes(1)); String md5 = FormatUtility.bytesToHex(rs.getBytes(2)); - long pageNumber = rs.getLong(3); - boolean isRedaction = rs.getBoolean(4); - double x = rs.getDouble(5); - double y = rs.getDouble(6); - double width = rs.getDouble(7); - double height = rs.getDouble(8); + String itemName = rs.getString(3); + + // Get details needed to apply a markup to the relevant item + long pageNumber = rs.getLong(4); + boolean isRedaction = rs.getBoolean(5); + double x = rs.getDouble(6); + double y = rs.getDouble(7); + double width = rs.getDouble(8); + double height = rs.getDouble(9); + + // If our matching method is MD5, but the current record does not have an MD5 (likely because the originating item + // did not have an MD5, we let the user know and then skip this record. + if(md5 == null && matchingMethod == AnnotationMatchingMethod.MD5) { + logMessage("Record for item named '%s' with GUID %s does not have an MD5 value",itemName,guid); + continue; + } Set items = null; @@ -581,6 +671,18 @@ public void accept(ResultSet rs) { for(Item item : items) { MutablePrintedImage itemImage = item.getPrintedImage(); List pages = itemImage.getPages(); + + if(pages == null || pages.size() < 1) { + logMessage("Item named '%s' and GUID %s has no printed pages, generating now...",itemName,guid); + itemImage.generate(); + pages = itemImage.getPages(); + } + + if(pages.size() < pageNumber-1) { + logMessage("Item named '%s' and GUID %s does not have a page %s",itemName,guid,pageNumber); + continue; + } + MutablePrintedPage page = (MutablePrintedPage)pages.get((int) (pageNumber-1)); if(isRedaction) { page.createRedaction(targetMarkupSet, x, y, width, height); diff --git a/Java/src/main/java/com/nuix/superutilities/misc/FormatUtility.java b/Java/src/main/java/com/nuix/superutilities/misc/FormatUtility.java index cfeb2bc..615e2b9 100644 --- a/Java/src/main/java/com/nuix/superutilities/misc/FormatUtility.java +++ b/Java/src/main/java/com/nuix/superutilities/misc/FormatUtility.java @@ -43,6 +43,7 @@ public static FormatUtility getInstance(){ * @return Byte array equivalent. */ public static byte[] hexToBytes(String hex){ + if(hex == null) { return null; } String normalizedHex = hex.replace("-", ""); return DatatypeConverter.parseHexBinary(normalizedHex); } @@ -53,6 +54,7 @@ public static byte[] hexToBytes(String hex){ * @return A string representation of the byte array as hexadecimal. */ public static String bytesToHex(byte[] bytes){ + if(bytes == null) { return null; } return DatatypeConverter.printHexBinary(bytes); } diff --git a/Java/src/main/java/com/nuix/superutilities/query/QueryHelper.java b/Java/src/main/java/com/nuix/superutilities/query/QueryHelper.java index 1381fd0..e9416a0 100644 --- a/Java/src/main/java/com/nuix/superutilities/query/QueryHelper.java +++ b/Java/src/main/java/com/nuix/superutilities/query/QueryHelper.java @@ -134,15 +134,15 @@ public static String markupSetQuery(MarkupSet markupSet) { } public static String escapeForSearch(String value) { - String result = value.replaceAll("\\", "\\\\"); + String result = value.replaceAll("\\\\", "\\\\\\\\"); result = result.replaceAll("\\?", "\\\\?"); result = result.replaceAll("\\*", "\\\\*"); result = result.replaceAll("\"", "\\\""); result = result.replaceAll("\u201C", "\\\u201C"); result = result.replaceAll("\u201D", "\\\u201D"); result = result.replaceAll("'", "\\'"); - result = result.replaceAll("{", "\\{"); - result = result.replaceAll("}", "\\}"); + result = result.replaceAll("\\{", "\\\\{"); + result = result.replaceAll("\\}", "\\\\}"); return result; }