Skip to content

Commit

Permalink
Further annotation repo functionality
Browse files Browse the repository at this point in the history
- Message logging and progress reporting
- Support for aborting
- Support for choosing whether items are matched by GUID or MD5
- During import, a loading cache is now used for item retrieval, so that when multiple markups in a series are all added to the same item, the search for that item occurs only the first time and the cache supplies the item on subsequent requests.
  • Loading branch information
JuicyDragon committed Jun 17, 2019
1 parent 92805f8 commit ac2f380
Show file tree
Hide file tree
Showing 3 changed files with 160 additions and 12 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package com.nuix.superutilities.annotations;

/**
 * Selects which stored identifier is used to match annotation database
 * entries to items in a case.
 */
public enum AnnotationMatchingMethod {
    /** Match database entries to case items using the item GUID. */
    GUID,

    /** Match database entries to case items using the item MD5. */
    MD5;
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,17 @@
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.function.BiConsumer;
import java.util.function.Consumer;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.HashBiMap;
import com.nuix.superutilities.misc.FormatUtility;
import com.nuix.superutilities.misc.SQLiteBacked;
Expand All @@ -27,6 +33,46 @@

public class AnnotationRepository extends SQLiteBacked {
private static Logger logger = Logger.getLogger(AnnotationRepository.class);

// Set to true by abort() and polled by the store/apply loops between units of work.
// Declared volatile so that an abort requested from a different thread than the one
// running the loop is guaranteed to become visible to that loop.
private volatile boolean abortWasRequested = false;
// Optional receiver for log messages; while null, messages are sent to the class logger instead.
private Consumer<String> messageLoggedCallback = null;

/**
 * Registers a listener that is handed every message this instance logs.
 * While a listener is registered, messages are delivered to it rather than
 * to the class logger.
 * @param callback The callback to receive logged messages
 */
public void whenMessageLogged(Consumer<String> callback) {
    this.messageLoggedCallback = callback;
}

/**
 * Emits a log message: to the registered message callback when there is one,
 * otherwise to the class logger at info level.
 * @param message The text to emit
 */
private void logMessage(String message) {
    if (messageLoggedCallback == null) {
        logger.info(message);
        return;
    }
    messageLoggedCallback.accept(message);
}

/**
 * Formats a message with {@link String#format(String, Object...)} and emits it
 * through {@link #logMessage(String)}.
 * @param format The format string
 * @param params The values substituted into the format string
 */
private void logMessage(String format, Object... params) {
    final String rendered = String.format(format, params);
    logMessage(rendered);
}

// Optional receiver for progress updates, invoked with (current, total); while null, progress updates are not reported.
private BiConsumer<Integer,Integer> progressUpdatedCallback = null;

/**
 * Registers a listener that is notified whenever this instance reports progress.
 * The listener receives the current position as its first argument and the
 * total as its second.
 * @param callback The callback to invoke when progress is updated
 */
public void whenProgressUpdated(BiConsumer<Integer,Integer> callback) {
    this.progressUpdatedCallback = callback;
}

/**
 * Notifies the registered progress callback, if any, of the current progress.
 * Does nothing when no callback has been registered.
 * @param current The current position
 * @param total The total being worked towards
 */
private void fireProgressUpdated(int current, int total) {
    if (progressUpdatedCallback == null) {
        return;
    }
    progressUpdatedCallback.accept(current, total);
}

// Bidirectional lookup between item GUID and ID; rebuildXrefs() repopulates it from the GUID and ID columns of the Item table.
private HashBiMap<String,Long> itemGuidIdLookup = HashBiMap.create();
// Bidirectional lookup between markup set name and ID; rebuildXrefs() repopulates it from the Name and ID columns of the MarkupSet table.
private HashBiMap<String,Long> markupSetIdLookup = HashBiMap.create();

Expand Down Expand Up @@ -58,6 +104,7 @@ private void createTables() throws SQLException {
}

private void rebuildXrefs() throws SQLException {
logMessage("Building GUID lookup from any existing entries in DB...");
itemGuidIdLookup.clear();
String sql = "SELECT GUID,ID FROM Item";
executeQuery(sql, null, new Consumer<ResultSet>() {
Expand All @@ -75,6 +122,7 @@ public void accept(ResultSet rs) {
}
});

logMessage("Building MarkupSet name lookup from any existing entries in DB...");
markupSetIdLookup.clear();
sql = "SELECT Name,ID FROM MarkupSet";
executeQuery(sql,null, new Consumer<ResultSet>() {
Expand All @@ -94,19 +142,28 @@ public void accept(ResultSet rs) {
}

/**
 * Stores every markup set of the given case by calling
 * {@link #storeMarkupSet(Case, MarkupSet)} for each one in turn.
 * The abort flag is cleared on entry, and processing stops before the next
 * markup set once an abort has been requested.
 * @param nuixCase The case whose markup sets are stored
 * @throws IOException Propagated from storing an individual markup set
 * @throws SQLException Propagated from storing an individual markup set
 */
public void storeAllMarkupSets(Case nuixCase) throws IOException, SQLException {
    abortWasRequested = false;
    for (MarkupSet currentSet : nuixCase.getMarkupSets()) {
        if (abortWasRequested) {
            // Nothing follows the loop, so returning here is the same as breaking out of it.
            return;
        }
        storeMarkupSet(nuixCase, currentSet);
    }
}

public void storeMarkupSet(Case nuixCase, MarkupSet markupSet) throws IOException, SQLException {
abortWasRequested = false;
logMessage("Storing markups from MarkupSet: "+markupSet.getName());
long itemMarkupCountBefore = getItemMarkupCount();
logMessage("Item Markup Count Before: %s", itemMarkupCountBefore);
String insertItemMarkup = "INSERT INTO ItemMarkup (Item_ID,MarkupSet_ID,PageNumber,IsRedaction,X,Y,Width,Height) VALUES (?,?,?,?,?,?,?,?)";
String itemQuery = QueryHelper.markupSetQuery(markupSet);

long markupSetId = getMarkupSetId(markupSet);
Set<Item> markupSetItems = nuixCase.searchUnsorted(itemQuery);
int currentItemIndex = 1;
for(Item item : markupSetItems) {
if(abortWasRequested) { break; }
fireProgressUpdated(currentItemIndex, markupSetItems.size());
long itemId = getItemId(item);
MutablePrintedImage itemImage = item.getPrintedImage();
List<? extends PrintedPage> pages = itemImage.getPages();
Expand All @@ -125,7 +182,12 @@ public void storeMarkupSet(Case nuixCase, MarkupSet markupSet) throws IOExceptio
pageMarkup.getHeight());
}
}
currentItemIndex++;
}

long itemMarkupCountAfter = getItemMarkupCount();
logMessage("Item Markup Count After: %s",itemMarkupCountAfter);
logMessage("Difference: +%s",(itemMarkupCountAfter - itemMarkupCountBefore));
}

public long getItemId(Item item) throws SQLException {
Expand Down Expand Up @@ -164,7 +226,8 @@ public long getMarkupSetId(MarkupSet markupSet) throws SQLException {
}
}

public void applyMarkupsFromDatabaseToCase(Case nuixCase, boolean addToExisting) throws SQLException {
public void applyMarkupsFromDatabaseToCase(Case nuixCase, boolean addToExistingMarkupSet, AnnotationMatchingMethod matchingMethod) throws SQLException {
abortWasRequested = false;
Map<String,MarkupSet> markupSetLookup = new HashMap<String,MarkupSet>();
for(MarkupSet existingMarkupSet : nuixCase.getMarkupSets()) {
markupSetLookup.put(existingMarkupSet.getName(), existingMarkupSet);
Expand All @@ -173,15 +236,24 @@ public void applyMarkupsFromDatabaseToCase(Case nuixCase, boolean addToExisting)
List<Object> bindData = new ArrayList<Object>();

for(Map.Entry<String, Long> markupEntry : markupSetIdLookup.entrySet()) {
if(abortWasRequested) { break; }
String markupSetName = markupEntry.getKey();
long markupSetId = markupEntry.getValue();
String markupSetDescription = executeStringScalar("SELECT Description FROM MarkupSet WHERE ID = ?",markupSetId);
String markupSetRedactionReason = executeStringScalar("SELECT RedactionReason FROM MarkupSet WHERE ID = ?",markupSetId);

logMessage("Applying markups to case from MarkupSet: %s",markupSetName);
if(matchingMethod == AnnotationMatchingMethod.GUID) {
logMessage("Matching DB entries to case items using: GUID");
} else if(matchingMethod == AnnotationMatchingMethod.MD5) {
logMessage("Matching DB entries to case items using: MD5");
}

// We need to resolve the MarkupSet object, either by obtaining existing one in case or creating new one
MarkupSet markupSet = null;
if(markupSetLookup.containsKey(markupSetName)) {
if(addToExisting) {
if(addToExistingMarkupSet) {
logMessage("Applying markups in destination case to existing markup set: %s",markupSetName);
markupSet = markupSetLookup.get(markupSetName);
} else {
// When addToExisting is false and we have a name collision, we will attempt to find a usable name
Expand All @@ -191,12 +263,16 @@ public void applyMarkupsFromDatabaseToCase(Case nuixCase, boolean addToExisting)
nameSequence++;
targetName = markupSetName+"_"+nameSequence;
}

logMessage("Applying markups in DB to new markup set: %s",targetName);

Map<String,Object> markupSetSettings = new HashMap<String,Object>();
markupSetSettings.put("description", markupSetDescription);
markupSetSettings.put("redactionReason", markupSetRedactionReason);
markupSet = nuixCase.createMarkupSet(targetName, markupSetSettings);
}
} else {
logMessage("Applying markups in DB to new markup set: %s",markupSetName);
// Markup set does not appear to already exist, so lets create it
Map<String,Object> markupSetSettings = new HashMap<String,Object>();
markupSetSettings.put("description", markupSetDescription);
Expand All @@ -207,27 +283,66 @@ public void applyMarkupsFromDatabaseToCase(Case nuixCase, boolean addToExisting)
final MarkupSet targetMarkupSet = markupSet;

// Now that we have a MarkupSet, we need to get ItemMarkup records from DB
String itemMarkupSql = "SELECT i.GUID,im.PageNumber,im.IsRedaction,im.X,im.Y,im.Width,im.Height FROM ItemMarkup AS im " +
String itemMarkupSql = "SELECT i.GUID,i.MD5,im.PageNumber,im.IsRedaction,im.X,im.Y,im.Width,im.Height FROM ItemMarkup AS im " +
"INNER JOIN Item AS i ON im.Item_ID = i.ID " +
"WHERE im.MarkupSet_ID = ? " +
"ORDER BY GUID, PageNumber";
"ORDER BY MD5,GUID,PageNumber";
String itemMarkupTotalCountSql = "SELECT COUNT(*) FROM ItemMarkup AS im " +
"INNER JOIN Item AS i ON im.Item_ID = i.ID " +
"WHERE im.MarkupSet_ID = ? " +
"ORDER BY MD5,GUID,PageNumber";

bindData.clear();
bindData.add(markupSetId);

int totalItemMarkups = executeLongScalar(itemMarkupTotalCountSql,bindData).intValue();
LoadingCache<String,Set<Item>> itemCache = CacheBuilder.newBuilder()
.maximumSize(1000)
.build(new CacheLoader<String,Set<Item>>(){

@Override
public Set<Item> load(String guidOrMd5) throws Exception {
Set<Item> items = new HashSet<Item>();
if(matchingMethod == AnnotationMatchingMethod.GUID) {
items = nuixCase.searchUnsorted("guid:"+guidOrMd5);
if(items.size() < 1) {
logMessage("No items in case found to match GUID: %s",guidOrMd5);
}
} else if(matchingMethod == AnnotationMatchingMethod.MD5) {
items = nuixCase.searchUnsorted("md5:"+guidOrMd5);
if(items.size() < 1) {
logMessage("No items in case found to match MD5: %s",guidOrMd5);
}
}
return items;
}
});

executeQuery(itemMarkupSql,bindData,new Consumer<ResultSet>() {
int currentIndex = 1;
@Override
public void accept(ResultSet rs) {
try {
while(rs.next()) {
if(abortWasRequested) { break; }
fireProgressUpdated(currentIndex,totalItemMarkups);
String guid = FormatUtility.bytesToHex(rs.getBytes(1));
long pageNumber = rs.getLong(2);
boolean isRedaction = rs.getBoolean(3);
double x = rs.getDouble(4);
double y = rs.getDouble(5);
double width = rs.getDouble(6);
double height = rs.getDouble(7);
String md5 = FormatUtility.bytesToHex(rs.getBytes(2));
long pageNumber = rs.getLong(3);
boolean isRedaction = rs.getBoolean(4);
double x = rs.getDouble(5);
double y = rs.getDouble(6);
double width = rs.getDouble(7);
double height = rs.getDouble(8);

Set<Item> items = nuixCase.searchUnsorted("guid:"+guid);
Set<Item> items = null;

if(matchingMethod == AnnotationMatchingMethod.GUID) {
items = itemCache.get(guid);
} else if(matchingMethod == AnnotationMatchingMethod.MD5) {
items = itemCache.get(md5);
}

for(Item item : items) {
MutablePrintedImage itemImage = item.getPrintedImage();
List<? extends PrintedPage> pages = itemImage.getPages();
Expand All @@ -238,14 +353,28 @@ public void accept(ResultSet rs) {
page.createHighlight(targetMarkupSet, x, y, width, height);
}
}
currentIndex++;
}
} catch (SQLException exc) {
logger.error("Error retrieving ItemMarkup data from database", exc);
logMessage("Error retrieving ItemMarkup data from database: %s",exc.getMessage());
} catch (IOException exc2) {
logger.error("Error retrieving item from case", exc2);
logMessage("Error retrieving item from case: ",exc2.getMessage());
} catch (ExecutionException e) {
logger.error(e);
}
}
});
}
}

/**
 * Counts the rows currently present in the ItemMarkup table.
 * @return The number of ItemMarkup rows
 * @throws SQLException If the count query fails
 */
public long getItemMarkupCount() throws SQLException {
    final long markupRowCount = executeLongScalar("SELECT COUNT(*) FROM ItemMarkup");
    return markupRowCount;
}

/**
 * Requests that the operation currently in progress stop. A message is logged
 * and the abort flag is raised; the store and apply loops check that flag
 * before each unit of work and exit once they see it set.
 */
public void abort() {
    logMessage("Signalling abort...");
    this.abortWasRequested = true;
}
}
15 changes: 14 additions & 1 deletion RubyTests/Test_AnnotationRepository.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,26 @@
java_import com.nuix.superutilities.SuperUtilities
$su = SuperUtilities.init($utilities,NUIX_VERSION)
java_import com.nuix.superutilities.annotations.AnnotationRepository
java_import com.nuix.superutilities.annotations.AnnotationMatchingMethod

# Open the case that serves as both the source and the destination of the markups.
$current_case = $utilities.getCaseFactory.open('D:\cases\FakeData_7.8')

# The database file name includes the current epoch seconds.
db_file = "D:\\Temp\\Annotations_#{Time.now.to_i}.db"
puts "DB File: #{db_file}"

# Time of the most recent progress line printed; used below to throttle output.
last_progress = Time.now
repo = AnnotationRepository.new(db_file)
# Echo every message logged by the repository to standard output.
repo.whenMessageLogged do |message|
puts message
end
# Print progress when more than a second has passed since the last printed line,
# and always when current equals total.
repo.whenProgressUpdated do |current,total|
if (Time.now - last_progress > 1) || current == total
puts "#{current}/#{total}"
last_progress = Time.now
end
end
# Store all markup sets of the case in the database, then apply them from the database back to the case.
repo.storeAllMarkupSets($current_case)
# NOTE(review): this two-argument call looks like the pre-change line of the diff; the
# three-argument call below matches the updated method signature -- confirm and drop this one.
repo.applyMarkupsFromDatabaseToCase($current_case,false)
repo.applyMarkupsFromDatabaseToCase($current_case,false,AnnotationMatchingMethod::GUID)
repo.close

$current_case.close

0 comments on commit ac2f380

Please sign in to comment.