Skip to content

Commit

Permalink
Merge pull request #14 from Nuix/profile_digester
Browse files Browse the repository at this point in the history
Update ProfileDigester.java
  • Loading branch information
JuicyDragon authored Mar 24, 2020
2 parents b354a37 + 6a1e23c commit 4bc2c46
Showing 1 changed file with 26 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
Expand Down Expand Up @@ -34,6 +35,8 @@ public class ProfileDigester {
private Consumer<String> infoMessageCallback = null;
private BiConsumer<String,Item> errorCallback = null;

private MessageDigest md5Digester = null;

private void fireProgressUpdate(int current, int total) {
if(progressCallback != null) {
progressCallback.accept(current, total);
Expand Down Expand Up @@ -81,8 +84,17 @@ public void whenErrorLogged(BiConsumer<String,Item> callback) {
errorCallback = callback;
}

public ProfileDigester() {}
public ProfileDigester(MetadataProfile metadataProfile) { profile = metadataProfile; }
/***
 * Creates a new instance without a metadata profile. The MD5 digester reused by
 * generateMd5Bytes is created here.
 * @throws IllegalStateException If the running JVM provides no MD5 MessageDigest implementation. Failing here,
 * rather than just printing the stack trace, prevents a NullPointerException later when the digester is first used.
 */
public ProfileDigester() {
    try {
        md5Digester = MessageDigest.getInstance("MD5");
    } catch (NoSuchAlgorithmException e) {
        // Preserve the cause so the missing provider can be diagnosed
        throw new IllegalStateException("Unable to obtain an MD5 MessageDigest instance", e);
    }
}
/***
 * Creates a new instance by first running the no-argument constructor and then
 * storing the provided metadata profile on this instance.
 * @param metadataProfile The metadata profile this instance should use.
 */
public ProfileDigester(MetadataProfile metadataProfile) {
    this();
    this.profile = metadataProfile;
}

/***
* Adds items to an item set using "Scripted" deduplication, providing an MD5 hash generated from the concatenation of the values yielded
Expand Down Expand Up @@ -131,7 +143,7 @@ public ItemSet addItemsToItemSet(Case nuixCase, String itemSetName, String dedup
if(includeItemText) {
description = String.format("Generated using MD5 of profile '%s' field values concatenation and Item Text", profile.getName());
} else {
description = String.format("Generated using MD5 of profile '%s' field values concatenationt", profile.getName());
description = String.format("Generated using MD5 of profile '%s' field values concatenation", profile.getName());
}

Map<String,Object> itemSetSettings = new HashMap<String,Object>();
Expand Down Expand Up @@ -180,28 +192,34 @@ public void itemProcessed(ItemEventInfo info) {
/***
* Generates MD5 digest byte array for a given item. Digest is generated by digesting concatenation of values yielded by the
* metadata profile associated with this instance for the given item and optionally including the item's content text.
* Note that method is synchronized due to:
* - Reuse of MD5 digester
* - Some metadata profile fields don't seem to play nice when called concurrently
* @param item The item to generate a custom MD5 digest for.
* @return Byte array representation of the MD5 digest
* @throws Exception Most likely if metadata profile has not yet been set for this instance.
*/
public byte[] generateMd5Bytes(Item item) throws Exception {
public synchronized byte[] generateMd5Bytes(Item item) throws Exception {
if(profile == null) {
throw new IllegalArgumentException("profile cannot be null, please provide a profile by calling setProfile(MetadataProfile profile) before calling this method");
}

MessageDigest md = MessageDigest.getInstance("MD5");
for(MetadataItem field : profile.getMetadata()) {
String fieldValue = field.evaluate(item);
if(fieldValue != null) {
md.update(fieldValue.getBytes());
md5Digester.update(fieldValue.getBytes());
}
}

if(includeItemText) {
md.update(item.getTextObject().toString().getBytes(Charset.forName("utf8")));
md5Digester.update(item.getTextObject().toString().getBytes(Charset.forName("utf8")));
}

return md.digest();
// Capture our result and then cleanup for the next call
byte[] result = md5Digester.digest();
md5Digester.reset();

return result;
}

/***
Expand Down

0 comments on commit 4bc2c46

Please sign in to comment.