Skip to content

Commit

Permalink
Merge pull request #16 from Nuix/continued_memory_refinements
Browse files Browse the repository at this point in the history
More tweaks aimed at curbing RegexScanner memory usage
  • Loading branch information
JuicyDragon authored Apr 10, 2020
2 parents 64cf2a9 + fdb488e commit bbeb4f9
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 16 deletions.
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package com.nuix.superutilities.regex;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

import nuix.Case;
import nuix.Item;

/***
Expand All @@ -12,23 +14,28 @@
*
*/
public class ItemRegexMatchCollection {
private Item item = null;
private String itemGuid = null;
private List<RegexMatch> matchData = new ArrayList<RegexMatch>();

/***
* Creates a new empty instance against the specified item.
* The item's GUID is read via {@code item.getGuid()} and stored at construction time.
* @param item The item to associate with this collection.
*/
public ItemRegexMatchCollection(Item item){
this.item = item;
// Keep the GUID too; getItem(Case) searches the case for the item using this value
this.itemGuid = item.getGuid();
}

/***
* Gets the associated item.
* @return The associated item.
*/
public Item getItem(){
return item;
public Item getItem(Case nuixCase){
try {
return nuixCase.search(String.format("guid:%s", itemGuid)).get(0);
} catch (IOException e) {
e.printStackTrace();
return null;
}
}

/***
Expand Down Expand Up @@ -66,7 +73,8 @@ public List<RegexMatch> getPropertyMatches(){
* @param matchEnd Offset in source text where this match ends
*/
public void addMatch(PatternInfo patternInfo, String location, boolean isContentMatch, String value, String valueContext, int matchStart, int matchEnd){
    // Intern location in case there is a large amount of duplication of a small set of actual values.
    // A null location is passed through untouched rather than triggering a NullPointerException on intern().
    String internedLocation = (location == null) ? null : location.intern();
    // Record the match exactly once
    matchData.add(new RegexMatch(patternInfo,internedLocation,isContentMatch,value,valueContext,matchStart,matchEnd));
}

/***
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package com.nuix.superutilities.regex;

import java.io.IOException;

import nuix.Case;
import nuix.Item;

/***
Expand All @@ -8,7 +11,7 @@
*
*/
public class RegexScanError {
private Item item = null;
private String itemGuid = null;
private PatternInfo patternInfo = null;
private String location = null;
private Exception exception = null;
Expand All @@ -21,7 +24,7 @@ public class RegexScanError {
* @param exception The exception which was thrown
*/
public RegexScanError(Item item, PatternInfo patternInfo, String location, Exception exception){
this.item = item;
this.itemGuid = item.getGuid();
this.patternInfo = patternInfo;
this.location = location;
this.exception = exception;
Expand All @@ -31,8 +34,17 @@ public RegexScanError(Item item, PatternInfo patternInfo, String location, Excep
* Gets the associated item
* @return The associated item
*/
public Item getItem() {
return item;
public Item getItem(Case nuixCase) {
try {
return nuixCase.search(String.format("guid:%s", itemGuid)).get(0);
} catch (IOException e) {
e.printStackTrace();
return null;
}
}

/***
 * Gets the GUID of the associated item.
 * @return The GUID that was read from the item when this error was constructed
 */
public String getItemGuid() {
    return itemGuid;
}

/***
Expand Down
39 changes: 32 additions & 7 deletions Java/src/main/java/com/nuix/superutilities/regex/RegexScanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import com.nuix.superutilities.misc.FormatUtility;

import nuix.Item;
import nuix.ItemCustomMetadataMap;

/***
* Class for scanning a series of items with a series of regular expressions.
Expand Down Expand Up @@ -107,7 +108,7 @@ protected void fireScanError(RegexScanError error){
if(error.getLocation() != null){
errorMessage.add("\tLocation: "+error.getLocation());
}
errorMessage.add("\tItem GUID: "+error.getItem().getGuid());
errorMessage.add("\tItem GUID: "+error.getItemGuid());
logger.error(errorMessage.toString());
logger.error(error.getException());
}
Expand Down Expand Up @@ -444,12 +445,25 @@ protected ItemRegexMatchCollection scanItem(Item item) {
* @return Map of "stringified" metadata properties for the specified item
*/
public static Map<String,String> getStringProperties(Item item, Set<String> specificProperties){
// Note below String.intern use on property names which likely is highly repetitive

Map<String,String> result = new HashMap<String,String>();
for (Entry<String, Object> entry : item.getProperties().entrySet()) {
if(specificProperties == null || specificProperties.contains(entry.getKey())){
result.put(entry.getKey(), FormatUtility.getInstance().convertToString(entry.getValue()));

if(specificProperties == null | specificProperties.size() == 0) {
// We're scanning all the properties
for (Entry<String, Object> entry : item.getProperties().entrySet()) {
result.put(entry.getKey().intern(), FormatUtility.getInstance().convertToString(entry.getValue()));
}
} else {
// We're just scanning specific properties
Map<String,Object> itemProperties = item.getProperties();
for(String specificProperty : specificProperties) {
if(itemProperties.containsKey(specificProperty)) {
result.put(specificProperty.intern(), FormatUtility.getInstance().convertToString(itemProperties.get(specificProperty)));
}
}
}

return result;
}

Expand All @@ -462,11 +476,22 @@ public static Map<String,String> getStringProperties(Item item, Set<String> spec
*/
public static Map<String,String> getStringCustomMetadata(Item item, Set<String> specificFields){
Map<String,String> result = new HashMap<String,String>();
for (Entry<String, Object> entry : item.getCustomMetadata().entrySet()) {
if(specificFields == null || specificFields.contains(entry.getKey())){
result.put(entry.getKey(), FormatUtility.getInstance().convertToString(entry.getValue()));

if(specificFields == null || specificFields.size() == 0) {
// We're scanning all the custom metadata fields
for (Entry<String, Object> entry : item.getCustomMetadata().entrySet()) {
result.put(entry.getKey().intern(), FormatUtility.getInstance().convertToString(entry.getValue()));
}
} else {
ItemCustomMetadataMap itemCustomMetadata = item.getCustomMetadata();
// We're scanning specific custom metadata fields
for(String specificField : specificFields) {
if(itemCustomMetadata.containsKey(specificField)) {
result.put(specificField.intern(),FormatUtility.getInstance().convertToString(itemCustomMetadata.get(specificField)));
}
}
}

return result;
}

Expand Down

0 comments on commit bbeb4f9

Please sign in to comment.