Skip to content

Commit

Permalink
JN-646 Analysis-friendly export (#603)
Browse files Browse the repository at this point in the history
  • Loading branch information
devonbush authored Oct 25, 2023
1 parent 2a7c167 commit 03c8073
Show file tree
Hide file tree
Showing 12 changed files with 312 additions and 90 deletions.
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
package bio.terra.pearl.core.model.survey;

public record QuestionChoice(String stableId, String text) { }
import lombok.Builder;

/**
 * A choice belonging to a survey question, pairing the choice's stableId with its text.
 * The compact canonical constructor carries Lombok's {@code @Builder} annotation and does no
 * validation or normalization of its own (its body is empty).
 */
public record QuestionChoice(String stableId, String text) {
@Builder public QuestionChoice {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,8 @@ public Set<TdrColumn> generateDatasetSchema(UUID studyEnvironmentId) {

TsvExporter tsvExporter = new TsvExporter(moduleExportInfos, enrolleeMaps);

tsvExporter.applyToEveryColumn((moduleExportInfo, itemExportInfo, isOtherDescription) -> tdrColumns.add(new TdrColumn(
DataRepoExportUtils.juniperToDataRepoColumnName(moduleExportInfo.getFormatter().getColumnKey(moduleExportInfo, itemExportInfo, isOtherDescription, null)),
tsvExporter.applyToEveryColumn((moduleExportInfo, itemExportInfo, choice, isOtherDescription) -> tdrColumns.add(new TdrColumn(
DataRepoExportUtils.juniperToDataRepoColumnName(moduleExportInfo.getFormatter().getColumnKey(moduleExportInfo, itemExportInfo, choice, isOtherDescription)),
DataRepoExportUtils.juniperToDataRepoColumnType(itemExportInfo.getDataType())
)
));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,47 +1,59 @@
package bio.terra.pearl.core.service.export;

import bio.terra.pearl.core.model.survey.QuestionChoice;
import bio.terra.pearl.core.service.export.formatters.SurveyFormatter;
import bio.terra.pearl.core.service.export.instance.ItemExportInfo;
import bio.terra.pearl.core.service.export.instance.ModuleExportInfo;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public abstract class BaseExporter {

protected final List<ModuleExportInfo> moduleExportInfos;
protected final List<Map<String, String>> enrolleeMaps;
/**
 * map of column keys to the value that should be exported if the value for an enrollee is missing (null).
 * This saves us from having to include "0" for every option possibility in multiple choice questions
 * exported in the analysis-friendly format, which for some Pepper datasets reduced the memory taken by
 * the enrolleeMaps by >50%
 */
protected final Map<String, String> columnEmptyValueMap;
/**
 * fallback exported for a missing value when a column has no entry in columnEmptyValueMap.
 * Declared static: it is a compile-time constant, so there is no reason to carry a copy per exporter instance.
 */
public static final String DEFAULT_EMPTY_STRING_VALUE = "";

/**
 * Stores the module definitions and the enrollee value maps, then precomputes the per-column empty values.
 *
 * @param moduleExportInfos the modules whose items are iterated to produce the export columns
 * @param enrolleeMaps the value maps that rows are read from
 */
public BaseExporter(List<ModuleExportInfo> moduleExportInfos, List<Map<String, String>> enrolleeMaps) {
this.moduleExportInfos = moduleExportInfos;
this.enrolleeMaps = enrolleeMaps;
// must run after moduleExportInfos is assigned: makeEmptyValueMap() walks it via applyToEveryColumn.
// NOTE(review): makeEmptyValueMap() is protected and overridable, so a subclass override would execute
// here before that subclass's own fields have been initialized.
this.columnEmptyValueMap = makeEmptyValueMap();
}

public abstract void export(OutputStream os) throws IOException;

/**
 * gets the key of every column, in the order applyToEveryColumn visits them -- uses getColumnKey
 * from each module's formatter
 */
protected List<String> getColumnKeys() {
List<String> columnKeys = new ArrayList<>();
applyToEveryColumn((moduleExportInfo, itemExportInfo, isOtherDescription) -> {
columnKeys.add(moduleExportInfo.getFormatter().getColumnKey(moduleExportInfo, itemExportInfo, isOtherDescription, null));
applyToEveryColumn((moduleExportInfo, itemExportInfo, isOtherDescription, choice) -> {
columnKeys.add(moduleExportInfo.getFormatter().getColumnKey(moduleExportInfo, itemExportInfo, isOtherDescription, choice));
});
return columnKeys;
}

/**
 * gets the header row - uses getColumnHeader from each module's formatter.
 * Headers are collected in the order applyToEveryColumn visits the columns, i.e. the same order
 * in which getColumnKeys collects its keys.
 */
protected List<String> getHeaderRow() {
List<String> headers = new ArrayList<>();
applyToEveryColumn((moduleExportInfo, itemExportInfo, isOtherDescription) -> {
headers.add(moduleExportInfo.getFormatter().getColumnHeader(moduleExportInfo, itemExportInfo, isOtherDescription, null));
applyToEveryColumn((moduleExportInfo, itemExportInfo, isOtherDescription, choice) -> {
headers.add(moduleExportInfo.getFormatter().getColumnHeader(moduleExportInfo, itemExportInfo, isOtherDescription, choice));
});
return headers;
}

/**
 * gets the subheader row -- uses getColumnSubHeader from each module's formatter.
 * Subheaders are collected in the order applyToEveryColumn visits the columns, i.e. the same order
 * in which getHeaderRow collects its headers.
 */
protected List<String> getSubHeaderRow() {
List<String> headers = new ArrayList<>();
applyToEveryColumn((moduleExportInfo, itemExportInfo, isOtherDescription) -> {
headers.add(moduleExportInfo.getFormatter().getColumnSubHeader(moduleExportInfo, itemExportInfo, isOtherDescription, null));
applyToEveryColumn((moduleExportInfo, itemExportInfo, isOtherDescription, choice) -> {
headers.add(moduleExportInfo.getFormatter().getColumnSubHeader(moduleExportInfo, itemExportInfo, isOtherDescription, choice));
});
return headers;
}
Expand All @@ -57,31 +69,57 @@ protected List<String> getRowValues(Map<String, String> enrolleeMap, List<String
List<String> rowValues = new ArrayList(headerRowValues.size());
for (String header : headerRowValues) {
String value = enrolleeMap.get(header);
rowValues.add(sanitizeValue(value));
rowValues.add(sanitizeValue(value, columnEmptyValueMap.getOrDefault(header, DEFAULT_EMPTY_STRING_VALUE)));
}
return rowValues;
}

/**
 * callback interface for operating iteratively over columns (variables) of an export;
 * applyToEveryColumn invokes apply once per column.
 * isOtherDescription is true when the column is the free-text "other" column of an item.
 * choice is the option a per-choice column stands for, and is null for columns that are not per-choice.
 */
public interface ColumnProcessor {
void apply(ModuleExportInfo moduleExportInfo,
ItemExportInfo itemExportInfo, boolean isOtherDescription);
ItemExportInfo itemExportInfo, boolean isOtherDescription, QuestionChoice choice);
}

/**
 * invokes columnProcessor once for every export column, iterating modules and then each module's items.
 * An item that splits its options into columns yields one call per choice; any other item yields a
 * single call with a null choice. An item with a free-text "other" description yields one further call
 * with isOtherDescription true and a null choice.
 */
public void applyToEveryColumn(ColumnProcessor columnProcessor) {
for (ModuleExportInfo moduleExportInfo : moduleExportInfos) {
for (ItemExportInfo itemExportInfo : moduleExportInfo.getItems()) {
columnProcessor.apply(moduleExportInfo, itemExportInfo, false);
if (itemExportInfo.isSplitOptionsIntoColumns()) {
// add a column for each option
for (QuestionChoice choice : itemExportInfo.getChoices()) {
columnProcessor.apply(moduleExportInfo, itemExportInfo, false, choice);
}
} else {
// a single column holds the item's whole value, so no specific choice applies
columnProcessor.apply(moduleExportInfo, itemExportInfo,false, null);
}
if (itemExportInfo.isHasOtherDescription()) {
// for questions with free-text other, we add an additional column to capture that value
columnProcessor.apply(moduleExportInfo, itemExportInfo, true);
columnProcessor.apply(moduleExportInfo, itemExportInfo, true, null);
}
}
}
}

protected String sanitizeValue(String value) {
// default is no-op
/**
 * Builds the map of column key to the value exported when an enrollee has no value for that column.
 * Per-choice columns of an item that splits its options into columns get
 * SurveyFormatter.SPLIT_OPTION_UNSELECTED_VALUE; every other column gets DEFAULT_EMPTY_STRING_VALUE.
 *
 * @return a new mutable map holding an entry for each column key
 */
protected Map<String, String> makeEmptyValueMap() {
    Map<String, String> emptyValueMap = new HashMap<>();
    applyToEveryColumn((moduleExportInfo, itemExportInfo, isOtherDescription, choice) -> {
        String columnKey = moduleExportInfo.getFormatter()
                .getColumnKey(moduleExportInfo, itemExportInfo, isOtherDescription, choice);
        // applyToEveryColumn also reports the free-text "other" column of a split item. That column
        // holds text rather than a selected/unselected flag, so it must default to the empty string
        // instead of the unselected marker.
        boolean isSplitOptionColumn = itemExportInfo.isSplitOptionsIntoColumns() && !isOtherDescription;
        emptyValueMap.put(columnKey, isSplitOptionColumn
                ? SurveyFormatter.SPLIT_OPTION_UNSELECTED_VALUE
                : DEFAULT_EMPTY_STRING_VALUE);
    });
    return emptyValueMap;
}

/**
 * Prepares a single value for export. This base version only swaps a null value for the supplied
 * placeholder; exporters whose format needs escaping (e.g. a TSV exporter escaping double quotes)
 * are expected to override it.
 *
 * @param value the raw value, possibly null
 * @param nullValueString what to return in place of a null value
 * @return value when it is non-null, otherwise nullValueString
 */
protected String sanitizeValue(String value, String nullValueString) {
    return (value != null) ? value : nullValueString;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,6 @@ protected void writeRowToSheet(List<String> rowValues, int rowNum) {
});
}

/**
 * no characters need escaping here -- the only adjustment is turning a null value into the empty string
 */
protected String sanitizeValue(String value) {
    return (value == null) ? StringUtils.EMPTY : value;
}

/** returns the name used for the sheet, which here is the SHEET_NAME constant */
protected String getSheetName() {
return SHEET_NAME;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,8 @@ protected String getRowString(List<String> rowValues) {
* @param value the value to sanitize
* @return the sanitized value, suitable for including in a tsv
*/
protected String sanitizeValue(String value) {
if (value == null) {
value = StringUtils.EMPTY;
}
protected String sanitizeValue(String value, String nullValueString) {
value = super.sanitizeValue(value, nullValueString);
// first replace double quotes with single '
String sanitizedValue = value.replace("\"", "'");
// then quote the whole string if needed
Expand Down
Loading

0 comments on commit 03c8073

Please sign in to comment.