Skip to content

Commit

Permalink
Refactor harvester/indexer interface and remove solrj binding
Browse files Browse the repository at this point in the history
  • Loading branch information
wwelling committed Aug 9, 2023
1 parent 154df64 commit 5fb28f6
Show file tree
Hide file tree
Showing 20 changed files with 170 additions and 573 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ public class DiscoveryConstants {

public static final String MOD_TIME = "modTime";

public static final String SYNC_IDS = "syncIds";

public static final String QUERY_DELIMETER = ":";

public static final String DEFAULT_QUERY = WILDCARD + QUERY_DELIMETER + WILDCARD;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
package edu.tamu.scholars.middleware.discovery.component;

import edu.tamu.scholars.middleware.discovery.model.AbstractIndexDocument;
import edu.tamu.scholars.middleware.discovery.model.Individual;
import reactor.core.publisher.Flux;

public interface Harvester {

public Flux<AbstractIndexDocument> harvest();
public Flux<Individual> harvest();

public AbstractIndexDocument harvest(String subject);
public Individual harvest(String subject);

public Class<AbstractIndexDocument> type();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
import java.util.Collection;

import edu.tamu.scholars.middleware.discovery.model.AbstractIndexDocument;
import edu.tamu.scholars.middleware.discovery.model.Individual;

public interface Indexer {

public void init();

public void index(Collection<AbstractIndexDocument> documents);
public void index(Collection<Individual> documents);

public void index(AbstractIndexDocument document);
public void index(Individual document);

public void optimize();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import edu.tamu.scholars.middleware.discovery.annotation.FieldType;
import edu.tamu.scholars.middleware.discovery.component.Harvester;
import edu.tamu.scholars.middleware.discovery.model.AbstractIndexDocument;
import edu.tamu.scholars.middleware.discovery.model.Individual;
import edu.tamu.scholars.middleware.service.TemplateService;
import edu.tamu.scholars.middleware.service.Triplestore;
import reactor.core.publisher.Flux;
Expand Down Expand Up @@ -66,22 +67,21 @@ public TriplestoreHarvester(Class<AbstractIndexDocument> type) {
this.indexedFields = FieldUtils.getFieldsListWithAnnotation(type, FieldType.class);
}

public Flux<AbstractIndexDocument> harvest() {
public Flux<Individual> harvest() {
CollectionSource source = type.getAnnotation(CollectionSource.class);
String query = templateService.templateSparql(COLLECTION_SPARQL_TEMPLATE, source.predicate());
if (logger.isDebugEnabled()) {
logger.debug(String.format("%s:\n%s", COLLECTION_SPARQL_TEMPLATE, query));
}
logger.debug(String.format("%s:\n%s", COLLECTION_SPARQL_TEMPLATE, query));
QueryExecution queryExecution = triplestore.createQueryExecution(query);
Iterator<Triple> tripleIterator = queryExecution.execConstructTriples();
Iterable<Triple> triples = () -> tripleIterator;

return Flux.fromIterable(triples)
.map(this::subject)
.map(this::harvest)
.doFinally(onFinally -> queryExecution.close());
}

public AbstractIndexDocument harvest(String subject) {
public Individual harvest(String subject) {
try {
return createDocument(subject);
} catch (Exception e) {
Expand All @@ -102,22 +102,31 @@ private String subject(Triple triple) {
return triple.getSubject().toString();
}

private AbstractIndexDocument createDocument(String subject) throws InstantiationException, IllegalAccessException, IllegalArgumentException, InvocationTargetException, NoSuchMethodException, SecurityException {
AbstractIndexDocument document = construct();
Field field = FieldUtils.getField(type, ID, true);
field.set(document, parse(subject));
lookupProperties(document, subject);
lookupSyncIds(document);
return document;
private Individual createDocument(String subject) throws InstantiationException, IllegalAccessException, IllegalArgumentException, InvocationTargetException, NoSuchMethodException, SecurityException {
Individual individual = new Individual();
individual.setId(parse(subject));
individual.setClazz(name());

lookupProperties(individual, subject);
lookupSyncIds(individual);
return individual;
}

private void lookupProperties(AbstractIndexDocument document, String subject) {
private void lookupProperties(Individual individual, String subject) {
propertySourceTypeOps.parallelStream().forEach(typeOp -> {
try {
FieldSource source = typeOp.getPropertySource();
Model model = queryForModel(source, subject);
List<Object> values = lookupProperty(typeOp, source, model);
populate(document, typeOp.getField(), values);

if (values.isEmpty()) {
if (logger.isDebugEnabled()) {
logger.debug(String.format("Could not find values for %s", typeOp.getField().getName()));
}
} else {
individual.getContent().put(typeOp.getField().getName(), values);
}

} catch (Exception e) {
logger.error(String.format("Unable to populate document %s: %s", name(), parse(subject)));
logger.error(String.format("Error: %s", e.getMessage()));
Expand All @@ -130,9 +139,7 @@ private void lookupProperties(AbstractIndexDocument document, String subject) {

private Model queryForModel(FieldSource source, String subject) {
String query = templateService.templateSparql(source.template(), subject);
if (logger.isDebugEnabled()) {
logger.debug(String.format("%s:\n%s", source.template(), query));
}
logger.debug(String.format("%s:\n%s", source.template(), query));
try (QueryExecution qe = triplestore.createQueryExecution(query)) {
Model model = qe.execConstruct();
if (logger.isDebugEnabled()) {
Expand Down Expand Up @@ -187,44 +194,22 @@ private List<Object> queryForProperty(TypeOp typeOp, FieldSource source, Model m
return values;
}

private void populate(AbstractIndexDocument document, Field field, List<Object> values) throws IllegalArgumentException, IllegalAccessException {
if (values.isEmpty()) {
if (logger.isDebugEnabled()) {
logger.debug(String.format("Could not find values for %s", field.getName()));
}
} else {
field.setAccessible(true);
if (List.class.isAssignableFrom(field.getType())) {
field.set(document, values);
} else {
field.set(document, values.get(0));
}
}
}

@SuppressWarnings("unchecked")
private void lookupSyncIds(AbstractIndexDocument document) {
private void lookupSyncIds(Individual individual) {
Set<String> syncIds = new HashSet<String>();
syncIds.add(document.getId());
syncIds.add(individual.getId());
indexedFields.stream().filter(this::isNestedField).peek(field -> field.setAccessible(true)).forEach(field -> {
try {
Object value = field.get(document);
if (value != null) {
if (Collection.class.isAssignableFrom(field.getType())) {
((Collection<String>) value).forEach(v -> addSyncId(syncIds, v));
} else {
addSyncId(syncIds, (String) value);
}
}
} catch (IllegalArgumentException | IllegalAccessException e) {
logger.error(String.format("Unable to get value of %s %s", name(), field.getName()));
logger.error(String.format("Error: %s", e.getMessage()));
if (logger.isDebugEnabled()) {
e.printStackTrace();
String name = field.getName();
Object value = individual.getContent().get(name);
if (value != null) {
if (Collection.class.isAssignableFrom(field.getType())) {
((Collection<String>) value).forEach(v -> addSyncId(syncIds, v));
} else {
addSyncId(syncIds, (String) value);
}
}
});
document.setSyncIds(new ArrayList<>(syncIds));
individual.setSyncIds(new ArrayList<>(syncIds));
}

private boolean isNestedField(Field field) {
Expand All @@ -238,10 +223,6 @@ private void addSyncId(Set<String> syncIds, String value) {
}
}

private AbstractIndexDocument construct() throws InstantiationException, IllegalAccessException, IllegalArgumentException, InvocationTargetException, NoSuchMethodException, SecurityException {
return type.getConstructor().newInstance(new Object[0]);
}

private String name() {
return type.getSimpleName();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import edu.tamu.scholars.middleware.discovery.annotation.FieldType;
import edu.tamu.scholars.middleware.discovery.component.Indexer;
import edu.tamu.scholars.middleware.discovery.model.AbstractIndexDocument;
import edu.tamu.scholars.middleware.discovery.model.Individual;

public class SolrIndexer implements Indexer {

Expand Down Expand Up @@ -79,7 +80,7 @@ public void init() {
}

@Override
public void index(Collection<AbstractIndexDocument> documents) {
public void index(Collection<Individual> documents) {
try {
solrClient.addBeans(COLLECTION, documents);
solrClient.commit(COLLECTION);
Expand All @@ -91,7 +92,7 @@ public void index(Collection<AbstractIndexDocument> documents) {
}

@Override
public void index(AbstractIndexDocument document) {
public void index(Individual document) {
try {
solrClient.addBean(COLLECTION, document);
solrClient.commit(COLLECTION);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,35 +1,35 @@
package edu.tamu.scholars.middleware.discovery.model;

import static edu.tamu.scholars.middleware.discovery.DiscoveryConstants.CLASS;

import java.util.ArrayList;
import java.util.List;

import org.apache.solr.client.solrj.beans.Field;

import com.fasterxml.jackson.annotation.JsonProperty;

import edu.tamu.scholars.middleware.discovery.annotation.FieldSource;
import edu.tamu.scholars.middleware.discovery.annotation.FieldType;

public abstract class AbstractIndexDocument {

@Field
@FieldType(required = true, readonly = true)
private String id;

@JsonProperty("class")
@Field("class")
@FieldType(type = "string", value = "class", required = true)
@JsonProperty(CLASS)
@FieldType(type = "string", value = CLASS, required = true)
private String clazz = this.getClass().getSimpleName();

@Field
@FieldType(type = "whole_strings")
@FieldSource(template = "common/type", predicate = "http://vitro.mannlib.cornell.edu/ns/vitro/0.7#mostSpecificType", parse = true)
private List<String> type;

@Field
@FieldType(type = "strings")
private List<String> syncIds = new ArrayList<>();

@FieldType(type = "pdate")
@FieldSource(template = "common/modTime", predicate = "http://vitro.mannlib.cornell.edu/ns/vitro/0.7#modTime")
private String modTime;

public String getId() {
return id;
}
Expand Down Expand Up @@ -62,4 +62,12 @@ public void setSyncIds(List<String> syncIds) {
this.syncIds = syncIds;
}

public String getModTime() {
return modTime;
}

public void setModTime(String modTime) {
this.modTime = modTime;
}

}
Loading

0 comments on commit 5fb28f6

Please sign in to comment.