diff --git a/citesphere-importer/pom.xml b/citesphere-importer/pom.xml index 4d1ee8d..fd9c032 100644 --- a/citesphere-importer/pom.xml +++ b/citesphere-importer/pom.xml @@ -11,11 +11,12 @@ 1.7.26 5.1.7.RELEASE - 5.1.5.RELEASE + 5.5.7 Lovelace-RELEASE - 3.0.11.RELEASE + 3.0.13.RELEASE 2.2.6.RELEASE - 0.2 + 0.5 + 0.2 $2a$04$oQo44vqcDIFRoYKiAXoNheurzkwX9dcNmowvTX/hsWuBMwijqn44i @@ -38,6 +39,8 @@ citesphere-importer TomcatServer + + UTF-8 @@ -69,6 +72,11 @@ citesphere-messages ${citesphere.messages.version} + + edu.asu.diging + crossref-connect + ${crossref-connect-version} + @@ -191,13 +199,13 @@ mysql mysql-connector-java - 8.0.16 + 8.0.28 org.hibernate hibernate-core - 5.4.2.Final + 5.4.24.Final @@ -249,7 +257,12 @@ 2.2.11 - + + + javax.annotation + javax.annotation-api + 1.3.2 + @@ -263,7 +276,7 @@ commons-io commons-io - 2.7 + 2.14.0 @@ -328,7 +341,7 @@ org.apache.maven.plugins maven-war-plugin - 2.6 + 3.4.0 @@ -341,6 +354,8 @@ **/*.woff **/*.ttf **/*.woff2 + **/*.eot + **/*.otf @@ -350,8 +365,9 @@ maven-compiler-plugin - 1.8 - 1.8 + 11 + 11 + UTF-8 diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java index 6b4f845..ba6b319 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/kafka/impl/ReferenceImportListener.java @@ -5,6 +5,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.kafka.annotation.KafkaListener; import com.fasterxml.jackson.databind.ObjectMapper; @@ -18,7 +19,12 @@ public class ReferenceImportListener { private final Logger logger = LoggerFactory.getLogger(getClass()); @Autowired - private IImportProcessor processor; + @Qualifier("fileImportProcessor") + private IImportProcessor fileProcessor; + + @Autowired + @Qualifier("crossrefReferenceImportProcessor") + private IImportProcessor crossrefProcessor; @KafkaListener(topics = KafkaTopics.REFERENCES_IMPORT_TOPIC) public void receiveMessage(String message) { @@ -32,6 +38,20 @@ public void receiveMessage(String message) { return; } - processor.process(msg); + fileProcessor.process(msg); + } + + @KafkaListener(topics = KafkaTopics.REFERENCES_IMPORT_CROSSREF_TOPIC) + public void receiveCrossrefImportMessage(String message) { + ObjectMapper mapper = new ObjectMapper(); + KafkaJobMessage msg = null; + try { + msg = mapper.readValue(message, KafkaJobMessage.class); + } catch (IOException e) { + logger.error("Could not unmarshall message.", e); + return; + } + + crossrefProcessor.process(msg); } } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/ItemType.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/ItemType.java index 78d38be..4fef7d0 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/ItemType.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/ItemType.java @@ -45,7 +45,9 @@ public enum ItemType { THESIS("thesis"), TV_BROADCAST("tvBroadcast"), VIDEO_RECORDIG("videoRecording"), - WEBPAGE("webpage"); + WEBPAGE("webpage"), + DATABASE("database"); + final private String 
zoteroKey; diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ArticleMeta.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ArticleMeta.java index 1b403f5..2596a37 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ArticleMeta.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ArticleMeta.java @@ -7,6 +7,7 @@ public class ArticleMeta { private List articleIds; private String articleTitle; + private String shortTitle; private List categoryGroups = new ArrayList(); private List contributors; private String authorNotesCorrespondence; @@ -56,6 +57,12 @@ public String getArticleTitle() { public void setArticleTitle(String articleTitle) { this.articleTitle = articleTitle; } + public String getShortTitle() { + return shortTitle; + } + public void setShortTitle(String shortTitle) { + this.shortTitle = shortTitle; + } public List getCategories() { return categoryGroups; } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ContributionType.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ContributionType.java index 5d8c724..e2e5e84 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ContributionType.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/ContributionType.java @@ -4,4 +4,6 @@ public interface ContributionType { public final static String AUTHOR = "author"; public final static String EDITOR = "editor"; + public final static String TRANSLATOR = "translator"; + public final static String CHAIR = "chair"; } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Publication.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Publication.java index 6963fdc..31c177f 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Publication.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/model/impl/Publication.java @@ -12,6 +12,31 @@ public class Publication implements BibEntry { public final static String NEWS_ITEM = "newspaperArticle"; public final static String PROCEEDINGS_PAPER = "conferencePaper"; public final static String DOCUMENT = "document"; + // publication types in CrossRef + public final static String JOURNAL_ISSUE = "journal-issue"; + public final static String REFERNCE_ENTRY = "reference-entry"; + public final static String POSTED_CONTENT = "posted-content"; + public final static String COMPONENT = "component"; + public final static String EDITED_BOOK = "edited-book"; + public final static String DISSERTATION = "dissertation"; + public final static String REPORT_COMPONENT = "report-component"; + public final static String REPORT = "report"; + public final static String PEER_REVIEW = "peer-review"; + public final static String BOOK_TRACK = "book-track"; + public final static String BOOK_PART = "book-part"; + public final static String OTHER = "other"; + public final static String JORUNAL_VOLUME = "journal-volume"; + public final static String BOOK_SET = "book-set"; + public final static String JOURNAL = "journal"; + public final static String PROCEEDINGS_SERIES = "proceedings-series"; + public final static String REPORT_SERIES = "report-series"; + public final static String 
PROCEEDINGS = "proceedings"; + public final static String DATABASE = "database"; + public final static String STANDARD = "standard"; + public final static String REFERENCE_BOOK = "reference-book"; + public final static String GRANT = "grant"; + public final static String DATASET = "dataset"; + public final static String BOOK_SERIES = "book-series"; private String articleType; private ContainerMeta containerMeta; diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/ImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/AbstractImportProcessor.java similarity index 75% rename from citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/ImportProcessor.java rename to citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/AbstractImportProcessor.java index b4c5a9b..bcbb890 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/ImportProcessor.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/AbstractImportProcessor.java @@ -9,7 +9,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.stereotype.Service; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; @@ -18,7 +17,6 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import edu.asu.diging.citesphere.importer.core.exception.CitesphereCommunicationException; -import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException; import edu.asu.diging.citesphere.importer.core.exception.MessageCreationException; import edu.asu.diging.citesphere.importer.core.kafka.impl.KafkaRequestProducer; import edu.asu.diging.citesphere.importer.core.model.BibEntry; @@ -27,7 +25,6 @@ import edu.asu.diging.citesphere.importer.core.service.ICitesphereConnector; import edu.asu.diging.citesphere.importer.core.service.IImportProcessor; import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; -import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; import edu.asu.diging.citesphere.importer.core.zotero.IZoteroConnector; import edu.asu.diging.citesphere.importer.core.zotero.template.IJsonGenerationService; import edu.asu.diging.citesphere.messages.KafkaTopics; @@ -37,40 +34,28 @@ import edu.asu.diging.citesphere.messages.model.ResponseCode; import edu.asu.diging.citesphere.messages.model.Status; -/** - * This class coordinates the import process. It connects with Citesphere and - * downloads the files to be imported. It then starts the transformation process from - * import format to internal bibliographical format and then turns the internal - * bibliographical format to Json that can be submitted to Zotero. 
- * @author jdamerow - * - */ -@Service -public class ImportProcessor implements IImportProcessor { - - private final Logger logger = LoggerFactory.getLogger(getClass()); +public abstract class AbstractImportProcessor implements IImportProcessor { + protected final Logger logger = LoggerFactory.getLogger(getClass()); + @Autowired - private ICitesphereConnector connector; + private KafkaRequestProducer requestProducer; @Autowired - private IHandlerRegistry handlerRegistry; - + private ICitesphereConnector connector; + @Autowired private IZoteroConnector zoteroConnector; - + @Autowired private IJsonGenerationService generationService; - @Autowired - private KafkaRequestProducer requestProducer; - /** * Map that maps internal bibliographical formats (contants of {@link Publication} * class) to Zotero item types ({@link ItemType} enum). */ private Map itemTypeMapping = new HashMap<>(); - + @PostConstruct public void init() { // this needs to be changed and improved, but for now it works @@ -81,15 +66,26 @@ public void init() { itemTypeMapping.put(Publication.NEWS_ITEM, ItemType.NEWSPAPER_ARTICLE); itemTypeMapping.put(Publication.PROCEEDINGS_PAPER, ItemType.CONFERENCE_PAPER); itemTypeMapping.put(Publication.DOCUMENT, ItemType.DOCUMENT); + itemTypeMapping.put(Publication.REFERNCE_ENTRY, ItemType.DICTIONARY_ENTRY); + itemTypeMapping.put(Publication.POSTED_CONTENT, ItemType.WEBPAGE); + itemTypeMapping.put(Publication.COMPONENT, ItemType.ATTACHMENT); + itemTypeMapping.put(Publication.EDITED_BOOK, ItemType.BOOK); + itemTypeMapping.put(Publication.DISSERTATION, ItemType.THESIS); + itemTypeMapping.put(Publication.REPORT_COMPONENT, ItemType.REPORT); + itemTypeMapping.put(Publication.REPORT, ItemType.REPORT); + itemTypeMapping.put(Publication.PEER_REVIEW, ItemType.JOURNAL_ARTICLE); + itemTypeMapping.put(Publication.BOOK_TRACK, ItemType.BOOK); + itemTypeMapping.put(Publication.BOOK_PART, ItemType.BOOK_SECTION); + itemTypeMapping.put(Publication.OTHER, ItemType.DOCUMENT); + itemTypeMapping.put(Publication.BOOK_SET, ItemType.BOOK); + itemTypeMapping.put(Publication.PROCEEDINGS, ItemType.CONFERENCE_PAPER); + itemTypeMapping.put(Publication.DATABASE, ItemType.DATABASE); + itemTypeMapping.put(Publication.STANDARD, ItemType.STATUTE); + itemTypeMapping.put(Publication.REFERENCE_BOOK, ItemType.BOOK); + itemTypeMapping.put(Publication.GRANT, ItemType.DOCUMENT); + itemTypeMapping.put(Publication.DATASET, ItemType.DATABASE); } - - /* - * (non-Javadoc) - * - * @see - * edu.asu.diging.citesphere.importer.core.service.impl.IImportProcessor#process - * (edu.asu.diging.citesphere.importer.core.kafka.impl.KafkaJobMessage) - */ + @Override public void process(KafkaJobMessage message) { JobInfo info = getJobInfo(message); @@ -97,29 +93,69 @@ public void process(KafkaJobMessage message) { sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X10); return; } - - String filePath = downloadFile(message); - if (filePath == null) { - sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X20); - return; + startImport(message, info); + } + + private JobInfo getJobInfo(KafkaJobMessage message) { + JobInfo info = null; + try { + info = connector.getJobInfo(message.getId()); + } catch (CitesphereCommunicationException e) { + logger.error("Could not get Zotero info.", e); + return null; } - - sendMessage(null, message.getId(), Status.PROCESSING, ResponseCode.P00); - BibEntryIterator bibIterator = null; + return info; + } + + protected void sendMessage(ItemCreationResponse message, String jobId, Status status, 
ResponseCode code) { + KafkaImportReturnMessage returnMessage = new KafkaImportReturnMessage(message, jobId); + returnMessage.setStatus(status); + returnMessage.setCode(code); + try { + requestProducer.sendRequest(returnMessage, KafkaTopics.REFERENCES_IMPORT_DONE_TOPIC); + } catch (MessageCreationException e) { + logger.error("Exception sending message.", e); + } + } + + protected ICitesphereConnector getCitesphereConnector() { + return connector; + } + + private ItemCreationResponse submitEntries(ArrayNode entries, JobInfo info) { + ObjectMapper mapper = new ObjectMapper(); try { - bibIterator = handlerRegistry.handleFile(info, filePath); - } catch (IteratorCreationException e1) { - logger.error("Could not create iterator.", e1); + String msg = mapper.writeValueAsString(entries); + logger.info("Submitting " + msg); + ItemCreationResponse response = zoteroConnector.addEntries(info, entries); + if (response != null) { + logger.info(response.getSuccessful() + ""); + logger.error(response.getFailed() + ""); + } else { + logger.error("Item creation failed."); + } + return response; + } catch (URISyntaxException e) { + logger.error("Could not store new entry.", e); + } catch (JsonProcessingException e) { + logger.error("Could not write JSON."); } + return null; + } + + private void startImport(KafkaJobMessage message, JobInfo info) { + ObjectMapper mapper = new ObjectMapper(); + ArrayNode root = mapper.createArrayNode(); + int entryCounter = 0; + + sendMessage(null, message.getId(), Status.PROCESSING, ResponseCode.P00); + BibEntryIterator bibIterator = getBibEntryIterator(message, info); if (bibIterator == null) { sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X30); return; } - - ObjectMapper mapper = new ObjectMapper(); - ArrayNode root = mapper.createArrayNode(); - int entryCounter = 0; + while (bibIterator.hasNext()) { BibEntry entry = bibIterator.next(); if (entry.getArticleType() == null) { @@ -153,60 +189,5 @@ public void process(KafkaJobMessage message) { sendMessage(response, message.getId(), Status.DONE, ResponseCode.S00); } - private void sendMessage(ItemCreationResponse message, String jobId, Status status, ResponseCode code) { - KafkaImportReturnMessage returnMessage = new KafkaImportReturnMessage(message, jobId); - returnMessage.setStatus(status); - returnMessage.setCode(code); - try { - requestProducer.sendRequest(returnMessage, KafkaTopics.REFERENCES_IMPORT_DONE_TOPIC); - } catch (MessageCreationException e) { - // FIXME handle this case - logger.error("Exception sending message.", e); - } - } - - private ItemCreationResponse submitEntries(ArrayNode entries, JobInfo info) { - ObjectMapper mapper = new ObjectMapper(); - try { - String msg = mapper.writeValueAsString(entries); - logger.info("Submitting " + msg); - ItemCreationResponse response = zoteroConnector.addEntries(info, entries); - if (response != null) { - logger.info(response.getSuccessful() + ""); - logger.error(response.getFailed() + ""); - } else { - logger.error("Item creation failed."); - } - return response; - } catch (URISyntaxException e) { - logger.error("Could not store new entry.", e); - } catch (JsonProcessingException e) { - logger.error("Could not write JSON."); - } - return null; - } - - private JobInfo getJobInfo(KafkaJobMessage message) { - JobInfo info = null; - try { - info = connector.getJobInfo(message.getId()); - } catch (CitesphereCommunicationException e) { - // FIXME this needs to be handled better - logger.error("Could not get Zotero info.", e); - return null; - } - return info; - 
} - - private String downloadFile(KafkaJobMessage message) { - String file = null; - try { - file = connector.getUploadeFile(message.getId()); - } catch (CitesphereCommunicationException e) { - // FIXME this needs to be handled better - logger.error("Could not get Zotero info.", e); - return null; - } - return file; - } + protected abstract BibEntryIterator getBibEntryIterator(KafkaJobMessage message, JobInfo info); } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java new file mode 100644 index 0000000..9dd6c31 --- /dev/null +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/CrossrefReferenceImportProcessor.java @@ -0,0 +1,21 @@ +package edu.asu.diging.citesphere.importer.core.service.impl; + +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; +import edu.asu.diging.citesphere.importer.core.service.parse.impl.CrossRefParser; +import edu.asu.diging.citesphere.importer.core.service.parse.iterators.CrossRefIterator; +import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; + +@Service +public class CrossrefReferenceImportProcessor extends AbstractImportProcessor { + + @Autowired + private CrossRefParser crossRefParser; + + @Override + protected BibEntryIterator getBibEntryIterator(KafkaJobMessage message, JobInfo info) { + return new CrossRefIterator(info, crossRefParser); + } +} diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java new file mode 100644 index 0000000..ce80e41 --- /dev/null +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/FileImportProcessor.java @@ -0,0 +1,56 @@ +package edu.asu.diging.citesphere.importer.core.service.impl; + +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import edu.asu.diging.citesphere.importer.core.exception.CitesphereCommunicationException; +import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException; +import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; +import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; +import edu.asu.diging.citesphere.messages.model.KafkaJobMessage; +import edu.asu.diging.citesphere.messages.model.ResponseCode; +import edu.asu.diging.citesphere.messages.model.Status; + +/** + * This class coordinates the import process. It connects with Citesphere and + * downloads the files to be imported. It then starts the transformation process from + * import format to internal bibliographical format and then turns the internal + * bibliographical format to Json that can be submitted to Zotero. 
+ * @author jdamerow + * + */ +@Service +public class FileImportProcessor extends AbstractImportProcessor { + + @Autowired + private IHandlerRegistry handlerRegistry; + + private String downloadFile(KafkaJobMessage message) { + String file = null; + try { + file = getCitesphereConnector().getUploadeFile(message.getId()); + } catch (CitesphereCommunicationException e) { + logger.error("Could not get Zotero info.", e); + return null; + } + return file; + } + + @Override + protected BibEntryIterator getBibEntryIterator(KafkaJobMessage message, JobInfo info) { + String filePath = downloadFile(message); + if (filePath == null) { + sendMessage(null, message.getId(), Status.FAILED, ResponseCode.X20); + return null; + } + + BibEntryIterator bibIterator = null; + try { + bibIterator = handlerRegistry.handleFile(info, filePath); + } catch (IteratorCreationException e1) { + logger.error("Could not create iterator.", e1); + } + + return bibIterator; + } +} diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/JobInfo.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/JobInfo.java index 8eb8026..2731d51 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/JobInfo.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/impl/JobInfo.java @@ -1,5 +1,7 @@ package edu.asu.diging.citesphere.importer.core.service.impl; +import java.util.List; + import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @JsonIgnoreProperties(ignoreUnknown=true) @@ -8,6 +10,7 @@ public class JobInfo { private String zotero; private String zoteroId; private String groupId; + private List dois; public String getZotero() { return zotero; @@ -27,5 +30,11 @@ public String getGroupId() { public void setGroupId(String groupId) { this.groupId = groupId; } + public List getDois() { + return dois; + } + public void setDois(List dois) { + this.dois = dois; + } } diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/crossref/ICrossRefParser.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/crossref/ICrossRefParser.java new file mode 100644 index 0000000..bee8aac --- /dev/null +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/crossref/ICrossRefParser.java @@ -0,0 +1,12 @@ +package edu.asu.diging.citesphere.importer.core.service.parse.crossref; + +import edu.asu.diging.citesphere.importer.core.model.impl.ArticleMeta; +import edu.asu.diging.citesphere.importer.core.model.impl.ContainerMeta; +import edu.asu.diging.crossref.model.Item; + +public interface ICrossRefParser { + + ContainerMeta parseJournalMeta(Item item); + ArticleMeta parseArticleMeta(Item item); + +} diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefParser.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefParser.java new file mode 100644 index 0000000..99e3e1e --- /dev/null +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/impl/CrossRefParser.java @@ -0,0 +1,174 @@ +package edu.asu.diging.citesphere.importer.core.service.parse.impl; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.springframework.stereotype.Component; + +import 
edu.asu.diging.citesphere.importer.core.model.impl.Affiliation; +import edu.asu.diging.citesphere.importer.core.model.impl.ArticleMeta; +import edu.asu.diging.citesphere.importer.core.model.impl.ArticlePublicationDate; +import edu.asu.diging.citesphere.importer.core.model.impl.ContainerMeta; +import edu.asu.diging.citesphere.importer.core.model.impl.ContributionType; +import edu.asu.diging.citesphere.importer.core.model.impl.Contributor; +import edu.asu.diging.citesphere.importer.core.model.impl.ContributorId; +import edu.asu.diging.citesphere.importer.core.model.impl.Issn; +import edu.asu.diging.citesphere.importer.core.model.impl.Reference; +import edu.asu.diging.citesphere.importer.core.model.impl.ReviewInfo; +import edu.asu.diging.citesphere.importer.core.service.parse.crossref.ICrossRefParser; +import edu.asu.diging.crossref.model.Institution; +import edu.asu.diging.crossref.model.IssnType; +import edu.asu.diging.crossref.model.Item; +import edu.asu.diging.crossref.model.Person; + +@Component +public class CrossRefParser implements ICrossRefParser { + + /** + * Parses journal metadata from the given {@link Item} object and returns a {@link ContainerMeta} object + * containing the extracted journal metadata. + * + * @param item the {@link Item} object containing journal metadata to be parsed. + * + * @return a {@link ContainerMeta} object containing the parsed journal metadata. + */ + @Override + public ContainerMeta parseJournalMeta(Item item) { + ContainerMeta meta = new ContainerMeta(); + meta.setContainerTitle(item.getContainerTitle().get(0)); + meta.setPublisherName(item.getPublisher()); + meta.setPublisherLocation(item.getPublisherLocation()); + List issnList = new ArrayList(); + if(item.getIssnType() != null) { + for(IssnType issnType : item.getIssnType()) { + Issn issn = new Issn(); + issn.setIssn(issnType.getValue()); + issn.setPubType(issnType.getType()); + issnList.add(issn); + } + } + meta.setIssns(issnList); + return meta; + } + + /** + * Parses article metadata from the given {@link Item} object and returns an {@link ArticleMeta} object + * containing the extracted article metadata. + * + * @param item the {@link Item} object containing article metadata to be parsed. + * + * @return an {@link ArticleMeta} object containing the parsed article metadata. 
+ */ + @Override + public ArticleMeta parseArticleMeta(Item item) { + ArticleMeta meta = new ArticleMeta(); + meta.setArticleTitle(item.getTitle().get(0)); + List contributors = new ArrayList<>(); + // List of authors + if(item.getAuthor() != null) { + contributors.addAll(mapPersonToContributor(item.getAuthor(), ContributionType.AUTHOR)); + } + // List of editors + if(item.getEditor() != null) { + contributors.addAll(mapPersonToContributor(item.getEditor(), ContributionType.EDITOR)); + } + // List of translators + if(item.getTranslator() != null) { + contributors.addAll(mapPersonToContributor(item.getTranslator(), ContributionType.TRANSLATOR)); + } + // List of chair + if(item.getChair() != null) { + contributors.addAll(mapPersonToContributor(Arrays.asList(item.getChair()), ContributionType.CHAIR)); + } + meta.setContributors(contributors); + + ArticlePublicationDate publicationDate = new ArticlePublicationDate(); + List dateParts = item.getPublished().getIndexedDateParts(); + if(dateParts != null) { + publicationDate.setPublicationDate(dateParts.get(2).toString()); + publicationDate.setPublicationMonth(dateParts.get(1).toString()); + publicationDate.setPublicationYear(dateParts.get(0).toString()); + } + meta.setPublicationDate(publicationDate); + meta.setVolume(item.getVolume()); + meta.setIssue(item.getIssue()); + meta.setPartNumber(item.getPartNumber()); + meta.setFirstPage(item.getPage()); + meta.setSelfUri(item.getUrl()); + meta.setArticleAbstract(item.getAbstractText()); + meta.setLanguage(item.getLanguage()); + ReviewInfo review = new ReviewInfo(); + if (item.getReview() != null) { + review.setFullDescription(item.getReview().getCompetingInterestStatement()); + } + meta.setReviewInfo(review); + meta.setDocumentType(item.getType()); + if(item.getReference() != null) { + meta.setReferences(mapReferences(item.getReference())); + } + meta.setReferenceCount(item.getReferenceCount().toString()); + + return meta; + } + + private List mapReferences(List itemReferences) { + List references = new ArrayList<>(); + if(itemReferences != null) { + for(edu.asu.diging.crossref.model.Reference itemRef: itemReferences) { + references.add(mapSingleReference(itemRef)); + } + } + return references; + } + + private Reference mapSingleReference(edu.asu.diging.crossref.model.Reference itemRef) { + Reference ref = new Reference(); + ref.setAuthorString(itemRef.getAuthor()); + ref.setTitle(itemRef.getArticleTitle()); + ref.setYear(itemRef.getYear()); + if(itemRef.getDoi()!=null && !itemRef.getDoi().isBlank()) { + ref.setIdentifier(itemRef.getDoi()); + ref.setIdentifierType("DOI"); + ref.setSource(itemRef.getDoiAssertedBy()); + } else if (itemRef.getIssn()!=null && !itemRef.getIssn().isBlank()) { + ref.setIdentifier(itemRef.getIssn()); + ref.setIdentifierType("ISSN"); + } else if (itemRef.getIsbn()!=null && !itemRef.getIsbn().isBlank()) { + ref.setIdentifier(itemRef.getIsbn()); + ref.setIdentifierType("ISBN"); + } + ref.setFirstPage(itemRef.getFirstPage()); + ref.setVolume(itemRef.getVolume()); + ref.setReferenceId(itemRef.getKey()); + ref.setReferenceString(itemRef.getUnstructured()); + ref.setReferenceStringRaw(itemRef.getUnstructured()); + + return ref; + } + + private List mapPersonToContributor(List personList, String contributionType) { + List contributors = new ArrayList(); + for(Person person: personList) { + Contributor contributor = new Contributor(); + contributor.setContributionType(contributionType); + contributor.setGivenName(person.getGiven()); + contributor.setSurname(person.getFamily()); + 
contributor.setFullName(person.getName()); + List affiliations = new ArrayList<>(); + for(Institution institute: person.getAffiliation()) { + Affiliation affiliation = new Affiliation(); + affiliation.setName(institute.getName()); + affiliations.add(affiliation); + } + contributor.setAffiliations(affiliations); + ContributorId contributorID = new ContributorId(); + contributorID.setId(person.getOrcid()); + contributorID.setIdSystem("ORCID"); + contributor.setIds(Arrays.asList(contributorID)); + contributors.add(contributor); + } + return contributors; + } + +} diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java new file mode 100644 index 0000000..4d11fba --- /dev/null +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/service/parse/iterators/CrossRefIterator.java @@ -0,0 +1,111 @@ +package edu.asu.diging.citesphere.importer.core.service.parse.iterators; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import edu.asu.diging.citesphere.importer.core.model.BibEntry; +import edu.asu.diging.citesphere.importer.core.model.impl.Publication; +import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo; +import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; +import edu.asu.diging.citesphere.importer.core.service.parse.crossref.ICrossRefParser; +import edu.asu.diging.crossref.exception.RequestFailedException; +import edu.asu.diging.crossref.model.Item; +import edu.asu.diging.crossref.service.CrossrefConfiguration; +import edu.asu.diging.crossref.service.CrossrefWorksService; +import edu.asu.diging.crossref.service.impl.CrossrefWorksServiceImpl; + +public class CrossRefIterator implements BibEntryIterator { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private JobInfo info; + + private Map typeMap; + + private CrossrefWorksService crossrefService; + + private Iterator doisIterator; + + private ICrossRefParser crossRefParser; + + public CrossRefIterator(JobInfo info, ICrossRefParser crossRefParser) { + this.info = info; + doisIterator = info.getDois().iterator(); + this.crossRefParser = crossRefParser; + init(); + } + + private void init() { + crossrefService = new CrossrefWorksServiceImpl(CrossrefConfiguration.getDefaultConfig()); + typeMap = new HashMap(); + typeMap.put("journal-article", Publication.ARTICLE); + typeMap.put("book", Publication.BOOK); + typeMap.put("book-chapter", Publication.BOOK_CHAPTER); + typeMap.put("monograph", Publication.BOOK); + typeMap.put("journal-issue", Publication.JOURNAL_ISSUE); + typeMap.put("reference-entry", Publication.REFERNCE_ENTRY); + typeMap.put("posted-content", Publication.POSTED_CONTENT); + typeMap.put("component", Publication.COMPONENT); + typeMap.put("edited-book", Publication.EDITED_BOOK); + typeMap.put("proceedings-article", Publication.PROCEEDINGS_PAPER); + typeMap.put("dissertation", Publication.DISSERTATION); + typeMap.put("book-section", Publication.BOOK_CHAPTER); + typeMap.put("report-component", Publication.REPORT_COMPONENT); + typeMap.put("report", Publication.REPORT); + typeMap.put("peer-review", Publication.PEER_REVIEW); + typeMap.put("book-track", Publication.BOOK_TRACK); + typeMap.put("book-part", Publication.BOOK_PART); + 
typeMap.put("other", Publication.OTHER); + typeMap.put("journal-volume", Publication.JORUNAL_VOLUME); + typeMap.put("book-set", Publication.BOOK_SET); + typeMap.put("journal", Publication.JOURNAL); + typeMap.put("proceedings-series", Publication.PROCEEDINGS_SERIES); + typeMap.put("report-series", Publication.REPORT_SERIES); + typeMap.put("proceedings", Publication.PROCEEDINGS); + typeMap.put("database", Publication.DATABASE); + typeMap.put("standard", Publication.STANDARD); + typeMap.put("reference-book", Publication.REFERENCE_BOOK); + typeMap.put("grant", Publication.GRANT); + typeMap.put("dataset", Publication.DATASET); + typeMap.put("book-series", Publication.BOOK_SERIES); + } + + @Override + public BibEntry next() { + if (!doisIterator.hasNext()) { + return null; + } + BibEntry nextEntry = new Publication(); + + try { + Item item = crossrefService.get(doisIterator.next()); + nextEntry.setArticleType(typeMap.get(item.getType())); + nextEntry.setJournalMeta(crossRefParser.parseJournalMeta(item)); + nextEntry.setArticleMeta(crossRefParser.parseArticleMeta(item)); + } catch (RequestFailedException | IOException e) { + logger.error("Could not retrieve work for doi: "+ doisIterator.next(), e); + // for now we just log the exceptions + // we might want to devise a way to decide if the + // service might be down and we should stop sending requests. + return null; + } + return nextEntry; + } + + + @Override + public boolean hasNext() { + return doisIterator.hasNext(); + } + + @Override + public void close() { + // do nothing + } + +} diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/ItemJsonGenerator.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/ItemJsonGenerator.java index 2d9240a..d2c16d6 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/ItemJsonGenerator.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/ItemJsonGenerator.java @@ -162,7 +162,7 @@ public ArrayNode processCreators(JsonNode node, BibEntry article) { creators.add(contributorNode); } - if (article.getArticleMeta().getReviewInfo() != null) { + if (article.getArticleMeta().getReviewInfo() != null && article.getArticleMeta().getReviewInfo().getContributors() != null) { for (Contributor reviewedAuthor : article.getArticleMeta().getReviewInfo().getContributors()) { ObjectNode contributorNode = getObjectMapper().createObjectNode(); contributorNode.put("creatorType", ZoteroCreatorTypes.REVIEWED_AUTHOR); diff --git a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java index 371d029..27fa4d8 100644 --- a/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java +++ b/citesphere-importer/src/main/java/edu/asu/diging/citesphere/importer/core/zotero/template/impl/JsonGenerationService.java @@ -41,8 +41,7 @@ public ObjectNode generateJson(JsonNode template, BibEntry entry) { ItemJsonGenerator generator = generators.get(entry.getArticleType()); if (generator != null) { return generator.generate(template, entry); - } - + } return null; } } diff --git a/citesphere-importer/src/test/java/edu/asu/diging/citesphere/importer/core/service/parse/crossref/CrossRefParserTest.java 
b/citesphere-importer/src/test/java/edu/asu/diging/citesphere/importer/core/service/parse/crossref/CrossRefParserTest.java new file mode 100644 index 0000000..9bd0dda --- /dev/null +++ b/citesphere-importer/src/test/java/edu/asu/diging/citesphere/importer/core/service/parse/crossref/CrossRefParserTest.java @@ -0,0 +1,213 @@ +package edu.asu.diging.citesphere.importer.core.service.parse.crossref; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import edu.asu.diging.citesphere.importer.core.model.impl.ArticleMeta; +import edu.asu.diging.citesphere.importer.core.model.impl.ContainerMeta; +import edu.asu.diging.citesphere.importer.core.model.impl.Issn; +import edu.asu.diging.citesphere.importer.core.service.parse.impl.CrossRefParser; +import edu.asu.diging.crossref.model.Date; +import edu.asu.diging.crossref.model.IssnType; +import edu.asu.diging.crossref.model.Item; +import edu.asu.diging.crossref.model.Person; +import edu.asu.diging.crossref.model.Review; +import edu.asu.diging.crossref.model.impl.IssnTypeImpl; + +public class CrossRefParserTest { + private CrossRefParser crossRefParser; + + @Before + public void setUp() { + crossRefParser = new CrossRefParser(); + } + + @Test + public void test_parseJournalMeta_withValidData() { + Item item = Mockito.mock(Item.class); + Mockito.when(item.getContainerTitle()).thenReturn(Collections.singletonList("Journal of Testing")); + Mockito.when(item.getPublisher()).thenReturn("Test Publisher"); + Mockito.when(item.getPublisherLocation()).thenReturn("Test City"); + + IssnType issnType1 = new IssnTypeImpl(); + issnType1.setValue("1234-5678"); + issnType1.setType("issn"); + IssnType issnType2 = new IssnTypeImpl(); + issnType2.setValue("8765-4321"); + issnType2.setType("issn"); + Mockito.when(item.getIssnType()).thenReturn(Arrays.asList(issnType1, issnType2)); + + ContainerMeta result = crossRefParser.parseJournalMeta(item); + + Assert.assertNotNull(result); + Assert.assertEquals("Journal of Testing", result.getContainerTitle()); + Assert.assertEquals("Test Publisher", result.getPublisherName()); + Assert.assertEquals("Test City", result.getPublisherLocation()); + + List issns = result.getIssns(); + Assert.assertNotNull(issns); + Assert.assertEquals(2, issns.size()); + Assert.assertEquals("1234-5678", issns.get(0).getIssn()); + Assert.assertEquals("issn", issns.get(0).getPubType()); + Assert.assertEquals("8765-4321", issns.get(1).getIssn()); + Assert.assertEquals("issn", issns.get(1).getPubType()); + } + + @Test + public void test_parseJournalMeta_withNoIssns() { + Item item = Mockito.mock(Item.class); + Mockito.when(item.getContainerTitle()).thenReturn(Collections.singletonList("Journal of Testing")); + Mockito.when(item.getPublisher()).thenReturn("Test Publisher"); + Mockito.when(item.getPublisherLocation()).thenReturn("Test City"); + Mockito.when(item.getIssnType()).thenReturn(null); + + ContainerMeta result = crossRefParser.parseJournalMeta(item); + + Assert.assertNotNull(result); + Assert.assertEquals("Journal of Testing", result.getContainerTitle()); + Assert.assertEquals("Test Publisher", result.getPublisherName()); + Assert.assertEquals("Test City", result.getPublisherLocation()); + Assert.assertNotNull(result.getIssns()); + Assert.assertTrue(result.getIssns().isEmpty()); + } + + @Test + public void test_parseJournalMeta_withNoContainerTitle() { + Item item = Mockito.mock(Item.class); + 
Mockito.when(item.getContainerTitle()).thenReturn(Collections.emptyList()); + Mockito.when(item.getPublisher()).thenReturn("Test Publisher"); + Mockito.when(item.getPublisherLocation()).thenReturn("Test City"); + + try { + crossRefParser.parseJournalMeta(item); + Assert.fail("Expected IndexOutOfBoundsException to be thrown"); + } catch (IndexOutOfBoundsException exception) { + Assert.assertNotNull(exception); + } + } + + @Test + public void test_parseJournalMeta_withNullItem() { + try { + crossRefParser.parseJournalMeta(null); + Assert.fail("Expected NullPointerException to be thrown"); + } catch (NullPointerException exception) { + Assert.assertNotNull(exception); + } + } + + @Test + public void test_parseJournalMeta_withEmptyFields() { + Item item = Mockito.mock(Item.class); + Mockito.when(item.getContainerTitle()).thenReturn(Collections.singletonList("")); + Mockito.when(item.getPublisher()).thenReturn(""); + Mockito.when(item.getPublisherLocation()).thenReturn(""); + Mockito.when(item.getIssnType()).thenReturn(Collections.emptyList()); + + ContainerMeta result = crossRefParser.parseJournalMeta(item); + + Assert.assertNotNull(result); + Assert.assertEquals("", result.getContainerTitle()); + Assert.assertEquals("", result.getPublisherName()); + Assert.assertEquals("", result.getPublisherLocation()); + Assert.assertNotNull(result.getIssns()); + Assert.assertTrue(result.getIssns().isEmpty()); + } + + @Test + public void test_parseArticleMeta_withValidData() { + // Arrange + Item item = Mockito.mock(Item.class); + Mockito.when(item.getTitle()).thenReturn(Collections.singletonList("Test Article Title")); + Person author = mockPerson("John", "Doe", "1234"); + Mockito.when(item.getAuthor()).thenReturn(Arrays.asList(author)); + Mockito.when(item.getEditor()).thenReturn(null); + Mockito.when(item.getTranslator()).thenReturn(null); + Mockito.when(item.getChair()).thenReturn(null); + + Date publishedDate = Mockito.mock(Date.class); + Mockito.when(publishedDate.getIndexedDateParts()).thenReturn(Arrays.asList(2023, 12, 25)); + Mockito.when(item.getPublished()).thenReturn(publishedDate); + + Mockito.when(item.getVolume()).thenReturn("10"); + Mockito.when(item.getIssue()).thenReturn("2"); + Mockito.when(item.getPartNumber()).thenReturn("A"); + Mockito.when(item.getPage()).thenReturn("123"); + Mockito.when(item.getUrl()).thenReturn("http://example.com"); + Mockito.when(item.getAbstractText()).thenReturn("Test Abstract"); + Mockito.when(item.getLanguage()).thenReturn("en"); + + Review review = Mockito.mock(Review.class); + Mockito.when(review.getCompetingInterestStatement()).thenReturn("No conflicts"); + Mockito.when(item.getReview()).thenReturn(review); + + Mockito.when(item.getType()).thenReturn("Research Article"); + Mockito.when(item.getReference()).thenReturn(Collections.emptyList()); + Mockito.when(item.getReferenceCount()).thenReturn(5); + + // Act + ArticleMeta result = crossRefParser.parseArticleMeta(item); + + // Assert + Assert.assertNotNull(result); + Assert.assertEquals("Test Article Title", result.getArticleTitle()); + Assert.assertNotNull(result.getContributors()); + Assert.assertEquals(1, result.getContributors().size()); + Assert.assertEquals("John", result.getContributors().get(0).getGivenName()); + Assert.assertEquals("Doe", result.getContributors().get(0).getSurname()); + Assert.assertEquals("25", result.getPublicationDate().getPublicationDate()); + Assert.assertEquals("12", result.getPublicationDate().getPublicationMonth()); + Assert.assertEquals("2023", 
result.getPublicationDate().getPublicationYear()); + Assert.assertEquals("10", result.getVolume()); + Assert.assertEquals("2", result.getIssue()); + Assert.assertEquals("A", result.getPartNumber()); + Assert.assertEquals("123", result.getFirstPage()); + Assert.assertEquals("http://example.com", result.getSelfUri()); + Assert.assertEquals("Test Abstract", result.getArticleAbstract()); + Assert.assertEquals("en", result.getLanguage()); + Assert.assertEquals("No conflicts", result.getReviewInfo().getFullDescription()); + Assert.assertEquals("Research Article", result.getDocumentType()); + Assert.assertEquals("5", result.getReferenceCount()); + } + + @Test + public void test_parseArticleMeta_withNullItem() { + try { + crossRefParser.parseArticleMeta(null); + Assert.fail("Expected NullPointerException"); + } catch (NullPointerException exception) { + Assert.assertNotNull(exception); + } + } + + @Test + public void test_parseArticleMeta_withEmptyTitle() { + Item item = Mockito.mock(Item.class); + Mockito.when(item.getTitle()).thenReturn(Collections.emptyList()); + + try { + crossRefParser.parseArticleMeta(item); + Assert.fail("Expected IndexOutOfBoundsException"); + } catch (IndexOutOfBoundsException exception) { + // Assert + Assert.assertNotNull(exception); + } + } + + private Person mockPerson(String given, String family, String orcid) { + Person person = Mockito.mock(Person.class); + Mockito.when(person.getGiven()).thenReturn(given); + Mockito.when(person.getFamily()).thenReturn(family); + Mockito.when(person.getName()).thenReturn(given + " " + family); + Mockito.when(person.getOrcid()).thenReturn(orcid); + Mockito.when(person.getAffiliation()).thenReturn(Collections.emptyList()); + return person; + } + +}
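
Reviewer note (not part of the diff): the sketch below shows one way to exercise the new CrossRef import path without going through Kafka and the crossrefReferenceImportProcessor listener. It is a minimal, hand-written example that assumes the DOIs in JobInfo are plain strings, that crossref-connect's default configuration is usable from the local environment, and it uses a made-up group id and DOI.

import java.util.Arrays;

import edu.asu.diging.citesphere.importer.core.model.BibEntry;
import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo;
import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator;
import edu.asu.diging.citesphere.importer.core.service.parse.impl.CrossRefParser;
import edu.asu.diging.citesphere.importer.core.service.parse.iterators.CrossRefIterator;

public class CrossRefImportSketch {

    public static void main(String[] args) {
        // In production this JobInfo is fetched from Citesphere via
        // ICitesphereConnector.getJobInfo(); here it is built by hand.
        JobInfo info = new JobInfo();
        info.setGroupId("12345");                      // hypothetical group id
        info.setDois(Arrays.asList("10.1000/xyz123")); // hypothetical DOI

        // CrossRefIterator resolves each DOI against the CrossRef works API
        // (via crossref-connect) and maps the response onto the internal model.
        BibEntryIterator iterator = new CrossRefIterator(info, new CrossRefParser());
        while (iterator.hasNext()) {
            BibEntry entry = iterator.next();
            if (entry == null) {
                // next() returns null when a single DOI lookup fails; skip it.
                continue;
            }
            System.out.println(entry.getArticleType() + ": "
                    + entry.getArticleMeta().getArticleTitle());
        }
        iterator.close();
    }
}

Running the sketch requires network access to the CrossRef REST API; in the deployed service the same iterator is driven by messages on REFERENCES_IMPORT_CROSSREF_TOPIC.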