Skip to content

Commit

Permalink
Merge pull request #7 from diging/develop
Browse files Browse the repository at this point in the history
Prepare release
  • Loading branch information
jdamerow authored Aug 16, 2019
2 parents 9946024 + d5cbabc commit 5c8109c
Show file tree
Hide file tree
Showing 87 changed files with 873 additions and 207 deletions.
6 changes: 6 additions & 0 deletions citesphere-importer/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,12 @@
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>4.5</version>
</dependency>

</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,21 @@ public class Reference {

private String authorString;
private List<Contributor> contributors;
private String title;
private String year;
private String identifier;
private String identifierType;
private String firstPage;
private String endPage;
private String volume;
private String source;
private String referenceId;
private String referenceLabel;
private String publicationType;
private String citationId;

private String referenceString;
private String referenceStringRaw;

public String getAuthorString() {
return authorString;
Expand All @@ -27,6 +34,12 @@ public List<Contributor> getContributors() {
/**
 * Stores the given contributor list on this reference. The list is kept
 * as-is; no copy is made.
 *
 * @param contributors the contributor list to store
 */
public void setContributors(List<Contributor> contributors) {
this.contributors = contributors;
}
/**
 * @return the title stored on this reference
 */
public String getTitle() {
return title;
}
/**
 * @param title the title to store on this reference
 */
public void setTitle(String title) {
this.title = title;
}
/**
 * @return the year stored on this reference; the value is kept as a String
 */
public String getYear() {
return year;
}
Expand Down Expand Up @@ -57,6 +70,12 @@ public String getEndPage() {
/**
 * @param endPage the end page to store on this reference
 */
public void setEndPage(String endPage) {
this.endPage = endPage;
}
/**
 * @return the volume stored on this reference; the value is kept as a String
 */
public String getVolume() {
return volume;
}
/**
 * @param volume the volume to store on this reference
 */
public void setVolume(String volume) {
this.volume = volume;
}
/**
 * @return the source stored on this reference
 */
public String getSource() {
return source;
}
Expand All @@ -69,5 +88,35 @@ public String getReferenceString() {
/**
 * @param referenceString the reference string to store on this reference
 */
public void setReferenceString(String referenceString) {
this.referenceString = referenceString;
}
/**
 * @return the reference id stored on this reference
 */
public String getReferenceId() {
return referenceId;
}
/**
 * @param referenceId the reference id to store on this reference
 */
public void setReferenceId(String referenceId) {
this.referenceId = referenceId;
}
/**
 * @return the reference label stored on this reference
 */
public String getReferenceLabel() {
return referenceLabel;
}
/**
 * @param referenceLabel the reference label to store on this reference
 */
public void setReferenceLabel(String referenceLabel) {
this.referenceLabel = referenceLabel;
}
/**
 * @return the publication type stored on this reference; the value is a
 *         plain String
 */
public String getPublicationType() {
return publicationType;
}
/**
 * @param publicationType the publication type to store on this reference
 */
public void setPublicationType(String publicationType) {
this.publicationType = publicationType;
}
/**
 * @return the citation id stored on this reference
 */
public String getCitationId() {
return citationId;
}
/**
 * @param citationId the citation id to store on this reference
 */
public void setCitationId(String citationId) {
this.citationId = citationId;
}
/**
 * @return the raw reference string stored on this reference; it is held in
 *         a field separate from the one behind {@code setReferenceString}
 */
public String getReferenceStringRaw() {
return referenceStringRaw;
}
/**
 * @param referenceStringRaw the raw reference string to store on this
 *                           reference
 */
public void setReferenceStringRaw(String referenceStringRaw) {
this.referenceStringRaw = referenceStringRaw;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,9 @@ public void process(KafkaJobMessage message) {
}

}


bibIterator.close();

ItemCreationResponse response = null;
if (entryCounter > 0) {
response = submitEntries(root, info);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package edu.asu.diging.citesphere.importer.core.service.parse.impl;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import com.opencsv.CSVParser;
import com.opencsv.CSVParserBuilder;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;

import edu.asu.diging.citesphere.importer.core.exception.HandlerTestException;
import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException;
import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo;
import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator;
import edu.asu.diging.citesphere.importer.core.service.parse.FileHandler;
import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry;
import edu.asu.diging.citesphere.importer.core.service.parse.iterators.WoSTabDelimitedIterator;
import edu.asu.diging.citesphere.importer.core.service.parse.wos.tagged.IArticleWoSTagParser;

/**
 * {@link FileHandler} for WoS tab-delimited files.
 *
 * <p>
 * A file is accepted when its name ends in {@code .txt} or {@code .csv}, is
 * not a hidden (dot-prefixed) file, and every column heading in its first
 * line is exactly two characters long after trimming.
 * </p>
 */
@Service
public class WoSTabDelimitedHandler implements FileHandler {

    private final Logger logger = LoggerFactory.getLogger(getClass());

    @Autowired
    private IArticleWoSTagParser parserRegistry;

    /**
     * Checks whether the file at the given path looks like a tab-delimited file
     * this handler can process.
     *
     * @param path path of the file to test
     * @return {@code true} if the extension matches and all headings of the
     *         first line are two characters long; {@code false} otherwise,
     *         including for empty or unreadable files
     * @throws HandlerTestException declared by the interface; not thrown by this
     *                              implementation
     */
    @Override
    public boolean canHandle(String path) throws HandlerTestException {
        File file = new File(path);
        String lowerCasePath = path.toLowerCase();

        boolean supportedExtension = lowerCasePath.endsWith(".txt") || lowerCasePath.endsWith(".csv");
        if (!supportedExtension || file.getName().startsWith(".")) {
            return false;
        }

        final CSVParser parser = new CSVParserBuilder().withSeparator('\t').withIgnoreQuotations(true).build();
        try (CSVReader reader = new CSVReaderBuilder(new FileReader(path)).withCSVParser(parser)
                .build()) {
            String[] firstLine = reader.readNext();
            if (firstLine == null) {
                // readNext() yields null when there is nothing to read (empty file);
                // without this check the loop below would throw a NullPointerException.
                return false;
            }
            for (String heading : firstLine) {
                if (heading.trim().length() != 2) {
                    return false;
                }
            }
            return true;
        } catch (IOException e) {
            logger.debug("Not a CSV file.", e);
            return false;
        }
    }

    /**
     * Creates an iterator over the entries of the given file.
     *
     * @param path     path of the file to iterate over
     * @param callback handler registry; not used by this handler
     * @param info     job information; not used by this handler
     * @return a new {@link WoSTabDelimitedIterator} for the file
     * @throws IteratorCreationException declared by the interface; not thrown by
     *                                   this implementation
     */
    @Override
    public BibEntryIterator getIterator(String path, IHandlerRegistry callback, JobInfo info)
            throws IteratorCreationException {
        return new WoSTabDelimitedIterator(path, parserRegistry);
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactoryConfigurationError;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -49,6 +51,7 @@ private void init() {

private void parseDocument() {
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
dbFactory.setNamespaceAware(true);
DocumentBuilder dBuilder;
Document doc;
try {
Expand All @@ -63,6 +66,11 @@ private void parseDocument() {
article.setArticleType(typeMap.get(doc.getDocumentElement().getAttribute("article-type")));
article.setJournalMeta(parseJournalMeta(doc.getDocumentElement()));
article.setArticleMeta(parseArticleMeta(doc.getDocumentElement()));
try {
parseBack(doc.getDocumentElement(), article.getArticleMeta());
} catch (TransformerConfigurationException | TransformerFactoryConfigurationError e) {
logger.error("Could not parse back.", e);
}

}

Expand Down Expand Up @@ -98,6 +106,19 @@ private ArticleMeta parseArticleMeta(Element element) {
return meta;
}

/**
 * Looks up the {@code back} elements below the given element and passes every
 * child node of the first one, together with the given meta object, to
 * {@code tagParserRegistry.parseArticleMetaTag}. Does nothing when no
 * {@code back} element exists.
 *
 * @param element element below which {@code back} elements are searched
 * @param meta    article meta object handed to the tag parser registry
 */
private void parseBack(Element element, ArticleMeta meta) throws TransformerConfigurationException, TransformerFactoryConfigurationError {
    NodeList backElements = element.getElementsByTagName("back");
    if (backElements.getLength() > 0) {
        // only the first <back> element is processed
        Node firstBack = backElements.item(0);
        NodeList backChildren = firstBack.getChildNodes();

        int position = 0;
        while (position < backChildren.getLength()) {
            tagParserRegistry.parseArticleMetaTag(backChildren.item(position), meta);
            position++;
        }
    }
}

/* (non-Javadoc)
* @see edu.asu.diging.citesphere.importer.core.service.parse.impl.BibEntryIterator#next()
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package edu.asu.diging.citesphere.importer.core.service.parse.iterators;

import java.io.FileReader;
import java.io.IOException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.opencsv.CSVParser;
import com.opencsv.CSVParserBuilder;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;

import edu.asu.diging.citesphere.importer.core.model.BibEntry;
import edu.asu.diging.citesphere.importer.core.model.impl.ArticleMeta;
import edu.asu.diging.citesphere.importer.core.model.impl.ContainerMeta;
import edu.asu.diging.citesphere.importer.core.model.impl.Publication;
import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator;
import edu.asu.diging.citesphere.importer.core.service.parse.wos.tagged.IArticleWoSTagParser;

/**
 * {@link BibEntryIterator} that reads a tab-delimited file line by line. The
 * first line is read as the column headers; for every following line each
 * header/value pair is passed to the tag parser registry to populate a new
 * {@link Publication}.
 *
 * <p>
 * If the file cannot be opened, the error is logged and the iterator behaves
 * as an empty one: {@link #hasNext()} returns {@code false} and
 * {@link #next()} returns {@code null}.
 * </p>
 */
public class WoSTabDelimitedIterator implements BibEntryIterator {

    private final Logger logger = LoggerFactory.getLogger(getClass());

    private IArticleWoSTagParser tagParserRegistry;
    private String filePath;

    // stays null when the file could not be opened (see init())
    private CSVReader reader;
    // stays null when the file is empty or the header line could not be read
    private String[] headers;

    /**
     * Opens the file and reads its header line.
     *
     * @param filePath       path of the tab-delimited file to read
     * @param parserRegistry parser that receives every header/value pair
     */
    public WoSTabDelimitedIterator(String filePath, IArticleWoSTagParser parserRegistry) {
        this.filePath = filePath;
        this.tagParserRegistry = parserRegistry;
        init();
    }

    /**
     * Creates the reader and consumes the header line. I/O problems are logged,
     * not rethrown, so the fields may remain null afterwards.
     */
    private void init() {
        final CSVParser parser = new CSVParserBuilder().withSeparator('\t').withIgnoreQuotations(true).build();
        try {
            reader = new CSVReaderBuilder(new FileReader(filePath)).withCSVParser(parser).build();
            headers = reader.readNext();
        } catch (IOException e) {
            logger.error("Could not open CSVReader.", e);
        }
    }

    /**
     * Reads the next line and turns it into an entry.
     *
     * @return the entry built from the next line; an entry without parsed
     *         fields if there is no further line; {@code null} if the reader
     *         could not be opened or the line could not be read
     */
    @Override
    public BibEntry next() {
        if (reader == null) {
            // opening the file failed in init(); there is nothing to read from
            return null;
        }

        ArticleMeta articleMeta = new ArticleMeta();
        ContainerMeta containerMeta = new ContainerMeta();

        BibEntry entry = new Publication();
        entry.setArticleMeta(articleMeta);
        entry.setJournalMeta(containerMeta);

        String[] line;
        try {
            line = reader.readNext();
        } catch (IOException e) {
            logger.debug("Could not read next line.", e);
            return null;
        }

        if (line != null && headers != null) {
            // apparently some lines have additional tabs at the end, so never
            // read past the number of known headers
            int columns = Math.min(line.length, headers.length);
            for (int i = 0; i < columns; i++) {
                String field = headers[i];
                String value = line[i];
                tagParserRegistry.parseMetaTag(field, value, null, -1, entry, true);
            }
        }

        return entry;
    }

    /**
     * @return {@code true} if another line can be read; {@code false} if the
     *         end of the file is reached, the reader could not be opened, or
     *         checking for the next line failed
     */
    @Override
    public boolean hasNext() {
        if (reader == null) {
            return false;
        }
        try {
            return reader.peek() != null;
        } catch (IOException e) {
            logger.error("Could not check for next line.", e);
        }
        return false;
    }

    /**
     * Closes the underlying reader if one was opened; failures are logged.
     */
    @Override
    public void close() {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                logger.error("Couldn't close csv reader.", e);
            }
        }
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ public BibEntry next() {
} else {
fieldIdx = 0;
}
tagParserRegistry.parseMetaTag(field, value, previousField, fieldIdx, entry);
tagParserRegistry.parseMetaTag(field, value, previousField, fieldIdx, entry, false);

previousField = field;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
import java.util.List;
import java.util.Map;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import edu.asu.diging.citesphere.importer.core.model.impl.Affiliation;
import edu.asu.diging.citesphere.importer.core.model.impl.ArticleMeta;
Expand All @@ -17,6 +17,9 @@
@Component
public class ContributorHandler extends TagHandler implements ArticleMetaTagHandler {

@Autowired
private ContributorHelper contributorHelper;

@Override
public String handledTag() {
return "contrib-group";
Expand All @@ -38,8 +41,7 @@ public void handle(Node node, ArticleMeta articleMeta) {
if (!names.isEmpty()) {
// there should be only one name inside a contributor tag
Node stringName = names.get(0);
NodeList nameParts = stringName.getChildNodes();
setContributorName(contributor, nameParts);
contributorHelper.setContributorData(stringName, contributor);

articleMeta.getContributors().add(contributor);
}
Expand Down Expand Up @@ -126,18 +128,5 @@ private void handleUnlinkedAffiliations(ArticleMeta articleMeta, List<Node> affs
}
}

/**
 * Copies the name parts found in the given node list onto the contributor:
 * the text of a {@code given-names} node becomes the given name, the text of
 * a {@code surname} node becomes the surname. Other nodes are ignored, and a
 * {@code null} list leaves the contributor untouched.
 *
 * @param contributor contributor whose name fields are set
 * @param nameParts   child nodes of the name element; may be {@code null}
 */
private void setContributorName(Contributor contributor, NodeList nameParts) {
    if (nameParts == null) {
        return;
    }

    int position = 0;
    while (position < nameParts.getLength()) {
        Node part = nameParts.item(position);
        position++;

        String tagName = part.getNodeName();
        if (tagName.equals("given-names")) {
            contributor.setGivenName(part.getTextContent());
        } else if (tagName.equals("surname")) {
            contributor.setSurname(part.getTextContent());
        }
    }
}

}
Loading

0 comments on commit 5c8109c

Please sign in to comment.