-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #7 from diging/develop
Prepare release
- Loading branch information
Showing
87 changed files
with
873 additions
and
207 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
65 changes: 65 additions & 0 deletions
65
...va/edu/asu/diging/citesphere/importer/core/service/parse/impl/WoSTabDelimitedHandler.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
package edu.asu.diging.citesphere.importer.core.service.parse.impl; | ||
|
||
import java.io.File; | ||
import java.io.FileReader; | ||
import java.io.IOException; | ||
|
||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import org.springframework.beans.factory.annotation.Autowired; | ||
import org.springframework.stereotype.Service; | ||
|
||
import com.opencsv.CSVParser; | ||
import com.opencsv.CSVParserBuilder; | ||
import com.opencsv.CSVReader; | ||
import com.opencsv.CSVReaderBuilder; | ||
|
||
import edu.asu.diging.citesphere.importer.core.exception.HandlerTestException; | ||
import edu.asu.diging.citesphere.importer.core.exception.IteratorCreationException; | ||
import edu.asu.diging.citesphere.importer.core.service.impl.JobInfo; | ||
import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; | ||
import edu.asu.diging.citesphere.importer.core.service.parse.FileHandler; | ||
import edu.asu.diging.citesphere.importer.core.service.parse.IHandlerRegistry; | ||
import edu.asu.diging.citesphere.importer.core.service.parse.iterators.WoSTabDelimitedIterator; | ||
import edu.asu.diging.citesphere.importer.core.service.parse.wos.tagged.IArticleWoSTagParser; | ||
|
||
@Service | ||
public class WoSTabDelimitedHandler implements FileHandler { | ||
|
||
private final Logger logger = LoggerFactory.getLogger(getClass()); | ||
|
||
@Autowired | ||
private IArticleWoSTagParser parserRegistry; | ||
|
||
@Override | ||
public boolean canHandle(String path) throws HandlerTestException { | ||
File file = new File(path); | ||
|
||
if ((path.toLowerCase().endsWith(".txt") || path.toLowerCase().endsWith(".csv")) | ||
&& !file.getName().startsWith(".")) { | ||
final CSVParser parser = new CSVParserBuilder().withSeparator('\t').withIgnoreQuotations(true).build(); | ||
try (CSVReader reader = new CSVReaderBuilder(new FileReader(path)).withCSVParser(parser) | ||
.build()) { | ||
String[] firstLine = reader.readNext(); | ||
for (String heading : firstLine) { | ||
if (heading.trim().length() != 2) { | ||
return false; | ||
} | ||
} | ||
return true; | ||
} catch (IOException e) { | ||
logger.debug("Not a CSV file.", e); | ||
return false; | ||
} | ||
} | ||
|
||
return false; | ||
} | ||
|
||
@Override | ||
public BibEntryIterator getIterator(String path, IHandlerRegistry callback, JobInfo info) | ||
throws IteratorCreationException { | ||
return new WoSTabDelimitedIterator(path, parserRegistry); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
100 changes: 100 additions & 0 deletions
100
.../asu/diging/citesphere/importer/core/service/parse/iterators/WoSTabDelimitedIterator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
package edu.asu.diging.citesphere.importer.core.service.parse.iterators; | ||
|
||
import java.io.FileReader; | ||
import java.io.IOException; | ||
|
||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import com.opencsv.CSVParser; | ||
import com.opencsv.CSVParserBuilder; | ||
import com.opencsv.CSVReader; | ||
import com.opencsv.CSVReaderBuilder; | ||
|
||
import edu.asu.diging.citesphere.importer.core.model.BibEntry; | ||
import edu.asu.diging.citesphere.importer.core.model.impl.ArticleMeta; | ||
import edu.asu.diging.citesphere.importer.core.model.impl.ContainerMeta; | ||
import edu.asu.diging.citesphere.importer.core.model.impl.Publication; | ||
import edu.asu.diging.citesphere.importer.core.service.parse.BibEntryIterator; | ||
import edu.asu.diging.citesphere.importer.core.service.parse.wos.tagged.IArticleWoSTagParser; | ||
|
||
public class WoSTabDelimitedIterator implements BibEntryIterator { | ||
|
||
private final Logger logger = LoggerFactory.getLogger(getClass()); | ||
|
||
private IArticleWoSTagParser tagParserRegistry; | ||
private String filePath; | ||
|
||
private CSVReader reader; | ||
private String[] headers; | ||
|
||
public WoSTabDelimitedIterator(String filePath, IArticleWoSTagParser parserRegistry) { | ||
this.filePath = filePath; | ||
this.tagParserRegistry = parserRegistry; | ||
init(); | ||
} | ||
|
||
private void init() { | ||
final CSVParser parser = new CSVParserBuilder().withSeparator('\t').withIgnoreQuotations(true).build(); | ||
try { | ||
reader = new CSVReaderBuilder(new FileReader(filePath)).withCSVParser(parser).build(); | ||
headers = reader.readNext(); | ||
} catch (IOException e) { | ||
logger.error("Could not open CSVReader.", e); | ||
} | ||
} | ||
|
||
@Override | ||
public BibEntry next() { | ||
ArticleMeta articleMeta = new ArticleMeta(); | ||
ContainerMeta containerMeta = new ContainerMeta(); | ||
|
||
BibEntry entry = new Publication(); | ||
entry.setArticleMeta(articleMeta); | ||
entry.setJournalMeta(containerMeta); | ||
|
||
String[] line; | ||
try { | ||
line = reader.readNext(); | ||
} catch (IOException e) { | ||
logger.debug("Could not read next line.", e); | ||
return null; | ||
} | ||
|
||
if (line != null) { | ||
for (int i = 0; i < line.length; i++) { | ||
// apparently some lines have additional tabs at the end | ||
if (i >= headers.length) { | ||
break; | ||
} | ||
String field = headers[i]; | ||
String value = line[i]; | ||
tagParserRegistry.parseMetaTag(field, value, null, -1, entry, true); | ||
} | ||
} | ||
|
||
return entry; | ||
} | ||
|
||
@Override | ||
public boolean hasNext() { | ||
try { | ||
return reader.peek() != null; | ||
} catch (IOException e) { | ||
logger.error("Could not check for next line.", e); | ||
} | ||
return false; | ||
} | ||
|
||
@Override | ||
public void close() { | ||
if (reader != null) { | ||
try { | ||
reader.close(); | ||
} catch (IOException e) { | ||
logger.error("Couldn't close csv reader.", e); | ||
} | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.