Skip to content
This repository has been archived by the owner on Mar 13, 2023. It is now read-only.

Commit

Permalink
Feature/sentences python (#97)
Browse files Browse the repository at this point in the history
* sentences python server service

* code for installing and running the biomedicus python server from biomedicus

* Some changes to word embedding and text segment break patterns.

* Added a newline to the end of a file.

* Added the ability to specify classpath via environment to the runClass.sh script.

* Updated Kotlin version.

* Updated how settings paths are handled.

* Sentence detector that uses python server working.

* Use words list to get indices instead of full vector model.

* Compiles against JDK8-11

* Updated to use the new base pom.

* Started working on newt sentences client

* Some fixes that were done for the sentences project.

* Update to new v of biomedicus-base

* Removed unused/unfinished files
  • Loading branch information
benknoll-umn authored Jan 2, 2019
1 parent 30accf2 commit 7a5bd41
Show file tree
Hide file tree
Showing 79 changed files with 1,102 additions and 2,930 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ target/
*.orig
./distribution/src/main/resources/logs/biomedicus.log
distribution/src/main/resources/logs/biomedicus.log
~$*
~$*
3 changes: 3 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
dist: trusty
sudo: false
language: java
jdk:
- oraclejdk8
- oraclejdk9
- openjdk8
branches:
only:
Expand Down
38 changes: 37 additions & 1 deletion biomedicus-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@
<description>Core NLP and analysis functionality</description>

<properties>
<jackson.version>2.9.5</jackson.version>
<jackson.version>2.9.7</jackson.version>
<guice.version>4.2.0</guice.version>
<grpc.version>1.15.1</grpc.version>
</properties>

<build>
Expand Down Expand Up @@ -61,6 +62,31 @@
<artifactId>guice-multibindings</artifactId>
<version>${guice.version}</version>
</dependency>
<dependency>
<groupId>io.grpc</groupId>
<artifactId>grpc-netty-shaded</artifactId>
<version>${grpc.version}</version>
</dependency>
<dependency>
<groupId>io.grpc</groupId>
<artifactId>grpc-protobuf</artifactId>
<version>${grpc.version}</version>
</dependency>
<dependency>
<groupId>io.grpc</groupId>
<artifactId>grpc-stub</artifactId>
<version>${grpc.version}</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.8.5</version>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>3.11.0</version>
</dependency>
<dependency>
<groupId>org.yaml</groupId>
<artifactId>snakeyaml</artifactId>
Expand All @@ -86,6 +112,16 @@
<artifactId>opennlp-tools</artifactId>
<version>1.8.4</version>
</dependency>
<dependency>
<groupId>org.glassfish.jersey.core</groupId>
<artifactId>jersey-client</artifactId>
<version>2.27</version>
</dependency>
<dependency>
<groupId>org.glassfish.jersey.media</groupId>
<artifactId>jersey-media-json-jackson</artifactId>
<version>2.27</version>
</dependency>
<dependency>
<groupId>edu.umn.biomedicus</groupId>
<artifactId>biomedicus-measures</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ static class Loader extends DataLoader<AcronymExpansionsModel> {
private final Path expansionsModelPath;

@Inject
Loader(@Setting("acronym.expansionsModel.path") Path expansionsModelPath) {
Loader(@Setting("acronym.expansionsModel.asDataPath") Path expansionsModelPath) {
this.expansionsModelPath = expansionsModelPath;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,8 @@ static class Loader extends DataLoader<AcronymVectorModel> {
public Loader(
@Nullable Provider<AlignmentModel> alignmentModel,
@Setting("acronym.useAlignment") Boolean useAlignment,
@Setting("acronym.vector.model.path") Path vectorSpacePath,
@Setting("acronym.senseMap.path") Path senseMapPath,
@Setting("acronym.vector.model.asDataPath") Path vectorSpacePath,
@Setting("acronym.senseMap.senseVectors.asDataPath") Path senseMapPath,
@Setting("acronym.senseMap.inMemory") Boolean sensesInMemory,
@Setting("acronym.cutoffScore") Double cutoffScore,
AcronymExpansionsModel expansionsModel
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ static class Loader extends DataLoader<AlignmentModel> {
private final Path modelPath;

@Inject
public Loader(@Setting("acronym.alignmentModel.path") Path modelPath) {
public Loader(@Setting("acronym.alignmentModel.asDataPath") Path modelPath) {
this.modelPath = modelPath;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ static class Loader extends DataLoader<OrthographicAcronymModel> {
private IndexMap<Character> symbols;

@Inject
Loader(@Setting("acronym.orthographicModel.path") Path orthographicModel) {
Loader(@Setting("acronym.orthographicModel.asDataPath") Path orthographicModel) {
this.orthographicModel = orthographicModel;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,10 @@ public class ConceptDictionaryBuilder {

@Inject
ConceptDictionaryBuilder(
@Setting("concepts.filters.sui.path") Path filteredSuisPath,
@Setting("concepts.filters.cui.path") Path filteredCuisPath,
@Setting("concepts.filters.suicui.path") Path filteredSuiCuisPath,
@Setting("concepts.filters.tui.path") Path filteredTuisPath,
@Setting("concepts.filters.sui.asDataPath") Path filteredSuisPath,
@Setting("concepts.filters.cui.asDataPath") Path filteredCuisPath,
@Setting("concepts.filters.suicui.asDataPath") Path filteredSuiCuisPath,
@Setting("concepts.filters.tui.asDataPath") Path filteredTuisPath,
Vocabulary vocabulary
) throws IOException {
Pattern splitter = Pattern.compile(",");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public class ConceptDictionaryLoader extends DataLoader<ConceptDictionary> {
private final boolean inMemory;

@Inject
ConceptDictionaryLoader(@Setting("concepts.db.path") Path dbPath,
ConceptDictionaryLoader(@Setting("concepts.db.asDataPath") Path dbPath,
@Setting("concepts.inMemory") boolean inMemory) {
this.dbPath = dbPath;
this.inMemory = inMemory;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,10 @@ public class SemanticTypeNetworkLoader extends DataLoader<SemanticTypeNetwork> {
private final Path semgroupsPath;

@Inject
public SemanticTypeNetworkLoader(@Setting("semanticNetwork.srdef.path") Path srdefPath,
@Setting("semanticNetwork.semgroups.path") Path semgroupsPath) {
public SemanticTypeNetworkLoader(
@Setting("semanticNetwork.srdef.asDataPath") Path srdefPath,
@Setting("semanticNetwork.semgroups.asDataPath") Path semgroupsPath
) {
this.srdefPath = srdefPath;
this.semgroupsPath = semgroupsPath;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public ExampleJavaResource(List<String> values) {
}

@Inject
public ExampleJavaResource(@Setting("example.valuesFile") Path valuesFile) throws IOException {
public ExampleJavaResource(@Setting("example.valuesFile.asDataPath") Path valuesFile) throws IOException {
this(Files.readAllLines(valuesFile));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1403,12 +1403,24 @@ State enterLoop(DefaultSearcher search, State state) {
if (0 < min) {
search.locals[countLocal] = 1;
result = body.search(search, state);
State nextResult = next.search(search, result);
if (nextResult.isMiss()) {
return nextResult;
}
return nextResult.setBegin(result.begin);
} else if (0 < max) {
search.locals[countLocal] = 1;
result = body.search(search, state);
State nextResult = next.search(search, state);
if (result.isMiss() || nextResult.begin < result.begin) {
result = nextResult;
} else {
nextResult = next.search(search, result);
if (!nextResult.isMiss()) {
result = nextResult.setBegin(result.begin);
} else {
result = nextResult;
}
}
} else {
search.locals[countLocal] = 1;
Expand Down Expand Up @@ -1444,7 +1456,7 @@ State search(DefaultSearcher search, State state) {
State result = body.search(search, state);
if (result.isMiss()) {
search.locals[countLocal] = count;
return next.search(search, state);
return state;
} else {
return result;
}
Expand All @@ -1456,7 +1468,7 @@ State search(DefaultSearcher search, State state) {
return State.miss();
} else {
search.locals[countLocal] = count;
return next.search(search, state);
return state;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,21 @@
import com.google.inject.Key;
import com.google.inject.name.Named;
import edu.umn.biomedicus.annotations.Setting;

import java.lang.annotation.Annotation;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;
import javax.annotation.Nullable;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Takes dictionaries of settings (usually loaded from configuration files, and turns them into
* a Map of Guice keys so that they can be bound for injection.
* Takes dictionaries of settings (usually loaded from configuration files) and turns them into a
* Map of Guice keys so that they can be bound for injection.
*
* @author Ben Knoll
* @since 1.5.0
Expand Down Expand Up @@ -78,11 +80,35 @@ void addAll(Map<String, ?> settingsMap) {

if (value instanceof Map) {
throw new IllegalStateException("Maps should already be collapsed at this point.");
} else if (value instanceof String && endsWithPathFileDir(key)) {
Path path = absoluteOrResolveAgainstData(Paths.get((String) value));
settings.putIfAbsent(Key.get(Path.class, annotationFunction.apply(key)), path);
settings.putIfAbsent(Key.get(String.class, annotationFunction.apply(key)), path.toString());
settings.putIfAbsent(Key.get(String.class, annotationFunction.apply(key + ".orig")), value);
} else if (value instanceof String) {
settings.putIfAbsent(
Key.get(String.class, annotationFunction.apply(key)),
value
);
Path path = Paths.get((String) value);
settings.putIfAbsent(
Key.get(Path.class, annotationFunction.apply(key + ".asPath")),
path
);
if (!path.isAbsolute()) {
settings.putIfAbsent(
Key.get(Path.class, annotationFunction.apply(key + ".asDataPath")),
dataPath.resolve(path)
);
settings.putIfAbsent(
Key.get(String.class, annotationFunction.apply(key + ".asDataPath")),
dataPath.resolve(path).toString()
);
} else {
settings.putIfAbsent(
Key.get(Path.class, annotationFunction.apply(key + ".asDataPath")),
path
);
settings.putIfAbsent(
Key.get(String.class, annotationFunction.apply(key + ".asDataPath")),
path.toString()
);
}
} else {
addSetting(key, value, value.getClass());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public class PTBArtifactSource implements ArtifactSource {

@Inject
PTBArtifactSource(
@ComponentSetting("inputDirectory") Path directoryPath,
@ComponentSetting("inputDirectory.asPath") Path directoryPath,
@ComponentSetting("extension") String extension,
@ComponentSetting("charsetName") String charsetName,
@ComponentSetting("documentName") String documentName
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public class PtbTagsWriter implements DocumentTask {

@Inject
public PtbTagsWriter(
@ComponentSetting("writer.ptbTags.outputDir.path") Path outputDir
@ComponentSetting("writer.ptbTags.outputDir.asPath") Path outputDir
) {
this.outputDir = outputDir;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public class TextFilesArtifactSource implements ArtifactSource {

@Inject
TextFilesArtifactSource(
@ComponentSetting("inputDirectory.orig") String directoryPath,
@ComponentSetting("inputDirectory") String directoryPath,
@ComponentSetting("extension") String extension,
@ComponentSetting("charsetName") String charsetName,
@ComponentSetting("documentName") String documentName
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public class NumberContextWriter implements DocumentTask {

@Inject
public NumberContextWriter(
@ComponentSetting("outputDirectory.orig") Path outputDirectory,
@ComponentSetting("outputDirectory.asPath") Path outputDirectory,
@ComponentSetting("contextSize") Integer contextSize
) {
this.outputDirectory = outputDirectory;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ public class NumberModelLoader extends DataLoader<NumberModel> {

@Inject
public NumberModelLoader(
@Setting("measures.numbers.nrnumPath") Path nrnumPath,
@Setting("measures.numbers.nrvarPath") Path nrvarPath
@Setting("measures.numbers.nrnum.asDataPath") Path nrnumPath,
@Setting("measures.numbers.nrvar.asDataPath") Path nrvarPath
) {
this.nrnumPath = nrnumPath;
this.nrvarPath = nrvarPath;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public final class NormalizerModelLoader extends DataLoader<NormalizerModel> {
private final boolean inMemory;

@Inject
NormalizerModelLoader(@Setting("normalization.db.path") Path dbPath,
NormalizerModelLoader(@Setting("normalization.db.asDataPath") Path dbPath,
@Setting("normalization.inMemory") boolean inMemory) {
this.dbPath = dbPath;
this.inMemory = inMemory;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public class RuleBasedSectionHeaderDetector implements DocumentsProcessor {
*/
@Inject
RuleBasedSectionHeaderDetector(
@ComponentSetting("sections.headers.path") Path path
@ComponentSetting("sections.headers.asDataPath") Path path
) throws BiomedicusException {
headers = Patterns.loadPatternByJoiningLines(path);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public static class Loader extends DataLoader<ONLPSentenceModel> {
private final Path path;

@Inject
public Loader(@Setting("opennlp.sentence.model.path") Path path) {
public Loader(@Setting("opennlp.sentence.model.asDataPath") Path path) {
this.path = path;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public class ONLPSentenceTrainer implements ArtifactsProcessor {
@Inject
ONLPSentenceTrainer(
AcronymExpansionsModel acronymExpansionsModel,
@ComponentSetting("outputDirectory.orig") Path outputPath,
@ComponentSetting("outputDirectory.asPath") Path outputPath,
@ComponentSetting("documentName") String documentName,
@ComponentSetting("eosChars") String eosChars,
@ComponentSetting("useTokenEnd") Boolean useTokenEnd,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public static class Loader extends DataLoader<StopwordsModel> {
private final Path stopwordsPath;

@Inject
public Loader(@Setting("stopwords.fileBased.path") Path stopwordsPath) {
public Loader(@Setting("stopwords.fileBased.asDataPath") Path stopwordsPath) {
this.stopwordsPath = stopwordsPath;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public final class SyntaxnetParser implements DocumentTask {

@Inject
SyntaxnetParser(
@Setting("syntaxnet.installationDir.path") Path installationDir,
@Setting("syntaxnet.installationDir.asPath") Path installationDir,
@Setting("syntaxnet.modelDir") String modelDirString
) {
this.installationDir = installationDir;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class RocksDbDataStoreFactory implements DataStoreFactory, LifecycleManaged {
private Path dbPath;

@Inject
public RocksDbDataStoreFactory(@Setting("tnt.word.dbPath") Path dbPath,
public RocksDbDataStoreFactory(@Setting("tnt.word.db.asDataPath") Path dbPath,
@Setting("tnt.word.inMemory") boolean inMemory) {
this.dbPath = dbPath;
this.inMemory = inMemory;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public class TnTTrainerProcessor implements ArtifactsProcessor {
@Inject
TnTTrainerProcessor(
@ComponentSetting("tnt.train.viewName") String viewName,
@ComponentSetting("tnt.train.outputDir") Path outputDir,
@ComponentSetting("tnt.train.outputDir.asPath") Path outputDir,
DataStoreFactory dataStoreFactory
) {
this.viewName = viewName;
Expand Down
Loading

0 comments on commit 7a5bd41

Please sign in to comment.