Skip to content

Commit

Permalink
Add PMR ETL steps to prepare kinship files for import and trigger imp…
Browse files Browse the repository at this point in the history
…ort (#150)

* Add PMR ETL steps to prepare kinship files for import and trigger import
  • Loading branch information
bbimber authored Mar 25, 2024
1 parent cb5ce6b commit 74bc2b5
Show file tree
Hide file tree
Showing 6 changed files with 330 additions and 3 deletions.
1 change: 1 addition & 0 deletions PMR/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ dependencies {
BuildUtils.addLabKeyDependency(project: project, config: "modules", depProjectPath: ":server:modules:DiscvrLabKeyModules:Studies", depProjectConfig: "published", depExtension: "module")
BuildUtils.addLabKeyDependency(project: project, config: "modules", depProjectPath: ":server:modules:dataintegration", depProjectConfig: "published", depExtension: "module")
BuildUtils.addLabKeyDependency(project: project, config: "modules", depProjectPath: ":server:modules:DiscvrLabKeyModules:discvrcore", depProjectConfig: "published", depExtension: "module")
BuildUtils.addLabKeyDependency(project: project, config: "modules", depProjectPath: ":server:modules:onprcEHRModules:GeneticsCore", depProjectConfig: "published", depExtension: "module")

BuildUtils.addLabKeyDependency(project: project, config: "implementation", depProjectPath: ":server:modules:DiscvrLabKeyModules:Studies", depProjectConfig: "apiJarFile")
BuildUtils.addLabKeyDependency(project: project, config: "implementation", depProjectPath: ":server:modules:dataintegration", depProjectConfig: "apiJarFile")
Expand Down
14 changes: 14 additions & 0 deletions PMR/resources/etls/KinshipDataStaging.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
KinshipDataStaging ETL definition.
Contains a single TaskRefTransformStep; all of the work is delegated to the Java task
org.labkey.pmr.etl.TriggerRemoteGeneticsImportStep rather than to a source/destination query pair.
-->
<etl xmlns="http://labkey.org/etl/xml">
<name>KinshipDataStaging</name>
<description>Prepare PRIMe-seq Kinship Data for Import into PRIMe</description>
<transforms>
<transform id="kinship" type="TaskRefTransformStep">
<taskref ref="org.labkey.pmr.etl.TriggerRemoteGeneticsImportStep">
<settings>
<!-- remoteSource is the only setting the task declares as required; the task passes this value
     as the name when it looks up the remote connection used to trigger the import. -->
<setting name="remoteSource" value="EHR_ClinicalSource"/>
</settings>
</taskref>
</transform>
</transforms>
</etl>
1 change: 1 addition & 0 deletions PMR/resources/folderTypes/PMR.folderType.xml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
<modules>
<moduleName>PMR</moduleName>
<moduleName>EHR</moduleName>
<moduleName>GeneticsCore</moduleName>
</modules>
<defaultModule>PMR</defaultModule>
</folderType>
169 changes: 169 additions & 0 deletions PMR/src/org/labkey/pmr/etl/TriggerRemoteGeneticsImportStep.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
package org.labkey.pmr.etl;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.core5.http.HttpResponse;
import org.apache.hc.core5.http.HttpStatus;
import org.apache.xmlbeans.XmlException;
import org.jetbrains.annotations.NotNull;
import org.labkey.api.collections.CaseInsensitiveHashMap;
import org.labkey.api.data.Container;
import org.labkey.api.data.ContainerManager;
import org.labkey.api.di.DataIntegrationService;
import org.labkey.api.di.TaskRefTask;
import org.labkey.api.module.Module;
import org.labkey.api.module.ModuleLoader;
import org.labkey.api.module.ModuleProperty;
import org.labkey.api.pipeline.PipeRoot;
import org.labkey.api.pipeline.PipelineJob;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.pipeline.PipelineService;
import org.labkey.api.pipeline.RecordedActionSet;
import org.labkey.api.writer.ContainerUser;
import org.labkey.remoteapi.CommandException;
import org.labkey.remoteapi.CommandResponse;
import org.labkey.remoteapi.PostCommand;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;

/**
 * ETL task that stages kinship output files and then asks a remote server to import them.
 *
 * <p>When run, the task:
 * <ol>
 *   <li>verifies that it is executing in the container named by the EHR module's
 *       {@code EHRStudyContainer} module property;</li>
 *   <li>copies {@code kinship.txt} and {@code inbreeding.txt} from
 *       {@code <pipeline root>/kinship/EHR Kinship Calculation} into the directory named by the
 *       GeneticsCore module's {@code KinshipDataPath} module property, replacing existing copies;</li>
 *   <li>executes a {@code geneticscore/importGeneticsData} POST against the remote connection named
 *       by the {@code remoteSource} setting.</li>
 * </ol>
 *
 * <p>Every configuration problem is reported as a {@link PipelineJobException} with a descriptive message.
 */
public class TriggerRemoteGeneticsImportStep implements TaskRefTask
{
    /** Settings supplied by the ETL XML; keys are case-insensitive. */
    protected final Map<String, String> _settings = new CaseInsensitiveHashMap<>();

    /** Container/user the ETL runs as; assigned through {@link #setContainerUser(ContainerUser)}. */
    protected ContainerUser _containerUser;

    /** Names of the settings this task reads from the ETL definition. */
    private enum Settings
    {
        remoteSource()
    }

    /**
     * Stages the kinship files and triggers the remote import.
     *
     * @param job the pipeline job running this ETL step; its logger is used for progress messages
     * @return an empty {@link RecordedActionSet}
     * @throws PipelineJobException if a required module, module property, directory or file is missing,
     *                              if a file cannot be copied, or if the remote command fails
     */
    @Override
    public RecordedActionSet run(@NotNull PipelineJob job) throws PipelineJobException
    {
        Container currentContainer = _containerUser.getContainer();

        // Resolve the EHR study container and require that the ETL is being run from it:
        String ehrContainerPath = getModulePropertyValue("ehr", "EHRStudyContainer", currentContainer);
        if (ehrContainerPath == null)
        {
            throw new PipelineJobException("EHRStudyContainer has not been set");
        }

        Container ehrContainer = ContainerManager.getForPath(ehrContainerPath);
        if (ehrContainer == null)
        {
            throw new PipelineJobException("Invalid container: " + ehrContainerPath);
        }

        if (!currentContainer.equals(ehrContainer))
        {
            throw new PipelineJobException("This ETL can only be run from the EHRStudyContainer");
        }

        // Resolve (and create, if needed) the directory the files are staged into:
        String pipeDirPath = getModulePropertyValue("GeneticsCore", "KinshipDataPath", ehrContainer);
        if (pipeDirPath == null)
        {
            throw new PipelineJobException("Must provide the filepath to import data using the KinshipDataPath module property");
        }

        File targetPipelineDir = new File(pipeDirPath);
        if (!targetPipelineDir.exists())
        {
            targetPipelineDir.mkdirs();
        }

        // Covers both a failed mkdirs() and a path that exists but is a regular file:
        if (!targetPipelineDir.isDirectory())
        {
            throw new PipelineJobException("Unable to create target directory: " + targetPipelineDir.getPath());
        }

        // Then copy the files to the expected folder:
        PipeRoot pr = PipelineService.get().getPipelineRootSetting(ehrContainer);
        if (pr == null)
        {
            throw new PipelineJobException("Unable to find pipeline root for: " + ehrContainer);
        }

        File sourceDir = new File(pr.getRootPath(), "/kinship/EHR Kinship Calculation");
        if (!sourceDir.exists())
        {
            throw new PipelineJobException("Unable to find source pipeline dir: " + sourceDir.getPath());
        }

        copyReplaceFile(job, sourceDir, targetPipelineDir, "kinship.txt");
        copyReplaceFile(job, sourceDir, targetPipelineDir, "inbreeding.txt");

        // Then ping the main server to import these files:
        String remoteSource = _settings.get(Settings.remoteSource.name());
        DataIntegrationService.RemoteConnection rc = DataIntegrationService.get().getRemoteConnection(remoteSource, currentContainer, job.getLogger());
        if (rc == null)
        {
            throw new PipelineJobException("Unable to find remote connection: " + remoteSource);
        }

        try
        {
            KinshipCommand command = new KinshipCommand();
            command.execute(rc.connection, rc.remoteContainer);
        }
        catch (CommandException | IOException e)
        {
            throw new PipelineJobException(e);
        }

        return new RecordedActionSet();
    }

    /**
     * Looks up a module property and returns its effective value for a container.
     *
     * @param moduleName   name of the module that declares the property
     * @param propertyName name of the module property
     * @param container    container whose effective value is wanted
     * @return the trimmed value, or {@code null} if the value is unset or blank
     * @throws PipelineJobException if the module is not available or does not declare the property
     */
    private static String getModulePropertyValue(String moduleName, String propertyName, Container container) throws PipelineJobException
    {
        Module module = ModuleLoader.getInstance().getModule(moduleName);
        if (module == null)
        {
            throw new PipelineJobException("Unable to find module: " + moduleName);
        }

        ModuleProperty mp = module.getModuleProperties().get(propertyName);
        if (mp == null)
        {
            throw new PipelineJobException("Module " + moduleName + " does not define the property: " + propertyName);
        }

        return StringUtils.trimToNull(mp.getEffectiveValue(container));
    }

    /** POST command targeting the {@code importGeneticsData} action of the {@code geneticscore} controller. */
    private static class KinshipCommand extends PostCommand<CommandResponse>
    {
        public KinshipCommand()
        {
            super("geneticscore", "importGeneticsData");
        }
    }

    /**
     * Copies {@code filename} from {@code sourceDir} into {@code targetDir}, replacing any existing copy.
     *
     * @param job       pipeline job whose logger receives a warning if the old copy cannot be deleted
     * @param sourceDir directory that must contain the file
     * @param targetDir directory to copy the file into
     * @param filename  simple name of the file to copy
     * @throws PipelineJobException if the source file does not exist or the copy fails
     */
    private void copyReplaceFile(PipelineJob job, File sourceDir, File targetDir, String filename) throws PipelineJobException
    {
        File sourceFile = new File(sourceDir, filename);
        if (!sourceFile.exists())
        {
            throw new PipelineJobException("File does not exist: " + sourceFile.getPath());
        }

        File destFile = new File(targetDir, filename);
        if (destFile.exists() && !destFile.delete())
        {
            // Not fatal by itself: copyFile() below overwrites an existing destination, and will
            // raise an IOException if the file truly cannot be replaced.
            job.getLogger().warn("Unable to delete existing file, will attempt to overwrite: " + destFile.getPath());
        }

        try
        {
            FileUtils.copyFile(sourceFile, destFile);
        }
        catch (IOException e)
        {
            throw new PipelineJobException(e);
        }
    }

    /**
     * @return the names of the settings that the ETL definition must supply (only {@code remoteSource})
     */
    @Override
    public List<String> getRequiredSettings()
    {
        return Collections.singletonList(Settings.remoteSource.name());
    }

    /**
     * Adds the supplied settings to this task's settings map.
     *
     * @param settings setting name/value pairs from the ETL definition
     */
    @Override
    public void setSettings(Map<String, String> settings) throws XmlException
    {
        _settings.putAll(settings);
    }

    /**
     * @param containerUser the container/user context this task should run as
     */
    @Override
    public void setContainerUser(ContainerUser containerUser)
    {
        _containerUser = containerUser;
    }
}
10 changes: 10 additions & 0 deletions PMR/test/sampledata/PMR/testPedigree.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
99991 999911 2 Cynomolgus
999910 99993 99998 2 Cynomolgus
99992 999912 1 Cynomolgus
99993 999912 2 Rhesus
99994 999913 999914 1 Cynomolgus
99995 99991 99992 2 Cynomolgus
99996 99991 99992 1 Cynomolgus
99997 99993 99992 1 Cynomolgus
99998 99995 99992 1 Cynomolgus
99999 99995 99994 2 Cynomolgus
Loading

0 comments on commit 74bc2b5

Please sign in to comment.