Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add PMR ETL steps to prepare kinship files for import and trigger import #150

Merged
merged 5 commits into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions PMR/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ dependencies {
BuildUtils.addLabKeyDependency(project: project, config: "modules", depProjectPath: ":server:modules:DiscvrLabKeyModules:Studies", depProjectConfig: "published", depExtension: "module")
BuildUtils.addLabKeyDependency(project: project, config: "modules", depProjectPath: ":server:modules:dataintegration", depProjectConfig: "published", depExtension: "module")
BuildUtils.addLabKeyDependency(project: project, config: "modules", depProjectPath: ":server:modules:DiscvrLabKeyModules:discvrcore", depProjectConfig: "published", depExtension: "module")
BuildUtils.addLabKeyDependency(project: project, config: "modules", depProjectPath: ":server:modules:onprcEHRModules:GeneticsCore", depProjectConfig: "published", depExtension: "module")

BuildUtils.addLabKeyDependency(project: project, config: "implementation", depProjectPath: ":server:modules:DiscvrLabKeyModules:Studies", depProjectConfig: "apiJarFile")
BuildUtils.addLabKeyDependency(project: project, config: "implementation", depProjectPath: ":server:modules:dataintegration", depProjectConfig: "apiJarFile")
Expand Down
14 changes: 14 additions & 0 deletions PMR/resources/etls/KinshipDataStaging.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  ETL definition with a single task-ref transform step.

  The step delegates to org.labkey.pmr.etl.TriggerRemoteGeneticsImportStep, which
  copies kinship.txt and inbreeding.txt into the directory named by the GeneticsCore
  "KinshipDataPath" module property and then posts to the geneticscore
  "importGeneticsData" action over the remote connection named below.
-->
<etl xmlns="http://labkey.org/etl/xml">
<name>KinshipDataStaging</name>
<description>Prepare PRIMe-seq Kinship Data for Import into PRIMe</description>
<transforms>
<transform id="kinship" type="TaskRefTransformStep">
<taskref ref="org.labkey.pmr.etl.TriggerRemoteGeneticsImportStep">
<settings>
<!-- Name of the remote connection the step looks up; it is a required setting of the task. -->
<setting name="remoteSource" value="EHR_ClinicalSource"/>
</settings>
</taskref>
</transform>
</transforms>
</etl>
1 change: 1 addition & 0 deletions PMR/resources/folderTypes/PMR.folderType.xml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
<modules>
<moduleName>PMR</moduleName>
<moduleName>EHR</moduleName>
<moduleName>GeneticsCore</moduleName>
</modules>
<defaultModule>PMR</defaultModule>
</folderType>
169 changes: 169 additions & 0 deletions PMR/src/org/labkey/pmr/etl/TriggerRemoteGeneticsImportStep.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
package org.labkey.pmr.etl;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.core5.http.HttpResponse;
import org.apache.hc.core5.http.HttpStatus;
import org.apache.xmlbeans.XmlException;
import org.jetbrains.annotations.NotNull;
import org.labkey.api.collections.CaseInsensitiveHashMap;
import org.labkey.api.data.Container;
import org.labkey.api.data.ContainerManager;
import org.labkey.api.di.DataIntegrationService;
import org.labkey.api.di.TaskRefTask;
import org.labkey.api.module.Module;
import org.labkey.api.module.ModuleLoader;
import org.labkey.api.module.ModuleProperty;
import org.labkey.api.pipeline.PipeRoot;
import org.labkey.api.pipeline.PipelineJob;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.pipeline.PipelineService;
import org.labkey.api.pipeline.RecordedActionSet;
import org.labkey.api.writer.ContainerUser;
import org.labkey.remoteapi.CommandException;
import org.labkey.remoteapi.CommandResponse;
import org.labkey.remoteapi.PostCommand;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;

/**
 * ETL task that stages kinship output files for import and then asks a remote server to import them.
 *
 * <p>The task performs three steps:
 * <ol>
 *   <li>Validates that it is running in the container named by the EHR module's
 *       {@code EHRStudyContainer} module property.</li>
 *   <li>Copies {@code kinship.txt} and {@code inbreeding.txt} from the
 *       {@code kinship/EHR Kinship Calculation} folder under that container's pipeline root into the
 *       directory named by the GeneticsCore module's {@code KinshipDataPath} module property,
 *       replacing any existing copies.</li>
 *   <li>Posts to the {@code geneticscore} / {@code importGeneticsData} action over the remote
 *       connection named by the {@code remoteSource} setting.</li>
 * </ol>
 *
 * <p>Any configuration or I/O problem is reported as a {@link PipelineJobException}.
 */
public class TriggerRemoteGeneticsImportStep implements TaskRefTask
{
    private static final String EHR_MODULE = "ehr";
    private static final String GENETICS_CORE_MODULE = "GeneticsCore";
    private static final String EHR_STUDY_CONTAINER_PROP = "EHRStudyContainer";
    private static final String KINSHIP_DATA_PATH_PROP = "KinshipDataPath";

    protected final Map<String, String> _settings = new CaseInsensitiveHashMap<>();
    protected ContainerUser _containerUser;

    /** Names of the settings this task reads from the ETL XML. */
    private enum Settings
    {
        remoteSource()
    }

    /**
     * Stages the kinship files and triggers the remote import.
     *
     * @param job the pipeline job running this ETL step; its logger is handed to the remote connection lookup
     * @return an empty {@link RecordedActionSet}
     * @throws PipelineJobException if a required module or module property is missing or unset, if this is
     *         not run from the EHR study container, if a source/target path is unusable, if a file cannot
     *         be copied, or if the remote command fails
     */
    @Override
    public RecordedActionSet run(@NotNull PipelineJob job) throws PipelineJobException
    {
        // Resolve the EHR study container and verify the ETL is running inside it:
        String ehrContainerPath = getModulePropertyValue(EHR_MODULE, EHR_STUDY_CONTAINER_PROP, _containerUser.getContainer());
        if (ehrContainerPath == null)
        {
            throw new PipelineJobException("EHRStudyContainer has not been set");
        }

        Container ehrContainer = ContainerManager.getForPath(ehrContainerPath);
        if (ehrContainer == null)
        {
            throw new PipelineJobException("Invalid container: " + ehrContainerPath);
        }

        if (!_containerUser.getContainer().equals(ehrContainer))
        {
            throw new PipelineJobException("This ETL can only be run from the EHRStudyContainer");
        }

        // Resolve (and create if needed) the directory the files are staged into:
        String pipeDirPath = getModulePropertyValue(GENETICS_CORE_MODULE, KINSHIP_DATA_PATH_PROP, ehrContainer);
        if (pipeDirPath == null)
        {
            throw new PipelineJobException("Must provide the filepath to import data using the KinshipDataPath module property");
        }

        File targetPipelineDir = new File(pipeDirPath);
        // mkdirs() returns false both on failure and when the directory already exists, so re-check afterwards
        if (!targetPipelineDir.isDirectory() && !targetPipelineDir.mkdirs() && !targetPipelineDir.isDirectory())
        {
            throw new PipelineJobException("Unable to create target directory: " + targetPipelineDir.getPath());
        }

        // Then copy the files to the expected folder:
        PipeRoot pr = PipelineService.get().getPipelineRootSetting(ehrContainer);
        if (pr == null)
        {
            throw new PipelineJobException("Unable to find pipeline root for: " + ehrContainer);
        }

        File sourceDir = new File(pr.getRootPath(), "/kinship/EHR Kinship Calculation");
        if (!sourceDir.exists())
        {
            throw new PipelineJobException("Unable to find source pipeline dir: " + sourceDir.getPath());
        }

        copyReplaceFile(sourceDir, targetPipelineDir, "kinship.txt");
        copyReplaceFile(sourceDir, targetPipelineDir, "inbreeding.txt");

        // Then ping the main server to import these files:
        String remoteSource = _settings.get(Settings.remoteSource.name());
        DataIntegrationService.RemoteConnection rc = DataIntegrationService.get().getRemoteConnection(remoteSource, _containerUser.getContainer(), job.getLogger());
        if (rc == null)
        {
            throw new PipelineJobException("Unable to find remote connection: " + remoteSource);
        }

        try
        {
            KinshipCommand command = new KinshipCommand();
            command.execute(rc.connection, rc.remoteContainer);
        }
        catch (CommandException | IOException e)
        {
            throw new PipelineJobException(e);
        }

        return new RecordedActionSet();
    }

    /** Remote API command that posts to the {@code geneticscore} controller's {@code importGeneticsData} action. */
    private static class KinshipCommand extends PostCommand<CommandResponse>
    {
        public KinshipCommand()
        {
            super("geneticscore", "importGeneticsData");
        }
    }

    /**
     * Looks up a module property and returns its effective value in the given container.
     *
     * @param moduleName name of the module that declares the property
     * @param propertyName name of the module property
     * @param container container in which to evaluate the property
     * @return the trimmed effective value, or {@code null} if it is unset or blank
     * @throws PipelineJobException if the module is not installed or does not declare the property
     */
    private String getModulePropertyValue(String moduleName, String propertyName, Container container) throws PipelineJobException
    {
        Module module = ModuleLoader.getInstance().getModule(moduleName);
        if (module == null)
        {
            throw new PipelineJobException("Unable to find module: " + moduleName);
        }

        ModuleProperty mp = module.getModuleProperties().get(propertyName);
        if (mp == null)
        {
            throw new PipelineJobException("Module " + moduleName + " does not define the module property: " + propertyName);
        }

        return StringUtils.trimToNull(mp.getEffectiveValue(container));
    }

    /**
     * Copies {@code filename} from {@code sourceDir} into {@code targetDir}, removing any existing copy first.
     *
     * @param sourceDir directory that must contain the file
     * @param targetDir directory to copy the file into
     * @param filename simple name of the file to copy
     * @throws PipelineJobException if the source file is missing, an existing destination file cannot be
     *         deleted, or the copy fails
     */
    private void copyReplaceFile(File sourceDir, File targetDir, String filename) throws PipelineJobException
    {
        File sourceFile = new File(sourceDir, filename);
        if (!sourceFile.exists())
        {
            throw new PipelineJobException("File does not exist: " + sourceFile.getPath());
        }

        File destFile = new File(targetDir, filename);
        // Fail loudly rather than silently leaving a stale file behind
        if (destFile.exists() && !destFile.delete())
        {
            throw new PipelineJobException("Unable to delete existing file: " + destFile.getPath());
        }

        try
        {
            FileUtils.copyFile(sourceFile, destFile);
        }
        catch (IOException e)
        {
            throw new PipelineJobException(e);
        }
    }

    /**
     * @return the setting names that must be supplied in the ETL XML; currently only {@code remoteSource}
     */
    @Override
    public List<String> getRequiredSettings()
    {
        return Collections.singletonList(Settings.remoteSource.name());
    }

    /**
     * Stores the settings supplied by the ETL definition. Keys are matched case-insensitively.
     *
     * @param settings setting name/value pairs from the ETL XML
     */
    @Override
    public void setSettings(Map<String, String> settings) throws XmlException
    {
        _settings.putAll(settings);
    }

    /**
     * @param containerUser the container and user this task runs as
     */
    @Override
    public void setContainerUser(ContainerUser containerUser)
    {
        _containerUser = containerUser;
    }
}
10 changes: 10 additions & 0 deletions PMR/test/sampledata/PMR/testPedigree.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
99991 999911 2 Cynomolgus
999910 99993 99998 2 Cynomolgus
99992 999912 1 Cynomolgus
99993 999912 2 Rhesus
99994 999913 999914 1 Cynomolgus
99995 99991 99992 2 Cynomolgus
99996 99991 99992 1 Cynomolgus
99997 99993 99992 1 Cynomolgus
99998 99995 99992 1 Cynomolgus
99999 99995 99994 2 Cynomolgus
Loading