Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update logic around PolyPhen #142

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions mGAP/resources/etls/prime-seq.xml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@
<column>sitesOnlyVcfId/name</column>
<column>novelSitesVcfId/dataid/DataFileUrl</column>
<column>novelSitesVcfId/name</column>
<column>luceneIndex/dataid/DataFileUrl</column>
<column>luceneIndex/name</column>
<column>humanJbrowseId</column>
<column>objectId</column>
</sourceColumns>
Expand All @@ -119,6 +121,9 @@
<column source="variantTable/dataid/DataFileUrl" target="variantTable" transformClass="org.labkey.mgap.columnTransforms.OutputFileTransform" />
<column source="sitesOnlyVcfId/dataid/DataFileUrl" target="sitesOnlyVcfId" transformClass="org.labkey.mgap.columnTransforms.OutputFileTransform" />
<column source="novelSitesVcfId/dataid/DataFileUrl" target="novelSitesVcfId" transformClass="org.labkey.mgap.columnTransforms.OutputFileTransform" />
<column source="luceneIndex/dataid/DataFileUrl" target="luceneIndex" transformClass="org.labkey.mgap.columnTransforms.OutputFileTransform" />
<column source="jbrowseId" transformClass="org.labkey.mgap.columnTransforms.JBrowseSessionTransform"/>
<column source="liftedVcfId/dataid/DataFileUrl" target="liftedVcfId" transformClass="org.labkey.mgap.columnTransforms.LiftedVcfTransform" />
<column source="humanJbrowseId" transformClass="org.labkey.mgap.columnTransforms.JBrowseHumanSessionTransform"/>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ALTER TABLE mGAP.variantCatalogReleases ADD luceneIndex int;
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ALTER TABLE mGAP.variantCatalogReleases ADD luceneIndex int;
9 changes: 9 additions & 0 deletions mGAP/resources/schemas/mgap.xml
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,15 @@
<fkColumnName>RowId</fkColumnName>
</fk>
</column>
<column columnName="luceneIndex">
<columnTitle>Lucene Index File</columnTitle>
<nullable>true</nullable>
<fk>
<fkDbSchema>sequenceanalysis</fkDbSchema>
<fkTable>outputfiles</fkTable>
<fkColumnName>rowid</fkColumnName>
</fk>
</column>
<column columnName="totalSubjects">
<columnTitle>Total Subjects</columnTitle>
</column>
Expand Down
4 changes: 3 additions & 1 deletion mGAP/src/org/labkey/mgap/mGAPModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import org.labkey.mgap.pipeline.AnnotationStep;
import org.labkey.mgap.pipeline.GenerateMgapTracksStep;
import org.labkey.mgap.pipeline.GroupCompareStep;
import org.labkey.mgap.pipeline.IndexVariantsForMgapStep;
import org.labkey.mgap.pipeline.RemoveAnnotationsForMgapStep;
import org.labkey.mgap.pipeline.RemoveAnnotationsStep;
import org.labkey.mgap.pipeline.RenameSamplesForMgapStep;
Expand All @@ -71,7 +72,7 @@ public String getName()
@Override
public Double getSchemaVersion()
{
return 16.69;
return 16.70;
}

@Override
Expand Down Expand Up @@ -132,6 +133,7 @@ public PipelineStartup()
SequencePipelineService.get().registerPipelineStep(new SampleSpecificGenotypeFiltrationStep.Provider());
SequencePipelineService.get().registerPipelineStep(new mGapReleaseAnnotateNovelSitesStep.Provider());
SequencePipelineService.get().registerPipelineStep(new GenerateMgapTracksStep.Provider());
SequencePipelineService.get().registerPipelineStep(new IndexVariantsForMgapStep.Provider());

_hasRegistered = true;
}
Expand Down
86 changes: 86 additions & 0 deletions mGAP/src/org/labkey/mgap/pipeline/IndexVariantsForMgapStep.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package org.labkey.mgap.pipeline;

import htsjdk.samtools.util.Interval;
import org.apache.commons.lang3.StringUtils;
import org.labkey.api.data.SimpleFilter;
import org.labkey.api.data.TableSelector;
import org.labkey.api.jbrowse.JBrowseService;
import org.labkey.api.pipeline.PipelineJob;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.query.FieldKey;
import org.labkey.api.query.QueryService;
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider;
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl;
import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper;
import org.labkey.api.util.PageFlowUtil;
import org.labkey.mgap.mGAPSchema;

import javax.annotation.Nullable;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Variant-processing pipeline step that builds a Lucene index for an input VCF, restricted to the
 * INFO fields flagged as indexed in the mGAP variant annotations table.
 *
 * <p>The list of fields is looked up once in {@link #init} and cached on the job support object, then
 * read back in {@link #processVariants}, where the index is created through {@link JBrowseService}.
 */
public class IndexVariantsForMgapStep extends AbstractCommandPipelineStep<SelectVariantsWrapper> implements VariantProcessingStep
{
    public static final String CATEGORY = "mGAP Lucene Index";

    /** Key under which the delimited list of INFO fields is cached between init() and processVariants(). */
    private static final String INFO_FIELDS_KEY = "INFO_FIELDS";

    /** Delimiter used to serialize the INFO field list into a single cached string. */
    private static final String INFO_FIELD_DELIM = ";";

    public IndexVariantsForMgapStep(PipelineStepProvider<?> provider, PipelineContext ctx)
    {
        super(provider, ctx, new SelectVariantsWrapper(ctx.getLogger()));
    }

    /**
     * Provider that registers this step and exposes its single "allowLenientProcessing" checkbox parameter.
     */
    public static class Provider extends AbstractVariantProcessingStepProvider<IndexVariantsForMgapStep> implements SupportsScatterGather
    {
        public Provider()
        {
            super("IndexVariantsForMgapStep", "Index VCF for mGAP", "DISCVR-seq", "Create a lucene index for the selected fields, using the fields for mGAP", Arrays.asList(
                    ToolParameterDescriptor.create("allowLenientProcessing", "Allow Lenient Processing", "If selected, many error types will be logged but ignored.", "checkbox", null, false)
            ), null, "https://github.com/BimberLab/DISCVRSeq");
        }

        @Override
        public IndexVariantsForMgapStep create(PipelineContext ctx)
        {
            return new IndexVariantsForMgapStep(this, ctx);
        }
    }

    /**
     * Selects the infoKey of every variant annotation row where isIndexed is true and caches the
     * values, joined with ';', on the job support object for later use by {@link #processVariants}.
     *
     * @param job        the pipeline job; supplies the user and container used for the query
     * @param support    receives the cached field list
     * @param inputFiles not used by this step
     */
    @Override
    public void init(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles) throws PipelineJobException
    {
        List<String> infoFields = new TableSelector(
                QueryService.get().getUserSchema(job.getUser(), job.getContainer(), mGAPSchema.NAME).getTable(mGAPSchema.TABLE_VARIANT_ANNOTATIONS),
                PageFlowUtil.set("infoKey"),
                new SimpleFilter(FieldKey.fromString("isIndexed"), true),
                null
        ).getArrayList(String.class);

        support.cacheObject(INFO_FIELDS_KEY, StringUtils.join(infoFields, INFO_FIELD_DELIM));
    }

    /**
     * Creates a Lucene index for {@code inputVCF} in a "lucene" subdirectory of {@code outputDirectory}
     * and registers the resulting write.lock file as a sequence output of category {@link #CATEGORY}.
     *
     * @param inputVCF        the VCF to index
     * @param outputDirectory directory under which the "lucene" index folder is created
     * @param genome          reference genome; its id is recorded on the output
     * @param intervals       not used by this step
     * @return the step output holding the registered index file
     * @throws PipelineJobException if the cached field list is missing or empty, or if the expected
     *                              write.lock file is not present after indexing
     */
    @Override
    public Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List<Interval> intervals) throws PipelineJobException
    {
        // NOTE(review): intervals is ignored and the index directory name is fixed, although the
        // provider implements SupportsScatterGather - confirm this is the intended behavior.
        VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl();

        String infoFieldsRaw = getPipelineCtx().getSequenceSupport().getCachedObject(INFO_FIELDS_KEY, String.class);
        if (infoFieldsRaw == null)
        {
            // Fail with context instead of a bare NullPointerException on split()
            throw new PipelineJobException("No cached value found for " + INFO_FIELDS_KEY + "; init() may not have run for this job");
        }

        // "".split(";") yields a single empty string, so blank entries are dropped before use
        List<String> infoFields = Arrays.stream(infoFieldsRaw.split(INFO_FIELD_DELIM))
                .filter(StringUtils::isNotBlank)
                .sorted()
                .toList();
        if (infoFields.isEmpty())
        {
            throw new PipelineJobException("No INFO fields are marked as indexed in " + mGAPSchema.TABLE_VARIANT_ANNOTATIONS + "; nothing to index");
        }

        boolean allowLenientProcessing = getProvider().getParameterByName("allowLenientProcessing").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);

        File indexDir = new File(outputDirectory, "lucene");
        JBrowseService.get().prepareLuceneIndex(inputVCF, indexDir, getPipelineCtx().getLogger(), infoFields, allowLenientProcessing);

        // The write.lock file inside the index directory is the file recorded as this step's output
        File idx = new File(indexDir, "write.lock");
        if (!idx.exists())
        {
            throw new PipelineJobException("Unable to find file: " + idx.getPath());
        }

        output.addSequenceOutput(idx, "mGAP Lucene index: " + inputVCF.getName(), CATEGORY, null, null, genome.getGenomeId(), "Fields indexed: " + infoFieldsRaw);

        return output;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -1287,7 +1287,7 @@ private void inspectAndSummarizeVcf(JobContext ctx, File vcfInput, GeneToNameTra
alleleIdx++;

String prediction = polyphenPredictions.get(alleleIdx);
if (StringUtils.isEmpty(prediction) || "B".equals(prediction) || "P".equals(prediction) || ".".equals(prediction))
if (!prediction.contains("D"))
{
continue;
}
Expand Down