Skip to content

Commit

Permalink
Cleaned up code and made all tests pass
Browse files Browse the repository at this point in the history
  • Loading branch information
nbollis committed Jan 9, 2025
1 parent c736691 commit 2dc2073
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 38 deletions.
21 changes: 12 additions & 9 deletions MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1026,9 +1026,10 @@ private void WritePrunedDatabase()
// find all biopolymers that have at least one confident PSM and their confident localized modifications -> Used for determining which mods to retain
var proteinToConfidentModifiedSequences = GetProteinToConfidentModifiedSequences(Parameters.AllPsms, Parameters.SearchParameters.EvidenceRequiredToWriteLocalizedMod, Parameters.SearchParameters.IncludeProteinAmbiguous);

if (proteinToConfidentBaseSequences.Count == 0) return;

// populate the protein object with the desired modifications with a modify in place operation, original modifications are stored for later restoration
UpdateProteinModifications(proteinToConfidentModifiedSequences, out var proteinsOriginalModifications, out var originalSequenceVariantModifications);

WriteDatabases(proteinToConfidentBaseSequences);

// Restore Original Modifications with a modify in place operation
Expand Down Expand Up @@ -1142,15 +1143,15 @@ public Dictionary<IBioPolymer, List<IBioPolymerWithSetMods>> GetProteinToConfide
if (modAndLocationGrouped.Count() <= 1)
continue;

var dissociationTypeCount = modAndLocationGrouped.GroupBy(p => p.dissociationType).Count();
var digestionAgentCount = modAndLocationGrouped.GroupBy(p => p.digestionAgent).Count();
var dissociationAndDigestionCount = modAndLocationGrouped.GroupBy(p => (p.dissociationType, p.digestionAgent)).Count();

// TODO: Ask Claire about truncy bois. Right now they dont count for anything as they are not true missed cleavages.
var missedCleavageCount = modAndLocationGrouped.GroupBy(p => p.dissociationType)
.Sum(dissGroup => dissGroup.GroupBy(p => p.missedCleavages).Count() - 1);

var conditionCount = dissociationTypeCount + digestionAgentCount + missedCleavageCount - 2;
// missed cleavages should only count if they occur with the same digestion agent
int missedCleavageCount = modAndLocationGrouped.GroupBy(p => p.digestionAgent)
.Sum(dissGroup =>
dissGroup.Select(p => p.missedCleavages).Distinct().Count() - 1);

var conditionCount = dissociationAndDigestionCount + missedCleavageCount;
if (conditionCount >= evidenceRequired)
modificationsToRetain.Add(modAndLocationGrouped.Key);
}
Expand Down Expand Up @@ -1182,17 +1183,19 @@ public Dictionary<IBioPolymer, List<IBioPolymerWithSetMods>> GetProteinToConfide
.Count(mod => !modificationsToRetain.Contains((mod.Key - covGroup.BioPolymerWithSetMods.OneBasedStartResidue + 1, mod.Value))))
.ToList();


// iterate through the sorted list until we cover all modifications or use all biopolymers (we should never hit the second case, but it stops an infinite loop just in case)
while (modificationsToRetain.Count > 0 && sortedBioPolymers.Count > 0)
{
// Select the biopolymer that covers the most uncovered modifications
var bestBioPolymer = sortedBioPolymers.First();

minimumSet.Add(bestBioPolymer.BioPolymerWithSetMods);
foreach (var mod in bestBioPolymer.CoveredMods)
modificationsToRetain.Remove(mod);
sortedBioPolymers.RemoveAt(0);

// Remove the selected biopolymer from the list
sortedBioPolymers.Remove(bestBioPolymer);

sortedBioPolymers = sortedBioPolymers
.Where(covGroup => covGroup.CoveredMods.Overlaps(modificationsToRetain)) // retain only those with mods that are not yet covered
Expand Down
50 changes: 21 additions & 29 deletions MetaMorpheus/Test/gptmdPrunedDbTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -642,18 +642,21 @@ private class TestSpectralMatch : SpectralMatch
{
// Construct the bare minimum of our objects needed to run this section of PostSearchAnalysisTask
public TestSpectralMatch(string fullSequence, int startResidue = 0, int endResidue = 13,
string filePath = "default", string protease = "trypsin")
string filePath = "default", string protease = "trypsin", int missedCleavages = 0)
: base(
new PeptideWithSetModifications(fullSequence, GlobalVariables.AllModsKnownDictionary,
p: _testProtein, oneBasedStartResidueInProtein: startResidue,
p: _testProtein, oneBasedStartResidueInProtein: startResidue, missedCleavages: missedCleavages,
oneBasedEndResidueInProtein: endResidue, digestionParams: new DigestionParams(protease)),
0, 10, 0,
new Ms2ScanWithSpecificMass(
new MsDataScan(
new MzSpectrum([], [], false), 0, 0,
true, Polarity.Positive, 0, default, "", 0,
new MzSpectrum([], [], false),
0, 0,
new double[0, 0], ""), 0, 0, filePath, new CommonParameters(), []),
true, Polarity.Positive, 0,
default, "", 0,
0, 0,
new double[0, 0], ""),
0, 0, filePath, new CommonParameters(), []),
new CommonParameters(), [])
{
FdrInfo = new EngineLayer.FdrAnalysis.FdrInfo
Expand All @@ -664,17 +667,6 @@ public TestSpectralMatch(string fullSequence, int startResidue = 0, int endResid
QValueNotch = 0
};
ResolveAllAmbiguities();

// adjust modification index
foreach (var bioPolymer in BestMatchingBioPolymersWithSetMods)
{
var allMods = bioPolymer.Peptide.AllModsOneIsNterminus.ToArray();
bioPolymer.Peptide.AllModsOneIsNterminus.Clear();
foreach (var modification in allMods)
{
bioPolymer.Peptide.AllModsOneIsNterminus.Add(modification.Key + startResidue-1, modification.Value);
}
}
}
};

Expand Down Expand Up @@ -759,8 +751,8 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Misse
{
new TestSpectralMatch("PEPTIDE[Common Biological:Carboxylation on E]K", 0 , 8),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8),
new TestSpectralMatch("PEPTIDEKPE[Common Biological:Carboxylation on E]PTK", 0 , 13),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPEPTK", 0 , 13)
new TestSpectralMatch("PEPTIDEKPE[Common Biological:Carboxylation on E]PTK", 0 , 13, missedCleavages: 1),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPEPTK", 0 , 13, missedCleavages: 1)
};
var postSearchAnalysisTask = new PostSearchAnalysisTask()
{
Expand All @@ -780,19 +772,19 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Misse
Assert.That(fullSequences, Does.Contain("PE[Common Biological:Carboxylation on E]PTIDEK"));
}

[Test]
[Test] // TODO: UGH
public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_MissedCleavage_TwoSharedMod_AlternatingTerm()
{
// Arrange
var psms = new List<SpectralMatch>
{
new TestSpectralMatch("PEPT[Common Biological:Phosphorylation on T]K", 8, 13),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8),
new TestSpectralMatch("PEPTIDEKPE[Common Biological:Carboxylation on E]PTK", 0 , 13),
new TestSpectralMatch("PEPTIDEKPE[Common Biological:Carboxylation on E]PTK", 0 , 13, missedCleavages: 1),

// both of the below satisfy the criteria of covering all modifications, but only one should be selected
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPEPT[Common Biological:Phosphorylation on T]K", 0 , 13),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]K", 0 , 13)
// both of the below satisfy the criteria of covering all modifications, but only the first should be selected, as the second has an extra modification
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPEPT[Common Biological:Phosphorylation on T]K", 0 , 13, missedCleavages: 1),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]K", 0 , 13, missedCleavages: 1)
};
var postSearchAnalysisTask = new PostSearchAnalysisTask()
{
Expand Down Expand Up @@ -855,7 +847,7 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Diffe
{
// three mods found in the same condition, one mod of those found in a different condition, two found in the same condition in a missed cleavage product
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]K", 0, 8, "hcd"),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "hcd"),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "hcd", missedCleavages: 1),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "hcd"),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8, "etd"),
new TestSpectralMatch("PEPTIDEK", 0 , 8),
Expand Down Expand Up @@ -890,7 +882,7 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Diffe
{
// three mods found in the same condition, two mods of those found in a different condition, one found in the same condition in a missed cleavage product
new TestSpectralMatch("PEPT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]K", 0, 8, "hcd"),
new TestSpectralMatch("PEPTIDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "hcd"),
new TestSpectralMatch("PEPTIDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "hcd", missedCleavages: 1),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "hcd"),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDEK", 0 , 8, "etd"),
new TestSpectralMatch("PEPTIDEK", 0 , 8),
Expand Down Expand Up @@ -927,8 +919,8 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Diffe
// two mods found in the same condition, one mod of those found in a different condition
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDEK", 0, 8, "top-down", "top-down"),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDEK", 0, 8, "top-down", "top-down"),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "top-down", "top-down"),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8, "trypsin", "trypsin"),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "top-down", "top-down", missedCleavages: 1),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8, "trypsin", "trypsin", missedCleavages: 1),
new TestSpectralMatch("PEPTIDEK", 0 , 8),
};
var postSearchAnalysisTask = new PostSearchAnalysisTask()
Expand Down Expand Up @@ -961,7 +953,7 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Diffe
{
// three mods found in the same condition, one mod of those found in a different condition, two found in the same condition in a missed cleavage product
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]K", 0, 8, "top-down"),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "top-down"),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "top-down", missedCleavages: 1),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "top-down"),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8, "trypsin"),
new TestSpectralMatch("PEPTIDEK", 0 , 8),
Expand Down Expand Up @@ -996,7 +988,7 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Diffe
{
// three mods found in the same condition, two mods of those found in a different condition, one found in the same condition in a missed cleavage product
new TestSpectralMatch("PEPT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]K", 0, 8, "top-down"),
new TestSpectralMatch("PEPTIDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "top-down"),
new TestSpectralMatch("PEPTIDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "top-down", missedCleavages: 1),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "top-down"),
new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDEK", 0 , 8, "trypsin"),
new TestSpectralMatch("PEPTIDEK", 0 , 8),
Expand Down

0 comments on commit 2dc2073

Please sign in to comment.