diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index fcaf47c915..26ef82fd07 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -1026,9 +1026,10 @@ private void WritePrunedDatabase() // find all biopolymers that have at least one confident PSM and their confident localized modifications -> Used for determining which mods to retain var proteinToConfidentModifiedSequences = GetProteinToConfidentModifiedSequences(Parameters.AllPsms, Parameters.SearchParameters.EvidenceRequiredToWriteLocalizedMod, Parameters.SearchParameters.IncludeProteinAmbiguous); + if (proteinToConfidentBaseSequences.Count == 0) return; + // populate the protein object with the desired modifications with a modify in place operation, original modifications are stored for later restoration UpdateProteinModifications(proteinToConfidentModifiedSequences, out var proteinsOriginalModifications, out var originalSequenceVariantModifications); - WriteDatabases(proteinToConfidentBaseSequences); // Restore Original Modifications with a modify in place operation @@ -1142,15 +1143,15 @@ public Dictionary> GetProteinToConfide if (modAndLocationGrouped.Count() <= 1) continue; - var dissociationTypeCount = modAndLocationGrouped.GroupBy(p => p.dissociationType).Count(); - var digestionAgentCount = modAndLocationGrouped.GroupBy(p => p.digestionAgent).Count(); + var dissociationAndDigestionCount = modAndLocationGrouped.GroupBy(p => (p.dissociationType, p.digestionAgent)).Count(); // TODO: Ask Claire about truncy bois. Right now they dont count for anything as they are not true missed cleavages. 
- var missedCleavageCount = modAndLocationGrouped.GroupBy(p => p.dissociationType) - .Sum(dissGroup => dissGroup.GroupBy(p => p.missedCleavages).Count() - 1); - - var conditionCount = dissociationTypeCount + digestionAgentCount + missedCleavageCount - 2; + // missed cleavages should only count if they occur with the same digestion agent + int missedCleavageCount = modAndLocationGrouped.GroupBy(p => p.digestionAgent) + .Sum(dissGroup => + dissGroup.Select(p => p.missedCleavages).Distinct().Count() - 1); + var conditionCount = dissociationAndDigestionCount + missedCleavageCount; if (conditionCount >= evidenceRequired) modificationsToRetain.Add(modAndLocationGrouped.Key); } @@ -1182,17 +1183,19 @@ public Dictionary> GetProteinToConfide .Count(mod => !modificationsToRetain.Contains((mod.Key - covGroup.BioPolymerWithSetMods.OneBasedStartResidue + 1, mod.Value)))) .ToList(); - + // iterate through the sorted list until we cover all modifications or use all biopolymers (we should never hit the second case, but stops and infinite loop just in case) while (modificationsToRetain.Count > 0 && sortedBioPolymers.Count > 0) { + // Select the biopolymer that covers the most uncovered modifications var bestBioPolymer = sortedBioPolymers.First(); minimumSet.Add(bestBioPolymer.BioPolymerWithSetMods); foreach (var mod in bestBioPolymer.CoveredMods) modificationsToRetain.Remove(mod); - sortedBioPolymers.RemoveAt(0); + // Remove the selected biopolymer from the list + sortedBioPolymers.Remove(bestBioPolymer); sortedBioPolymers = sortedBioPolymers .Where(covGroup => covGroup.CoveredMods.Overlaps(modificationsToRetain)) // retain only those with mods that are not yet covered diff --git a/MetaMorpheus/Test/gptmdPrunedDbTests.cs b/MetaMorpheus/Test/gptmdPrunedDbTests.cs index 9e46529891..e1bc67466a 100644 --- a/MetaMorpheus/Test/gptmdPrunedDbTests.cs +++ b/MetaMorpheus/Test/gptmdPrunedDbTests.cs @@ -642,18 +642,21 @@ private class TestSpectralMatch : SpectralMatch { // Construct the bare 
minimum of our objects needed to run this section of PostSearchAnalysisTask public TestSpectralMatch(string fullSequence, int startResidue = 0, int endResidue = 13, - string filePath = "default", string protease = "trypsin") + string filePath = "default", string protease = "trypsin", int missedCleavages = 0) : base( new PeptideWithSetModifications(fullSequence, GlobalVariables.AllModsKnownDictionary, - p: _testProtein, oneBasedStartResidueInProtein: startResidue, + p: _testProtein, oneBasedStartResidueInProtein: startResidue, missedCleavages: missedCleavages, oneBasedEndResidueInProtein: endResidue, digestionParams: new DigestionParams(protease)), 0, 10, 0, new Ms2ScanWithSpecificMass( new MsDataScan( - new MzSpectrum([], [], false), 0, 0, - true, Polarity.Positive, 0, default, "", 0, + new MzSpectrum([], [], false), 0, 0, - new double[0, 0], ""), 0, 0, filePath, new CommonParameters(), []), + true, Polarity.Positive, 0, + default, "", 0, + 0, 0, + new double[0, 0], ""), + 0, 0, filePath, new CommonParameters(), []), new CommonParameters(), []) { FdrInfo = new EngineLayer.FdrAnalysis.FdrInfo @@ -664,17 +667,6 @@ public TestSpectralMatch(string fullSequence, int startResidue = 0, int endResid QValueNotch = 0 }; ResolveAllAmbiguities(); - - // adjust modification index - foreach (var bioPolymer in BestMatchingBioPolymersWithSetMods) - { - var allMods = bioPolymer.Peptide.AllModsOneIsNterminus.ToArray(); - bioPolymer.Peptide.AllModsOneIsNterminus.Clear(); - foreach (var modification in allMods) - { - bioPolymer.Peptide.AllModsOneIsNterminus.Add(modification.Key + startResidue-1, modification.Value); - } - } } }; @@ -759,8 +751,8 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Misse { new TestSpectralMatch("PEPTIDE[Common Biological:Carboxylation on E]K", 0 , 8), new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8), - new TestSpectralMatch("PEPTIDEKPE[Common Biological:Carboxylation on E]PTK", 0 , 13), - new 
TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPEPTK", 0 , 13) + new TestSpectralMatch("PEPTIDEKPE[Common Biological:Carboxylation on E]PTK", 0 , 13, missedCleavages: 1), + new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPEPTK", 0 , 13, missedCleavages: 1) }; var postSearchAnalysisTask = new PostSearchAnalysisTask() { @@ -780,7 +772,7 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Misse Assert.That(fullSequences, Does.Contain("PE[Common Biological:Carboxylation on E]PTIDEK")); } - [Test] + [Test] // TODO: UGH public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_MissedCleavage_TwoSharedMod_AlternatingTerm() { // Arrange @@ -788,11 +780,11 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Misse { new TestSpectralMatch("PEPT[Common Biological:Phosphorylation on T]K", 8, 13), new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8), - new TestSpectralMatch("PEPTIDEKPE[Common Biological:Carboxylation on E]PTK", 0 , 13), + new TestSpectralMatch("PEPTIDEKPE[Common Biological:Carboxylation on E]PTK", 0 , 13, missedCleavages: 1), - // both of the below satisfy the criteria of covering all modifications, but only one should be selected - new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPEPT[Common Biological:Phosphorylation on T]K", 0 , 13), - new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]K", 0 , 13) + // both of the below satisfy the criteria of covering all modifications, but only the first should be selected, as the second has an extra modification + new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPEPT[Common Biological:Phosphorylation on T]K", 0 , 13, missedCleavages: 1), + new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEKPE[Common Biological:Carboxylation on 
E]PT[Common Biological:Phosphorylation on T]K", 0 , 13, missedCleavages: 1) }; var postSearchAnalysisTask = new PostSearchAnalysisTask() { @@ -855,7 +847,7 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Diffe { // three mods found in the same condition, one mod of those found in a different condition, two found in the same condition in a missed cleavage product new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]K", 0, 8, "hcd"), - new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "hcd"), + new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "hcd", missedCleavages: 1), new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "hcd"), new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8, "etd"), new TestSpectralMatch("PEPTIDEK", 0 , 8), @@ -890,7 +882,7 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Diffe { // three mods found in the same condition, two mods of those found in a different condition, one found in the same condition in a missed cleavage product new TestSpectralMatch("PEPT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]K", 0, 8, "hcd"), - new TestSpectralMatch("PEPTIDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "hcd"), + new TestSpectralMatch("PEPTIDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "hcd", missedCleavages: 1), new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "hcd"), new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDEK", 0 , 8, "etd"), new TestSpectralMatch("PEPTIDEK", 0 , 8), @@ -927,8 
+919,8 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Diffe // two mods found in the same condition, one mod of those found in a different contdition new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDEK", 0, 8, "top-down", "top-down"), new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDEK", 0, 8, "top-down", "top-down"), - new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "top-down", "top-down"), - new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8, "trypsin", "trypsin"), + new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "top-down", "top-down", missedCleavages: 1), + new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8, "trypsin", "trypsin", missedCleavages: 1), new TestSpectralMatch("PEPTIDEK", 0 , 8), }; var postSearchAnalysisTask = new PostSearchAnalysisTask() @@ -961,7 +953,7 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Diffe { // three mods found in the same condition, one mod of those found in a different condition, two found in the same condition in a missed cleavage product new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]K", 0, 8, "top-down"), - new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "top-down"), + new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "top-down", missedCleavages: 1), new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "top-down"), new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0 , 8, 
"trypsin"), new TestSpectralMatch("PEPTIDEK", 0 , 8), @@ -996,7 +988,7 @@ public static void GetProteinToConfidentModifiedSequences_MultipleEvidence_Diffe { // three mods found in the same condition, two mods of those found in a different condition, one found in the same condition in a missed cleavage product new TestSpectralMatch("PEPT[Common Biological:Phosphorylation on T]IDE[Common Biological:Carboxylation on E]K", 0, 8, "top-down"), - new TestSpectralMatch("PEPTIDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "top-down"), + new TestSpectralMatch("PEPTIDE[Common Biological:Carboxylation on E]KPEPTK", 0, 13, "top-down", missedCleavages: 1), new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PTIDEK", 0, 8, "top-down"), new TestSpectralMatch("PE[Common Biological:Carboxylation on E]PT[Common Biological:Phosphorylation on T]IDEK", 0 , 8, "trypsin"), new TestSpectralMatch("PEPTIDEK", 0 , 8),