From de6823963ef36308852870a8af0e4fa6a395f5c0 Mon Sep 17 00:00:00 2001 From: Nic Bollis Date: Wed, 22 Jan 2025 14:36:58 -0600 Subject: [PATCH] Put those mods back where they came from or so help me (#827) * Changed x64;AnyCPU to only AnyCPU in all project files * One more change * Nuget - 3. Me - 0 * Started initial structure * Revert "Started initial structure" This reverts commit f06cbc00e1b197f202b0cec86407b506cae9c7bf. * Reverted mod localization change. * Enhance handling of terminal modifications in digestion Improved logic in `DigestionProduct.cs` to ensure correct application of N-terminal and C-terminal modifications to biopolymers, preventing overwriting unless the new modification is more specific. Updated assertions and modification order in test cases to reflect changes, enhancing accuracy and robustness of the digestion process. --------- Co-authored-by: Nic Bollis --- mzLib/Omics/Digestion/DigestionProduct.cs | 28 ++++++++++- .../Modifications/ModificationLocalization.cs | 17 ++++--- mzLib/Test/TestProteinDigestion.cs | 46 ++++++++++++++++--- mzLib/Test/Transcriptomics/TestDigestion.cs | 18 ++++---- 4 files changed, 84 insertions(+), 25 deletions(-) diff --git a/mzLib/Omics/Digestion/DigestionProduct.cs b/mzLib/Omics/Digestion/DigestionProduct.cs index ba0244c9d..b43980e23 100644 --- a/mzLib/Omics/Digestion/DigestionProduct.cs +++ b/mzLib/Omics/Digestion/DigestionProduct.cs @@ -110,13 +110,25 @@ protected void PopulateFixedModsOneIsNorFivePrimeTerminus(int length, //the modification is protease associated and is applied to the n-terminal cleaved residue, not at the beginning of the protein if (ModificationLocalization.ModFits(mod, Parent.BaseSequence, 1, length, OneBasedStartResidue)) { - if (mod.ModificationType == "Protease") + if (mod.ModificationType == "Protease") // Protease N-terminal or 5' modification { if (OneBasedStartResidue != 1) fixedModsOneIsNterminus[2] = mod; } + else if (OneBasedStartResidue == 1) // Modified BioPolymer Start 
Residue (e.g. Protein N-Terminal) + { + if (!fixedModsOneIsNterminus.TryAdd(1, mod)) // Check if a protein N-terminal mod is already present + { + if (mod.LocationRestriction is "N-terminal." or "5'-terminal.") // Only overwrite if new mod is N-terminal, not peptide N-terminal + { + fixedModsOneIsNterminus[1] = mod; + } + } + } else //Normal N-terminal peptide modification + { fixedModsOneIsNterminus[1] = mod; + } } break; @@ -137,13 +149,25 @@ protected void PopulateFixedModsOneIsNorFivePrimeTerminus(int length, //the modification is protease associated and is applied to the c-terminal cleaved residue, not if it is at the end of the protein if (ModificationLocalization.ModFits(mod, Parent.BaseSequence, length, length, OneBasedStartResidue + length - 1)) { - if (mod.ModificationType == "Protease") + if (mod.ModificationType == "Protease") // Protease C-terminal or 3' modification { if (OneBasedEndResidue != Parent.Length) fixedModsOneIsNterminus[length + 1] = mod; } + else if (OneBasedEndResidue == Parent.Length) // Modified BioPolymer End Residue (e.g. Protein C-Terminal) + { + if (!fixedModsOneIsNterminus.TryAdd(length + 2, mod)) // Check if a protein C-terminal mod is already present + { + if (mod.LocationRestriction is "C-terminal." or "3'-terminal.") // Only overwrite if new mod is C-terminal, not peptide C-terminal + { + fixedModsOneIsNterminus[length + 2] = mod; + } + } + } else //Normal C-terminal peptide modification + { fixedModsOneIsNterminus[length + 2] = mod; + } } break; diff --git a/mzLib/Omics/Modifications/ModificationLocalization.cs b/mzLib/Omics/Modifications/ModificationLocalization.cs index 8928d8c9a..01dadaa18 100644 --- a/mzLib/Omics/Modifications/ModificationLocalization.cs +++ b/mzLib/Omics/Modifications/ModificationLocalization.cs @@ -31,18 +31,17 @@ public static bool ModFits(Modification attemptToLocalize, string sequence, int } switch (attemptToLocalize.LocationRestriction) { + // Only the intact (undigested) terminus case "N-terminal." 
when bioPolymerOneBasedIndex > 2: - case "Peptide N-terminal." when digestionProductOneBasedIndex > 1 || bioPolymerOneBasedIndex == 1: - case "C-terminal." when bioPolymerOneBasedIndex < sequence.Length: - case "Peptide C-terminal." when digestionProductOneBasedIndex < digestionProductLength || bioPolymerOneBasedIndex == sequence.Length: case "5'-terminal." when bioPolymerOneBasedIndex > 2: - // first residue in oligo but not first in nucleic acid - case "Oligo 5'-terminal." when digestionProductOneBasedIndex > 1 - || bioPolymerOneBasedIndex == 1: + case "C-terminal." when bioPolymerOneBasedIndex < sequence.Length: case "3'-terminal." when bioPolymerOneBasedIndex < sequence.Length: - // not the last residue in oligo but not in nucleic acid - case "Oligo 3'-terminal." when digestionProductOneBasedIndex < digestionProductLength - || bioPolymerOneBasedIndex == sequence.Length: + + // All Digested Termini AND original undigested termini + case "Peptide N-terminal." when digestionProductOneBasedIndex > 1: + case "Oligo 5'-terminal." when digestionProductOneBasedIndex > 1: + case "Peptide C-terminal." when digestionProductOneBasedIndex < digestionProductLength: + case "Oligo 3'-terminal." 
when digestionProductOneBasedIndex < digestionProductLength: return false; default: diff --git a/mzLib/Test/TestProteinDigestion.cs b/mzLib/Test/TestProteinDigestion.cs index 263818a4b..bd8b3f36b 100644 --- a/mzLib/Test/TestProteinDigestion.cs +++ b/mzLib/Test/TestProteinDigestion.cs @@ -220,7 +220,7 @@ public static void TestPeptideWithSetModifications() variableModifications.Add(new Modification(_originalId: "ProtCmod", _target: motif, _locationRestriction: "C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); var ye = prot.Digest(digestionParams, new List(), variableModifications).ToList(); - Assert.AreEqual(2 * 2 * 2, ye.Count); + Assert.AreEqual(3 * 2 * 3, ye.Count); Assert.AreEqual("[H]M[H][H]", ye.Last().SequenceWithChemicalFormulas); double m1 = 5 * GetElement("H").PrincipalIsotope.AtomicMass + Residue.ResidueMonoisotopicMass['M'] + GetElement("O").PrincipalIsotope.AtomicMass; @@ -229,8 +229,9 @@ public static void TestPeptideWithSetModifications() double m3 = m1 - m2; Assert.IsTrue(m3 < 1e-9); } + [Test] - public static void TestPeptideWithFixedModifications() + public static void TestPeptideDigestion_FixedModifications_ProtModsOverwritePepMods() { var prot = new Protein("M", null); DigestionParams digestionParams = new DigestionParams(maxMissedCleavages: 0, minPeptideLength: 1, maxModsForPeptides: 3); // if you pass Custom Protease7 this test gets really flakey. 
@@ -239,8 +240,8 @@ public static void TestPeptideWithFixedModifications() fixedMods.Add(new Modification(_originalId: "ProtNmod", _target: motif, _locationRestriction: "N-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); fixedMods.Add(new Modification(_originalId: "pepNmod", _target: motif, _locationRestriction: "Peptide N-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); fixedMods.Add(new Modification(_originalId: "resMod", _target: motif, _locationRestriction: "Anywhere.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); - fixedMods.Add(new Modification(_originalId: "PepCmod", _target: motif, _locationRestriction: "Peptide C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); fixedMods.Add(new Modification(_originalId: "ProtCmod", _target: motif, _locationRestriction: "C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); + fixedMods.Add(new Modification(_originalId: "PepCmod", _target: motif, _locationRestriction: "Peptide C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); var ok = prot.Digest(digestionParams, fixedMods, new List()).ToList(); Assert.AreEqual(1, ok.Count); @@ -252,7 +253,40 @@ public static void TestPeptideWithFixedModifications() } [Test] - public static void TestPeptideWithFixedModifications_TwoProducts() + public static void TestPeptideDigestion_FixedModifications_ProtModsOverwritePepMods_RandomizedModOrder() + { + var rand = new Random(42); + var prot = new Protein("M", null); + DigestionParams digestionParams = new DigestionParams(maxMissedCleavages: 0, minPeptideLength: 1, 
maxModsForPeptides: 3); // if you pass Custom Protease7 this test gets really flakey. + List fixedMods = new List(); + ModificationMotif.TryGetMotif("M", out ModificationMotif motif); + fixedMods.Add(new Modification(_originalId: "ProtNmod", _target: motif, _locationRestriction: "N-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); + fixedMods.Add(new Modification(_originalId: "pepNmod", _target: motif, _locationRestriction: "Peptide N-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); + fixedMods.Add(new Modification(_originalId: "resMod", _target: motif, _locationRestriction: "Anywhere.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); + fixedMods.Add(new Modification(_originalId: "ProtCmod", _target: motif, _locationRestriction: "C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); + fixedMods.Add(new Modification(_originalId: "PepCmod", _target: motif, _locationRestriction: "Peptide C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); + + // set expected values + int expectedDigestionProducts = 1; + string expectedFullSequence = "[:ProtNmod on M]M[:resMod on M][:ProtCmod on M]"; + string expectedSequenceWithChemicalFormulas = "[H]M[H][H]"; + double expectedMonoisotopicMass = 5 * GetElement("H").PrincipalIsotope.AtomicMass + Residue.ResidueMonoisotopicMass['M'] + GetElement("O").PrincipalIsotope.AtomicMass; + + // randomly scramble all mods, digest, and ensure the answer is correct. 
+ for (int i = 0; i < 10; i++) + { + var shuffledFixedMods = fixedMods.OrderBy(a => rand.Next()).ToList(); + var ok = prot.Digest(digestionParams, shuffledFixedMods, new List()).ToList(); + + Assert.AreEqual(expectedDigestionProducts, ok.Count); + Assert.AreEqual(expectedFullSequence, ok.First().FullSequence); + Assert.AreEqual(expectedSequenceWithChemicalFormulas, ok.First().SequenceWithChemicalFormulas); + Assert.AreEqual(expectedMonoisotopicMass, ok.Last().MonoisotopicMass, 1e-9); + } + } + + [Test] + public static void TestPeptideDigestion_FixedModifications_ProtModsOverwritePepMods_TwoProducts() { var prot = new Protein("MKM", null); DigestionParams digestionParams = new DigestionParams(maxMissedCleavages: 0, minPeptideLength: 1, maxModsForPeptides: 3, initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain); @@ -265,10 +299,10 @@ public static void TestPeptideWithFixedModifications_TwoProducts() fixedMods.Add(new Modification(_originalId: "pepNmod", _target: mMotif, _locationRestriction: "Peptide N-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); fixedMods.Add(new Modification(_originalId: "pepNmod", _target: kMotif, _locationRestriction: "Peptide N-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); fixedMods.Add(new Modification(_originalId: "resMod", _target: mMotif, _locationRestriction: "Anywhere.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); - fixedMods.Add(new Modification(_originalId: "PepCmod", _target: mMotif, _locationRestriction: "Peptide C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); - fixedMods.Add(new Modification(_originalId: "PepCmod", _target: kMotif, _locationRestriction: "Peptide C-terminal.", _chemicalFormula: 
ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); fixedMods.Add(new Modification(_originalId: "ProtCmod", _target: mMotif, _locationRestriction: "C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); fixedMods.Add(new Modification(_originalId: "ProtCmod", _target: kMotif, _locationRestriction: "C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); + fixedMods.Add(new Modification(_originalId: "PepCmod", _target: mMotif, _locationRestriction: "Peptide C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); + fixedMods.Add(new Modification(_originalId: "PepCmod", _target: kMotif, _locationRestriction: "Peptide C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass)); var ok = prot.Digest(digestionParams, fixedMods, new List()).ToList(); diff --git a/mzLib/Test/Transcriptomics/TestDigestion.cs b/mzLib/Test/Transcriptomics/TestDigestion.cs index acfcacdef..dc577a6d3 100644 --- a/mzLib/Test/Transcriptomics/TestDigestion.cs +++ b/mzLib/Test/Transcriptomics/TestDigestion.cs @@ -381,8 +381,9 @@ public static void TestTermini_ThreePrimeCyclicPhosphate() variableMods = new List { oligoCyclicPhosphate }; digestionProducts = rna.Digest(digestionParams, new List(), variableMods) .Select(p => (OligoWithSetMods)p).ToList(); - Assert.That(digestionProducts.Count, Is.EqualTo(1)); + Assert.That(digestionProducts.Count, Is.EqualTo(2)); Assert.That(digestionProducts[0].FullSequence, Is.EqualTo("UAGUCGUUGAUAG")); + Assert.That(digestionProducts[1].FullSequence, Is.EqualTo("UAGUCGUUGAUAG[Digestion Termini:Cyclic Phosphate on X]")); // RNase T1 digestion, 3' terminal modification digestionParams = new RnaDigestionParams("RNase T1"); @@ -403,13 
+404,13 @@ public static void TestTermini_ThreePrimeCyclicPhosphate() variableMods = new List { oligoCyclicPhosphate }; digestionProducts = rna.Digest(digestionParams, new List(), variableMods) .Select(p => (OligoWithSetMods)p).ToList(); - Assert.That(digestionProducts.Count, Is.EqualTo(7)); + Assert.That(digestionProducts.Count, Is.EqualTo(8)); expected = new List() { "UAG", "UAG[Digestion Termini:Cyclic Phosphate on X]", "UCG", "UCG[Digestion Termini:Cyclic Phosphate on X]", "UUG", "UUG[Digestion Termini:Cyclic Phosphate on X]", - "AUAG", + "AUAG","AUAG[Digestion Termini:Cyclic Phosphate on X]" }; for (int i = 0; i < expected.Count; i++) @@ -431,7 +432,7 @@ public static void TestTermini_FivePrimeLargeMod() out errors).First(); Assert.That(!errors.Any()); - // top-down digestion, 5' terminal modification + // top-down digestion, 5' terminal modification, expect two products var variableMods = new List { nucleicAcidLargeMod }; var digestionParams = new RnaDigestionParams("top-down"); var digestionProducts = rna.Digest(digestionParams, new List(), variableMods) @@ -440,12 +441,13 @@ public static void TestTermini_FivePrimeLargeMod() Assert.That(digestionProducts[0].FullSequence, Is.EqualTo("UAGUCGUUGAUAG")); Assert.That(digestionProducts[1].FullSequence, Is.EqualTo("[Standard:Pfizer 5'-Cap on X]UAGUCGUUGAUAG")); - // top-down digestion, 5' oligo terminal modification + // top-down digestion, 5' oligo terminal modification, expect two products variableMods = new List { oligoLargeMod }; digestionProducts = rna.Digest(digestionParams, new List(), variableMods) .Select(p => (OligoWithSetMods)p).ToList(); - Assert.That(digestionProducts.Count, Is.EqualTo(1)); + Assert.That(digestionProducts.Count, Is.EqualTo(2)); Assert.That(digestionProducts[0].FullSequence, Is.EqualTo("UAGUCGUUGAUAG")); + Assert.That(digestionProducts[1].FullSequence, Is.EqualTo("[Standard:Pfizer 5'-Cap on X]UAGUCGUUGAUAG")); // RNase T1 digestion, 5' terminal modification digestionParams = new 
RnaDigestionParams("RNase T1"); @@ -466,10 +468,10 @@ public static void TestTermini_FivePrimeLargeMod() variableMods = new List { oligoLargeMod }; digestionProducts = rna.Digest(digestionParams, new List(), variableMods) .Select(p => (OligoWithSetMods)p).ToList(); - Assert.That(digestionProducts.Count, Is.EqualTo(7)); + Assert.That(digestionProducts.Count, Is.EqualTo(8)); expected = new List() { - "UAG", + "UAG", "[Standard:Pfizer 5'-Cap on X]UAG", "UCG", "[Standard:Pfizer 5'-Cap on X]UCG", "UUG", "[Standard:Pfizer 5'-Cap on X]UUG", "AUAG", "[Standard:Pfizer 5'-Cap on X]AUAG"