Skip to content

Commit

Permalink
Put those mods back where they came from or so help me (#827)
Browse files Browse the repository at this point in the history
* Changed x64;AnyCPU to only AnyCPU in all project files

* One more change

* Nuget - 3.
Me - 0

* Started initial structure

* Revert "Started initial structure"

This reverts commit f06cbc0.

* Reverted mod localization change.

* Enhance handling of terminal modifications in digestion

Improved logic in `DigestionProduct.cs` to ensure correct application of N-terminal and C-terminal modifications to biopolymers, preventing overwriting unless the new modification is more specific.

Updated assertions and modification order in test cases to reflect changes, enhancing accuracy and robustness of the digestion process.

---------

Co-authored-by: Nic Bollis <nbollis@wisc.edu>
  • Loading branch information
nbollis and Nic Bollis authored Jan 22, 2025
1 parent e33a478 commit de68239
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 25 deletions.
28 changes: 26 additions & 2 deletions mzLib/Omics/Digestion/DigestionProduct.cs
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,25 @@ protected void PopulateFixedModsOneIsNorFivePrimeTerminus(int length,
//the modification is protease associated and is applied to the n-terminal cleaved residue, not at the beginning of the protein
if (ModificationLocalization.ModFits(mod, Parent.BaseSequence, 1, length, OneBasedStartResidue))
{
if (mod.ModificationType == "Protease")
if (mod.ModificationType == "Protease") // Protease N-terminal or 5' modification
{
if (OneBasedStartResidue != 1)
fixedModsOneIsNterminus[2] = mod;
}
else if (OneBasedStartResidue == 1) // Modified BioPolymer Start Residue (e.g. Protein N-Terminal)
{
if (!fixedModsOneIsNterminus.TryAdd(1, mod)) // Check if a protein N-terminal mod is already present
{
if (mod.LocationRestriction is "N-terminal." or "5'-terminal.") // Only overwrite if new mod is N-terminal, not peptide N-terminal
{
fixedModsOneIsNterminus[1] = mod;
}
}
}
else //Normal N-terminal peptide modification
{
fixedModsOneIsNterminus[1] = mod;
}
}
break;

Expand All @@ -137,13 +149,25 @@ protected void PopulateFixedModsOneIsNorFivePrimeTerminus(int length,
//the modification is protease associated and is applied to the c-terminal cleaved residue, not if it is at the end of the protein
if (ModificationLocalization.ModFits(mod, Parent.BaseSequence, length, length, OneBasedStartResidue + length - 1))
{
if (mod.ModificationType == "Protease")
if (mod.ModificationType == "Protease") // Protease C-terminal or 3' modification
{
if (OneBasedEndResidue != Parent.Length)
fixedModsOneIsNterminus[length + 1] = mod;
}
else if (OneBasedEndResidue == Parent.Length) // Modified BioPolymer End Residue (e.g. Protein C-Terminal)
{
if (!fixedModsOneIsNterminus.TryAdd(length + 2, mod)) // Check if a protein C-terminal mod is already present
{
if (mod.LocationRestriction is "C-terminal." or "3'-terminal.") // Only overwrite if new mod is C-terminal, not peptide C-terminal
{
fixedModsOneIsNterminus[length + 2] = mod;
}
}
}
else //Normal C-terminal peptide modification
{
fixedModsOneIsNterminus[length + 2] = mod;
}
}
break;

Expand Down
17 changes: 8 additions & 9 deletions mzLib/Omics/Modifications/ModificationLocalization.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,17 @@ public static bool ModFits(Modification attemptToLocalize, string sequence, int
}
switch (attemptToLocalize.LocationRestriction)
{
// Only the intact (undigested) terminus
case "N-terminal." when bioPolymerOneBasedIndex > 2:
case "Peptide N-terminal." when digestionProductOneBasedIndex > 1 || bioPolymerOneBasedIndex == 1:
case "C-terminal." when bioPolymerOneBasedIndex < sequence.Length:
case "Peptide C-terminal." when digestionProductOneBasedIndex < digestionProductLength || bioPolymerOneBasedIndex == sequence.Length:
case "5'-terminal." when bioPolymerOneBasedIndex > 2:
// first residue in oligo but not first in nucleic acid
case "Oligo 5'-terminal." when digestionProductOneBasedIndex > 1
|| bioPolymerOneBasedIndex == 1:
case "C-terminal." when bioPolymerOneBasedIndex < sequence.Length:
case "3'-terminal." when bioPolymerOneBasedIndex < sequence.Length:
// not the last residue in oligo but not in nucleic acid
case "Oligo 3'-terminal." when digestionProductOneBasedIndex < digestionProductLength
|| bioPolymerOneBasedIndex == sequence.Length:

// All Digested Termini AND original undigested termini
case "Peptide N-terminal." when digestionProductOneBasedIndex > 1:
case "Oligo 5'-terminal." when digestionProductOneBasedIndex > 1:
case "Peptide C-terminal." when digestionProductOneBasedIndex < digestionProductLength:
case "Oligo 3'-terminal." when digestionProductOneBasedIndex < digestionProductLength:
return false;

default:
Expand Down
46 changes: 40 additions & 6 deletions mzLib/Test/TestProteinDigestion.cs
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ public static void TestPeptideWithSetModifications()
variableModifications.Add(new Modification(_originalId: "ProtCmod", _target: motif, _locationRestriction: "C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass));

var ye = prot.Digest(digestionParams, new List<Modification>(), variableModifications).ToList();
Assert.AreEqual(2 * 2 * 2, ye.Count);
Assert.AreEqual(3 * 2 * 3, ye.Count);
Assert.AreEqual("[H]M[H][H]", ye.Last().SequenceWithChemicalFormulas);

double m1 = 5 * GetElement("H").PrincipalIsotope.AtomicMass + Residue.ResidueMonoisotopicMass['M'] + GetElement("O").PrincipalIsotope.AtomicMass;
Expand All @@ -229,8 +229,9 @@ public static void TestPeptideWithSetModifications()
double m3 = m1 - m2;
Assert.IsTrue(m3 < 1e-9);
}

[Test]
public static void TestPeptideWithFixedModifications()
public static void TestPeptideDigestion_FixedModifications_ProtModsOverwritePepMods()
{
var prot = new Protein("M", null);
DigestionParams digestionParams = new DigestionParams(maxMissedCleavages: 0, minPeptideLength: 1, maxModsForPeptides: 3); // if you pass Custom Protease7 this test gets really flakey.
Expand All @@ -239,8 +240,8 @@ public static void TestPeptideWithFixedModifications()
fixedMods.Add(new Modification(_originalId: "ProtNmod", _target: motif, _locationRestriction: "N-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass));
fixedMods.Add(new Modification(_originalId: "pepNmod", _target: motif, _locationRestriction: "Peptide N-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass));
fixedMods.Add(new Modification(_originalId: "resMod", _target: motif, _locationRestriction: "Anywhere.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass));
fixedMods.Add(new Modification(_originalId: "PepCmod", _target: motif, _locationRestriction: "Peptide C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass));
fixedMods.Add(new Modification(_originalId: "ProtCmod", _target: motif, _locationRestriction: "C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass));
fixedMods.Add(new Modification(_originalId: "PepCmod", _target: motif, _locationRestriction: "Peptide C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass));
var ok = prot.Digest(digestionParams, fixedMods, new List<Modification>()).ToList();

Assert.AreEqual(1, ok.Count);
Expand All @@ -252,7 +253,40 @@ public static void TestPeptideWithFixedModifications()
}

[Test]
public static void TestPeptideWithFixedModifications_TwoProducts()
public static void TestPeptideDigestion_FixedModifications_ProtModsOverwritePepMods_RandomizedModOrder()
{
    // Purpose: digest the one-residue protein "M" with five competing fixed mods supplied in
    // ten different shuffled orders, and check that every order yields the same single product
    // carrying ProtNmod, resMod and ProtCmod (i.e. neither pepNmod nor PepCmod appears).

    // Fixed seed: the ten permutations below are the same on every run.
    var rng = new Random(42);

    var protein = new Protein("M", null);
    var digestionParams = new DigestionParams(maxMissedCleavages: 0, minPeptideLength: 1, maxModsForPeptides: 3); // if you pass Custom Protease7 this test gets really flaky.

    ModificationMotif.TryGetMotif("M", out ModificationMotif motif);

    // Every mod in this test adds one hydrogen to M; only the id and the location restriction differ.
    Modification HydrogenModOnM(string id, string locationRestriction) => new Modification(
        _originalId: id,
        _target: motif,
        _locationRestriction: locationRestriction,
        _chemicalFormula: ChemicalFormula.ParseFormula("H"),
        _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass);

    var fixedMods = new List<Modification>
    {
        HydrogenModOnM("ProtNmod", "N-terminal."),
        HydrogenModOnM("pepNmod", "Peptide N-terminal."),
        HydrogenModOnM("resMod", "Anywhere."),
        HydrogenModOnM("ProtCmod", "C-terminal."),
        HydrogenModOnM("PepCmod", "Peptide C-terminal."),
    };

    // Expected outcome, independent of the order in which the mods are supplied.
    int expectedDigestionProducts = 1;
    string expectedFullSequence = "[:ProtNmod on M]M[:resMod on M][:ProtCmod on M]";
    string expectedSequenceWithChemicalFormulas = "[H]M[H][H]";
    double expectedMonoisotopicMass = 5 * GetElement("H").PrincipalIsotope.AtomicMass
                                      + Residue.ResidueMonoisotopicMass['M']
                                      + GetElement("O").PrincipalIsotope.AtomicMass;

    // Ten rounds: shuffle the fixed mods by a random sort key, digest, and compare to the expected values.
    for (int round = 1; round <= 10; round++)
    {
        var shuffledMods = fixedMods.OrderBy(m => rng.Next()).ToList();
        var products = protein.Digest(digestionParams, shuffledMods, new List<Modification>()).ToList();

        Assert.AreEqual(expectedDigestionProducts, products.Count);

        var firstProduct = products.First();
        Assert.AreEqual(expectedFullSequence, firstProduct.FullSequence);
        Assert.AreEqual(expectedSequenceWithChemicalFormulas, firstProduct.SequenceWithChemicalFormulas);
        Assert.AreEqual(expectedMonoisotopicMass, products.Last().MonoisotopicMass, 1e-9);
    }
}

[Test]
public static void TestPeptideDigestion_FixedModifications_ProtModsOverwritePepMods_TwoProducts()
{
var prot = new Protein("MKM", null);
DigestionParams digestionParams = new DigestionParams(maxMissedCleavages: 0, minPeptideLength: 1, maxModsForPeptides: 3, initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain);
Expand All @@ -265,10 +299,10 @@ public static void TestPeptideWithFixedModifications_TwoProducts()
fixedMods.Add(new Modification(_originalId: "pepNmod", _target: mMotif, _locationRestriction: "Peptide N-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass));
fixedMods.Add(new Modification(_originalId: "pepNmod", _target: kMotif, _locationRestriction: "Peptide N-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass));
fixedMods.Add(new Modification(_originalId: "resMod", _target: mMotif, _locationRestriction: "Anywhere.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass));
fixedMods.Add(new Modification(_originalId: "PepCmod", _target: mMotif, _locationRestriction: "Peptide C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass));
fixedMods.Add(new Modification(_originalId: "PepCmod", _target: kMotif, _locationRestriction: "Peptide C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass));
fixedMods.Add(new Modification(_originalId: "ProtCmod", _target: mMotif, _locationRestriction: "C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass));
fixedMods.Add(new Modification(_originalId: "ProtCmod", _target: kMotif, _locationRestriction: "C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass));
fixedMods.Add(new Modification(_originalId: "PepCmod", _target: mMotif, _locationRestriction: "Peptide C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass));
fixedMods.Add(new Modification(_originalId: "PepCmod", _target: kMotif, _locationRestriction: "Peptide C-terminal.", _chemicalFormula: ChemicalFormula.ParseFormula("H"), _monoisotopicMass: GetElement(1).PrincipalIsotope.AtomicMass));

var ok = prot.Digest(digestionParams, fixedMods, new List<Modification>()).ToList();

Expand Down
18 changes: 10 additions & 8 deletions mzLib/Test/Transcriptomics/TestDigestion.cs
Original file line number Diff line number Diff line change
Expand Up @@ -381,8 +381,9 @@ public static void TestTermini_ThreePrimeCyclicPhosphate()
variableMods = new List<Modification> { oligoCyclicPhosphate };
digestionProducts = rna.Digest(digestionParams, new List<Modification>(), variableMods)
.Select(p => (OligoWithSetMods)p).ToList();
Assert.That(digestionProducts.Count, Is.EqualTo(1));
Assert.That(digestionProducts.Count, Is.EqualTo(2));
Assert.That(digestionProducts[0].FullSequence, Is.EqualTo("UAGUCGUUGAUAG"));
Assert.That(digestionProducts[1].FullSequence, Is.EqualTo("UAGUCGUUGAUAG[Digestion Termini:Cyclic Phosphate on X]"));

// RNase T1 digestion, 3' terminal modification
digestionParams = new RnaDigestionParams("RNase T1");
Expand All @@ -403,13 +404,13 @@ public static void TestTermini_ThreePrimeCyclicPhosphate()
variableMods = new List<Modification> { oligoCyclicPhosphate };
digestionProducts = rna.Digest(digestionParams, new List<Modification>(), variableMods)
.Select(p => (OligoWithSetMods)p).ToList();
Assert.That(digestionProducts.Count, Is.EqualTo(7));
Assert.That(digestionProducts.Count, Is.EqualTo(8));
expected = new List<string>()
{
"UAG", "UAG[Digestion Termini:Cyclic Phosphate on X]",
"UCG", "UCG[Digestion Termini:Cyclic Phosphate on X]",
"UUG", "UUG[Digestion Termini:Cyclic Phosphate on X]",
"AUAG",
"AUAG","AUAG[Digestion Termini:Cyclic Phosphate on X]"
};

for (int i = 0; i < expected.Count; i++)
Expand All @@ -431,7 +432,7 @@ public static void TestTermini_FivePrimeLargeMod()
out errors).First();
Assert.That(!errors.Any());

// top-down digestion, 5' terminal modification
// top-down digestion, 5' terminal modification, expect two products
var variableMods = new List<Modification> { nucleicAcidLargeMod };
var digestionParams = new RnaDigestionParams("top-down");
var digestionProducts = rna.Digest(digestionParams, new List<Modification>(), variableMods)
Expand All @@ -440,12 +441,13 @@ public static void TestTermini_FivePrimeLargeMod()
Assert.That(digestionProducts[0].FullSequence, Is.EqualTo("UAGUCGUUGAUAG"));
Assert.That(digestionProducts[1].FullSequence, Is.EqualTo("[Standard:Pfizer 5'-Cap on X]UAGUCGUUGAUAG"));

// top-down digestion, 5' oligo terminal modification
// top-down digestion, 5' oligo terminal modification, expect two products
variableMods = new List<Modification> { oligoLargeMod };
digestionProducts = rna.Digest(digestionParams, new List<Modification>(), variableMods)
.Select(p => (OligoWithSetMods)p).ToList();
Assert.That(digestionProducts.Count, Is.EqualTo(1));
Assert.That(digestionProducts.Count, Is.EqualTo(2));
Assert.That(digestionProducts[0].FullSequence, Is.EqualTo("UAGUCGUUGAUAG"));
Assert.That(digestionProducts[1].FullSequence, Is.EqualTo("[Standard:Pfizer 5'-Cap on X]UAGUCGUUGAUAG"));

// RNase T1 digestion, 5' terminal modification
digestionParams = new RnaDigestionParams("RNase T1");
Expand All @@ -466,10 +468,10 @@ public static void TestTermini_FivePrimeLargeMod()
variableMods = new List<Modification> { oligoLargeMod };
digestionProducts = rna.Digest(digestionParams, new List<Modification>(), variableMods)
.Select(p => (OligoWithSetMods)p).ToList();
Assert.That(digestionProducts.Count, Is.EqualTo(7));
Assert.That(digestionProducts.Count, Is.EqualTo(8));
expected = new List<string>()
{
"UAG",
"UAG", "[Standard:Pfizer 5'-Cap on X]UAG",
"UCG", "[Standard:Pfizer 5'-Cap on X]UCG",
"UUG", "[Standard:Pfizer 5'-Cap on X]UUG",
"AUAG", "[Standard:Pfizer 5'-Cap on X]AUAG"
Expand Down

0 comments on commit de68239

Please sign in to comment.