From bd5aefc38c17d0d1fbe0cda8730ccc979c0da175 Mon Sep 17 00:00:00 2001 From: trishorts Date: Mon, 26 Aug 2024 12:54:29 -0500 Subject: [PATCH 01/25] update mzlib nuget package to 551 --- MetaMorpheus/CMD/CMD.csproj | 2 +- MetaMorpheus/EngineLayer/EngineLayer.csproj | 2 +- MetaMorpheus/GUI/GUI.csproj | 2 +- MetaMorpheus/GuiFunctions/GuiFunctions.csproj | 2 +- MetaMorpheus/TaskLayer/TaskLayer.csproj | 2 +- MetaMorpheus/Test/Test.csproj | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/MetaMorpheus/CMD/CMD.csproj b/MetaMorpheus/CMD/CMD.csproj index f278a13ce..44eb33bbe 100644 --- a/MetaMorpheus/CMD/CMD.csproj +++ b/MetaMorpheus/CMD/CMD.csproj @@ -24,7 +24,7 @@ - + diff --git a/MetaMorpheus/EngineLayer/EngineLayer.csproj b/MetaMorpheus/EngineLayer/EngineLayer.csproj index a8621408a..9c82f4ec2 100644 --- a/MetaMorpheus/EngineLayer/EngineLayer.csproj +++ b/MetaMorpheus/EngineLayer/EngineLayer.csproj @@ -21,7 +21,7 @@ - + diff --git a/MetaMorpheus/GUI/GUI.csproj b/MetaMorpheus/GUI/GUI.csproj index d57835e75..df8cfd5df 100644 --- a/MetaMorpheus/GUI/GUI.csproj +++ b/MetaMorpheus/GUI/GUI.csproj @@ -55,7 +55,7 @@ - + diff --git a/MetaMorpheus/GuiFunctions/GuiFunctions.csproj b/MetaMorpheus/GuiFunctions/GuiFunctions.csproj index a45054b18..e2989fbcb 100644 --- a/MetaMorpheus/GuiFunctions/GuiFunctions.csproj +++ b/MetaMorpheus/GuiFunctions/GuiFunctions.csproj @@ -16,7 +16,7 @@ - + diff --git a/MetaMorpheus/TaskLayer/TaskLayer.csproj b/MetaMorpheus/TaskLayer/TaskLayer.csproj index ded75a26e..35f063006 100644 --- a/MetaMorpheus/TaskLayer/TaskLayer.csproj +++ b/MetaMorpheus/TaskLayer/TaskLayer.csproj @@ -21,7 +21,7 @@ - + diff --git a/MetaMorpheus/Test/Test.csproj b/MetaMorpheus/Test/Test.csproj index 12286e77f..a4d8b9f5c 100644 --- a/MetaMorpheus/Test/Test.csproj +++ b/MetaMorpheus/Test/Test.csproj @@ -24,7 +24,7 @@ - + From 0ff7b360888641d38503e2f2ecf17d6e9353983a Mon Sep 17 00:00:00 2001 From: trishorts Date: Thu, 29 Aug 2024 09:28:49 -0500 Subject: [PATCH 02/25] add hyperscore to PEP calculation --- .../FdrAnalysis/PEPValueAnalysisGeneric.cs | 35 +++++++++++++++++++ .../EngineLayer/FdrAnalysis/PsmData.cs | 8 +++-- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPValueAnalysisGeneric.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPValueAnalysisGeneric.cs index 2fa22248f..26ed5ed15 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPValueAnalysisGeneric.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPValueAnalysisGeneric.cs @@ -683,6 +683,38 @@ private static float GetMobilityZScore(SpectralMatch psm, IBioPolymerWithSetMods return (float)mobilityZScore; } + public static float GetFraggerHyperScore(SpectralMatch psm, IBioPolymerWithSetMods selectedPeptide) + { + var peptideFragmentIons = psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide]; + var nIons = peptideFragmentIons.Where(f => f.NeutralTheoreticalProduct.Terminus == FragmentationTerminus.N).ToList(); + var cIons = peptideFragmentIons.Where(f => f.NeutralTheoreticalProduct.Terminus == FragmentationTerminus.C).ToList(); + double nIonIntensitySum = nIons.Sum(f => f.Intensity); + double cIonIntensitySum = cIons.Sum(f => f.Intensity); + float nIon = GetLog10Factorial((int)nIons.Count); + float cIon = GetLog10Factorial((int)cIons.Count); + + return (float)((nIon + cIon + Math.Log10(nIonIntensitySum * cIonIntensitySum))); + } + + private static ulong GetFactorial(ulong n) + { + if (n == 0) + { + return 1; + } + return n * GetFactorial(n - 1); + } + + public static float GetLog10Factorial(int n) + { + double log10Factorial = 0.0; + for (int i = 1; i <= n; i++) + { + log10Factorial += Math.Log10(i); + } + return (float)log10Factorial; + } + public static IEnumerable CreatePsmData(string searchType, List<(string fileName, CommonParameters fileSpecificParameters)> fileSpecificParameters, List psms, List psmIndicies, Dictionary>> timeDependantHydrophobicityAverageAndDeviation_unmodified, @@ -790,6 +822,7 @@ public static PsmData CreateOnePsmDataEntry(string searchType, List<(string file float peaksInPrecursorEnvelope = 0; float mostAbundantPrecursorPeakIntensity = 0; float fractionalIntensity = 0; + float fraggerHyperScore = 0; float missedCleavages = 0; float longestSeq = 0; @@ -837,6 +870,7 @@ public static PsmData CreateOnePsmDataEntry(string searchType, List<(string file peaksInPrecursorEnvelope = psm.PrecursorScanEnvelopePeakCount; mostAbundantPrecursorPeakIntensity = (float)Math.Round((float)psm.PrecursorScanIntensity / normalizationFactor * multiplier, 0); fractionalIntensity = (float)psm.PrecursorFractionalIntensity; + fraggerHyperScore = GetFraggerHyperScore(psm, selectedPeptide); if (PsmHasSpectralAngle(psm)) { @@ -954,6 +988,7 @@ public static PsmData CreateOnePsmDataEntry(string searchType, List<(string file MostAbundantPrecursorPeakIntensity = mostAbundantPrecursorPeakIntensity, PrecursorFractionalIntensity = fractionalIntensity, InternalIonCount = internalMatchingFragmentCount, + FraggerHyperScore = fraggerHyperScore }; return psm.PsmData_forPEPandPercolator; diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs index 9a6617d5d..acdc11b75 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs @@ -15,7 +15,7 @@ public class PsmData "TotalMatchingFragmentCount", "Intensity", "PrecursorChargeDiffToMode", "DeltaScore", "Notch", "ModsCount", "AbsoluteAverageFragmentMassErrorFromMedian", "MissedCleavagesCount", "Ambiguity", "LongestFragmentIonSeries", "ComplementaryIonCount", "HydrophobicityZScore", - "IsVariantPeptide", "IsDeadEnd", "IsLoop", "SpectralAngle", "HasSpectralAngle", + "IsVariantPeptide", "IsDeadEnd", "IsLoop", "SpectralAngle", "HasSpectralAngle", "FraggerHyperScore" } }, @@ -26,7 +26,7 @@ public class PsmData "Notch", "ModsCount", "AbsoluteAverageFragmentMassErrorFromMedian", "Ambiguity", "LongestFragmentIonSeries", "ComplementaryIonCount", "SpectralAngle", "HasSpectralAngle", "PeaksInPrecursorEnvelope", "ChimeraCount", - "MostAbundantPrecursorPeakIntensity", "PrecursorFractionalIntensity", "InternalIonCount" + "MostAbundantPrecursorPeakIntensity", "PrecursorFractionalIntensity", "InternalIonCount", "FraggerHyperScore" } }, @@ -75,6 +75,7 @@ public class PsmData { "MostAbundantPrecursorPeakIntensity", 1 }, { "PrecursorFractionalIntensity", 1 }, { "InternalIonCount", 1}, + { "FraggerHyperScore", 1}, }.ToImmutableDictionary(); public string ToString(string searchType) @@ -179,5 +180,8 @@ public string ToString(string searchType) [LoadColumn(28)] public float InternalIonCount { get; set; } + + [LoadColumn(29)] + public float FraggerHyperScore { get; set; } } } \ No newline at end of file From 929c326d8592123ad1e300a54382c35856f8e95b Mon Sep 17 00:00:00 2001 From: trishorts Date: Thu, 29 Aug 2024 10:21:57 -0500 Subject: [PATCH 03/25] pio --- .../FdrAnalysis/PEPAnalysisEngine.cs | 316 ++---------------- 1 file changed, 32 insertions(+), 284 deletions(-) diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs index ecefa7adb..2c618e47c 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs @@ -35,7 +35,7 @@ public class PepAnalysisEngine //The value Tuple is the average and standard deviation, respectively, of the predicted hydrophobicities of the observed peptides eluting at that rounded retention time. public Dictionary>> FileSpecificTimeDependantHydrophobicityAverageAndDeviation_unmodified { get; private set; } public Dictionary>> FileSpecificTimeDependantHydrophobicityAverageAndDeviation_modified { get; private set; } - public Dictionary>> FileSpecificTimeDependantHydrophobicityAverageAndDeviation_CZE { get; private set; } + public Dictionary>> FileSpecificTimeDependantHydrophobicityAverageAndDeviation_CZE { get; private set; } /// /// A dictionary which stores the chimeric ID string in the key and the number of chimeric identifications as the vale @@ -87,7 +87,7 @@ public string ComputePEPValuesForAllPSMs() ? PeptideMatchGroup.GroupByBaseSequence(AllPsms) : PeptideMatchGroup.GroupByIndividualPsm(AllPsms); - if(UsePeptideLevelQValueForTraining && (peptideGroups.Count(g => g.BestMatch.IsDecoy) < 4 || peptideGroups.Count(g => !g.BestMatch.IsDecoy) < 4)) + if (UsePeptideLevelQValueForTraining && (peptideGroups.Count(g => g.BestMatch.IsDecoy) < 4 || peptideGroups.Count(g => !g.BestMatch.IsDecoy) < 4)) { peptideGroups = PeptideMatchGroup.GroupByIndividualPsm(AllPsms); } @@ -97,9 +97,9 @@ public string ComputePEPValuesForAllPSMs() IEnumerable[] PSMDataGroups = new IEnumerable[numGroups]; for (int i = 0; i < numGroups; i++) { - PSMDataGroups[i] = CreatePsmData(SearchType, peptideGroups, peptideGroupIndices[i]); + PSMDataGroups[i] = CreatePsmData(SearchType, peptideGroups, peptideGroupIndices[i]); - if(!PSMDataGroups[i].Any(p => p.Label) || !PSMDataGroups[i].Any(p => !p.Label)) + if (!PSMDataGroups[i].Any(p => p.Label) || !PSMDataGroups[i].Any(p => !p.Label)) { return "Posterior error probability analysis failed. This can occur for small data sets when some sample groups are missing positive or negative training examples."; } @@ -152,11 +152,11 @@ public void BuildFileSpecificDictionaries(List trainingData, stri { FileSpecificMedianFragmentMassErrors = GetFileSpecificMedianFragmentMassError(trainingData); ChargeStateMode = GetChargeStateMode(trainingData); - + if (trainingVariables.Contains("HydrophobicityZScore")) { FileSpecificTimeDependantHydrophobicityAverageAndDeviation_unmodified = ComputeHydrophobicityValues(trainingData, false); - FileSpecificTimeDependantHydrophobicityAverageAndDeviation_modified = ComputeHydrophobicityValues(trainingData, true); + FileSpecificTimeDependantHydrophobicityAverageAndDeviation_modified = ComputeHydrophobicityValues(trainingData, true); FileSpecificTimeDependantHydrophobicityAverageAndDeviation_CZE = ComputeMobilityValues(trainingData); } } @@ -434,7 +434,7 @@ public int Compute_PSM_PEP(List peptideGroups, psm.PsmFdrInfo.PEP = 1 - pepValuePredictions.Max(); psm.PeptideFdrInfo.PEP = 1 - pepValuePredictions.Max(); } - + } } @@ -464,6 +464,7 @@ public PsmData CreateOnePsmDataEntry(string searchType, SpectralMatch psm, IBioP float peaksInPrecursorEnvelope = 0; float mostAbundantPrecursorPeakIntensity = 0; float fractionalIntensity = 0; + float fraggerHyperScore = 0; float missedCleavages = 0; float longestSeq = 0; @@ -512,6 +513,7 @@ public PsmData CreateOnePsmDataEntry(string searchType, SpectralMatch psm, IBioP peaksInPrecursorEnvelope = psm.PrecursorScanEnvelopePeakCount; mostAbundantPrecursorPeakIntensity = (float)Math.Round((float)psm.PrecursorScanIntensity / normalizationFactor * multiplier, 0); fractionalIntensity = (float)psm.PrecursorFractionalIntensity; + fraggerHyperScore = GetFraggerHyperScore(psm, selectedPeptide); if (PsmHasSpectralAngle(psm)) { @@ -629,6 +631,7 @@ public PsmData CreateOnePsmDataEntry(string searchType, SpectralMatch psm, IBioP MostAbundantPrecursorPeakIntensity = mostAbundantPrecursorPeakIntensity, PrecursorFractionalIntensity = fractionalIntensity, InternalIonCount = internalMatchingFragmentCount, + FraggerHyperScore = fraggerHyperScore }; return psm.PsmData_forPEPandPercolator; @@ -701,7 +704,7 @@ public Dictionary>> ComputeHydroph } fullSequences.Add(pwsm.FullSequence); - double predictedHydrophobicity = pwsm is PeptideWithSetModifications pep ? calc.ScoreSequence(pep) : 0; + double predictedHydrophobicity = pwsm is PeptideWithSetModifications pep ? calc.ScoreSequence(pep) : 0; //here i'm grouping this in 2 minute increments becuase there are cases where you get too few data points to get a good standard deviation an average. This is for stability. int possibleKey = (int)(2 * Math.Round(psm.ScanRetentionTime / 2d, 0)); @@ -940,282 +943,6 @@ private float GetMobilityZScore(SpectralMatch psm, IBioPolymerWithSetMods select return (float)mobilityZScore; } - public static IEnumerable CreatePsmData(string searchType, List<(string fileName, CommonParameters fileSpecificParameters)> fileSpecificParameters, - List psms, List psmIndicies, - Dictionary>> timeDependantHydrophobicityAverageAndDeviation_unmodified, - Dictionary>> timeDependantHydrophobicityAverageAndDeviation_modified, - Dictionary fileSpecificMedianFragmentMassErrors, int chargeStateMode) - { - object psmDataListLock = new object(); - List psmDataList = new List(); - List psmOrder = new List(); - int maxThreads = fileSpecificParameters.FirstOrDefault().fileSpecificParameters.MaxThreadsToUsePerFile; - int[] threads = Enumerable.Range(0, maxThreads).ToArray(); - - Parallel.ForEach(Partitioner.Create(0, psmIndicies.Count), - new ParallelOptions { MaxDegreeOfParallelism = maxThreads }, - (range, loopState) => - { - List localPsmDataList = new List(); - List localPsmOrder = new List(); - for (int i = range.Item1; i < range.Item2; i++) - { - SpectralMatch psm = psms[psmIndicies[i]]; - - // Stop loop if canceled - if (GlobalVariables.StopLoops) { return; } - - PsmData newPsmData = new PsmData(); - if (searchType == "crosslink") - { - CrosslinkSpectralMatch csm = (CrosslinkSpectralMatch)psms[i]; - - bool label; - if (csm.IsDecoy || csm.BetaPeptide.IsDecoy) - { - label = false; - newPsmData = CreateOnePsmDataEntry(searchType, fileSpecificParameters, psm, timeDependantHydrophobicityAverageAndDeviation_unmodified, timeDependantHydrophobicityAverageAndDeviation_modified, fileSpecificMedianFragmentMassErrors, chargeStateMode, csm.BestMatchingBioPolymersWithSetMods.First().Peptide, 0, label); - } - else if (!csm.IsDecoy && !csm.BetaPeptide.IsDecoy && psm.GetFdrInfo(UsePeptideLevelQValueForTraining).QValue <= QValueCutoff) - { - label = true; - newPsmData = CreateOnePsmDataEntry(searchType, fileSpecificParameters, psm, timeDependantHydrophobicityAverageAndDeviation_unmodified, timeDependantHydrophobicityAverageAndDeviation_modified, fileSpecificMedianFragmentMassErrors, chargeStateMode, csm.BestMatchingBioPolymersWithSetMods.First().Peptide, 0, label); - } - else - { - continue; - } - localPsmDataList.Add(newPsmData); - localPsmOrder.Add(i); - } - else - { - double bmp = 0; - foreach (var (notch, peptideWithSetMods) in psm.BestMatchingBioPolymersWithSetMods) - { - bool label; - double bmpc = psm.BestMatchingBioPolymersWithSetMods.Count(); - if (peptideWithSetMods.Parent.IsDecoy) - { - label = false; - newPsmData = CreateOnePsmDataEntry(searchType, fileSpecificParameters, psm, timeDependantHydrophobicityAverageAndDeviation_unmodified, timeDependantHydrophobicityAverageAndDeviation_modified, fileSpecificMedianFragmentMassErrors, chargeStateMode, peptideWithSetMods, notch, label); - } - else if (!peptideWithSetMods.Parent.IsDecoy && psm.GetFdrInfo(UsePeptideLevelQValueForTraining).QValue <= QValueCutoff) - { - label = true; - newPsmData = CreateOnePsmDataEntry(searchType, fileSpecificParameters, psm, timeDependantHydrophobicityAverageAndDeviation_unmodified, timeDependantHydrophobicityAverageAndDeviation_modified, fileSpecificMedianFragmentMassErrors, chargeStateMode, peptideWithSetMods, notch, label); - } - else - { - continue; - } - localPsmDataList.Add(newPsmData); - localPsmOrder.Add(i + (bmp / bmpc / 2.0)); - bmp += 1.0; - } - } - } - lock (psmDataListLock) - { - psmDataList.AddRange(localPsmDataList); - psmOrder.AddRange(localPsmOrder); - } - }); - PsmData[] pda = psmDataList.ToArray(); - double[] order = psmOrder.ToArray(); - - Array.Sort(order, pda);//this sorts both arrays thru sorting the array in position one. The order array, keeps track of the positon in the original psms list and returns the PsmData array in that same order. - - return pda.AsEnumerable(); - } - - public static PsmData CreateOnePsmDataEntry(string searchType, List<(string fileName, CommonParameters fileSpecificParameters)> fileSpecificParameters, SpectralMatch psm, Dictionary>> timeDependantHydrophobicityAverageAndDeviation_unmodified, Dictionary>> timeDependantHydrophobicityAverageAndDeviation_modified, Dictionary fileSpecificMedianFragmentMassErrors, int chargeStateMode, IBioPolymerWithSetMods selectedPeptide, int notchToUse, bool label) - { - double normalizationFactor = selectedPeptide.BaseSequence.Length; - float totalMatchingFragmentCount = 0; - float internalMatchingFragmentCount = 0; - float intensity = 0; - float chargeDifference = 0; - float deltaScore = 0; - int notch = 0; - float ambiguity = 0; - float modCount = 0; - float absoluteFragmentMassError = 0; - float spectralAngle = 0; - float hasSpectralAngle = 0; - float chimeraCount = 0; - float peaksInPrecursorEnvelope = 0; - float mostAbundantPrecursorPeakIntensity = 0; - float fractionalIntensity = 0; - - float missedCleavages = 0; - float longestSeq = 0; - float complementaryIonCount = 0; - float hydrophobicityZscore = float.NaN; - bool isVariantPeptide = false; - - //crosslink specific features - float alphaIntensity = 0; - float betaIntensity = 0; - float longestFragmentIonSeries_Alpha = 0; - float longestFragmentIonSeries_Beta = 0; - float isDeadEnd = 0; - float isLoop = 0; - float isInter = 0; - float isIntra = 0; - - double multiplier = 10; - if (searchType != "crosslink") - { - if (searchType == "top-down") - { - normalizationFactor = 1.0; - } - // count only terminal fragment ions - totalMatchingFragmentCount = (float)(Math.Round(psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide].Count(p => p.NeutralTheoreticalProduct.SecondaryProductType == null) / normalizationFactor * multiplier, 0)); - internalMatchingFragmentCount = (float)(Math.Round(psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide].Count(p => p.NeutralTheoreticalProduct.SecondaryProductType != null) / normalizationFactor * multiplier, 0)); - intensity = (float)Math.Min(50, Math.Round((psm.Score - (int)psm.Score) / normalizationFactor * Math.Pow(multiplier, 2), 0)); - chargeDifference = -Math.Abs(chargeStateMode - psm.ScanPrecursorCharge); - deltaScore = (float)Math.Round(psm.DeltaScore / normalizationFactor * multiplier, 0); - notch = notchToUse; - modCount = Math.Min((float)selectedPeptide.AllModsOneIsNterminus.Keys.Count(), 10); - if (psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide]?.Count() > 0) - { - absoluteFragmentMassError = (float)Math.Min(100.0, Math.Round(10.0 * Math.Abs(GetAverageFragmentMassError(psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide]) - fileSpecificMedianFragmentMassErrors[Path.GetFileName(psm.FullFilePath)]))); - } - - ambiguity = Math.Min((float)(psm.BioPolymersWithSetModsToMatchingFragments.Keys.Count - 1), 10); - longestSeq = (float)Math.Round(SpectralMatch.GetLongestIonSeriesBidirectional(psm.BioPolymersWithSetModsToMatchingFragments, selectedPeptide) / normalizationFactor * multiplier, 0); - complementaryIonCount = (float)Math.Round(SpectralMatch.GetCountComplementaryIons(psm.BioPolymersWithSetModsToMatchingFragments, selectedPeptide) / normalizationFactor * multiplier, 0); - isVariantPeptide = PeptideIsVariant(selectedPeptide); - spectralAngle = (float)psm.SpectralAngle; - if (chimeraCountDictionary.TryGetValue(psm.ChimeraIdString, out int val)) - chimeraCount = val; - peaksInPrecursorEnvelope = psm.PrecursorScanEnvelopePeakCount; - mostAbundantPrecursorPeakIntensity = (float)Math.Round((float)psm.PrecursorScanIntensity / normalizationFactor * multiplier, 0); - fractionalIntensity = (float)psm.PrecursorFractionalIntensity; - - if (PsmHasSpectralAngle(psm)) - { - hasSpectralAngle = 1; - } - - if (psm.DigestionParams.Protease.Name != "top-down") - { - missedCleavages = selectedPeptide.MissedCleavages; - bool fileIsCzeSeparationType = fileSpecificParameters.Any(p => Path.GetFileName(p.fileName) == Path.GetFileName(psm.FullFilePath) && p.fileSpecificParameters.SeparationType == "CZE"); - - if (!fileIsCzeSeparationType) - { - if (selectedPeptide.BaseSequence.Equals(selectedPeptide.FullSequence)) - { - hydrophobicityZscore = (float)Math.Round(GetSSRCalcHydrophobicityZScore(psm, selectedPeptide, timeDependantHydrophobicityAverageAndDeviation_unmodified) * 10.0, 0); - } - else - { - hydrophobicityZscore = (float)Math.Round(GetSSRCalcHydrophobicityZScore(psm, selectedPeptide, timeDependantHydrophobicityAverageAndDeviation_modified) * 10.0, 0); - } - } - else - { - hydrophobicityZscore = (float)Math.Round(GetMobilityZScore(psm, selectedPeptide) * 10.0, 0); - } - } - //this is not for actual crosslinks but for the byproducts of crosslink loop links, deadends, etc. - if (psm is CrosslinkSpectralMatch) - { - CrosslinkSpectralMatch csm = (CrosslinkSpectralMatch)psm; - isDeadEnd = Convert.ToSingle((csm.CrossType == PsmCrossType.DeadEnd) || (csm.CrossType == PsmCrossType.DeadEndH2O) || (csm.CrossType == PsmCrossType.DeadEndNH2) || (csm.CrossType == PsmCrossType.DeadEndTris)); - isLoop = Convert.ToSingle(csm.CrossType == PsmCrossType.Loop); - } - } - else - { - CrosslinkSpectralMatch csm = (CrosslinkSpectralMatch)psm; - PeptideWithSetModifications selectedAlphaPeptide = csm.BestMatchingBioPolymersWithSetMods.Select(p => p.Peptide as PeptideWithSetModifications).First(); - PeptideWithSetModifications selectedBetaPeptide = csm.BetaPeptide?.BestMatchingBioPolymersWithSetMods.Select(p => p.Peptide as PeptideWithSetModifications).First(); - - float alphaNormalizationFactor = selectedAlphaPeptide.BaseSequence.Length; - float betaNormalizationFactor = selectedBetaPeptide == null ? (float)0 : selectedBetaPeptide.BaseSequence.Length; - float totalNormalizationFactor = alphaNormalizationFactor + betaNormalizationFactor; - - totalMatchingFragmentCount = (float)Math.Round(csm.XLTotalScore / totalNormalizationFactor * 10, 0); - - //Compute fragment mass error - int alphaCount = 0; - float alphaError = 0; - if (csm.BioPolymersWithSetModsToMatchingFragments[selectedAlphaPeptide]?.Count > 0) - { - alphaCount = csm.BioPolymersWithSetModsToMatchingFragments[selectedAlphaPeptide].Count; - alphaError = Math.Abs(GetAverageFragmentMassError(csm.BioPolymersWithSetModsToMatchingFragments[selectedAlphaPeptide])); - } - int betaCount = 0; - float betaError = 0; - if (csm.BetaPeptide.BioPolymersWithSetModsToMatchingFragments[selectedBetaPeptide]?.Count > 0) - { - betaCount = csm.BetaPeptide.BioPolymersWithSetModsToMatchingFragments[selectedBetaPeptide].Count; - betaError = Math.Abs(GetAverageFragmentMassError(csm.BetaPeptide.BioPolymersWithSetModsToMatchingFragments[selectedBetaPeptide])); - } - - float averageError = 0; - if ((alphaCount + betaCount) > 0) - { - averageError = (alphaCount * alphaError + betaCount * betaError) / (alphaCount + betaCount); - } - - absoluteFragmentMassError = (float)Math.Min(100, Math.Round(averageError - fileSpecificMedianFragmentMassErrors[Path.GetFileName(csm.FullFilePath)] * 10.0, 0)); - //End compute fragment mass error - - deltaScore = (float)Math.Round(csm.DeltaScore / totalNormalizationFactor * 10.0, 0); - chargeDifference = -Math.Abs(chargeStateMode - psm.ScanPrecursorCharge); - alphaIntensity = (float)Math.Min(100, Math.Round((csm.Score - (int)csm.Score) / alphaNormalizationFactor * 100.0, 0)); - betaIntensity = csm.BetaPeptide == null ? (float)0 : (float)Math.Min(100.0, Math.Round((csm.BetaPeptide.Score - (int)csm.BetaPeptide.Score) / betaNormalizationFactor * 100.0, 0)); - longestFragmentIonSeries_Alpha = (float)Math.Round(SpectralMatch.GetLongestIonSeriesBidirectional(csm.BioPolymersWithSetModsToMatchingFragments, selectedAlphaPeptide) / alphaNormalizationFactor * 10.0, 0); - longestFragmentIonSeries_Beta = selectedBetaPeptide == null ? (float)0 : SpectralMatch.GetLongestIonSeriesBidirectional(csm.BetaPeptide.BioPolymersWithSetModsToMatchingFragments, selectedBetaPeptide) / betaNormalizationFactor; - longestFragmentIonSeries_Beta = (float)Math.Round(longestFragmentIonSeries_Beta * 10.0, 0); - isInter = Convert.ToSingle(csm.CrossType == PsmCrossType.Inter); - isIntra = Convert.ToSingle(csm.CrossType == PsmCrossType.Intra); - } - - psm.PsmData_forPEPandPercolator = new PsmData - { - TotalMatchingFragmentCount = totalMatchingFragmentCount, - Intensity = intensity, - PrecursorChargeDiffToMode = chargeDifference, - DeltaScore = deltaScore, - Notch = notch, - ModsCount = modCount, - AbsoluteAverageFragmentMassErrorFromMedian = absoluteFragmentMassError, - MissedCleavagesCount = missedCleavages, - Ambiguity = ambiguity, - LongestFragmentIonSeries = longestSeq, - ComplementaryIonCount = complementaryIonCount, - HydrophobicityZScore = hydrophobicityZscore, - IsVariantPeptide = Convert.ToSingle(isVariantPeptide), - - AlphaIntensity = alphaIntensity, - BetaIntensity = betaIntensity, - LongestFragmentIonSeries_Alpha = longestFragmentIonSeries_Alpha, - LongestFragmentIonSeries_Beta = longestFragmentIonSeries_Beta, - IsDeadEnd = isDeadEnd, - IsLoop = isLoop, - IsInter = isInter, - IsIntra = isIntra, - - Label = label, - - SpectralAngle = spectralAngle, - HasSpectralAngle = hasSpectralAngle, - PeaksInPrecursorEnvelope = peaksInPrecursorEnvelope, - ChimeraCount = chimeraCount, - MostAbundantPrecursorPeakIntensity = mostAbundantPrecursorPeakIntensity, - PrecursorFractionalIntensity = fractionalIntensity, - InternalIonCount = internalMatchingFragmentCount, - }; - - return psm.PsmData_forPEPandPercolator; - } - private static bool PeptideIsVariant(IBioPolymerWithSetMods bpwsm) { if (bpwsm is not PeptideWithSetModifications pwsm) @@ -1310,7 +1037,28 @@ public static float GetAverageFragmentMassError(IEnumerable return massErrors.Average(); } + public static float GetFraggerHyperScore(SpectralMatch psm, IBioPolymerWithSetMods selectedPeptide) + { + var peptideFragmentIons = psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide]; + var nIons = peptideFragmentIons.Where(f => f.NeutralTheoreticalProduct.Terminus == FragmentationTerminus.N).ToList(); + var cIons = peptideFragmentIons.Where(f => f.NeutralTheoreticalProduct.Terminus == FragmentationTerminus.C).ToList(); + double nIonIntensitySum = nIons.Sum(f => f.Intensity); + double cIonIntensitySum = cIons.Sum(f => f.Intensity); + float nIon = GetLog10Factorial((int)nIons.Count); + float cIon = GetLog10Factorial((int)cIons.Count); + + return (float)((nIon + cIon + Math.Log10(nIonIntensitySum * cIonIntensitySum))); + } + public static float GetLog10Factorial(int n) + { + double log10Factorial = 0.0; + for (int i = 1; i <= n; i++) + { + log10Factorial += Math.Log10(i); + } + return (float)log10Factorial; + } #endregion } } \ No newline at end of file From fa1657129a19bbec73242ed7e8f0aef7e0728df6 Mon Sep 17 00:00:00 2001 From: trishorts Date: Thu, 29 Aug 2024 10:46:49 -0500 Subject: [PATCH 04/25] lets see --- MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs index 2c618e47c..70e90f70a 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs @@ -490,9 +490,9 @@ public PsmData CreateOnePsmDataEntry(string searchType, SpectralMatch psm, IBioP normalizationFactor = 1.0; } // count only terminal fragment ions - totalMatchingFragmentCount = (float)(Math.Round(psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide].Count(p => p.NeutralTheoreticalProduct.SecondaryProductType == null) / normalizationFactor * multiplier, 0)); + totalMatchingFragmentCount = (float)psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide].Count(); internalMatchingFragmentCount = (float)(Math.Round(psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide].Count(p => p.NeutralTheoreticalProduct.SecondaryProductType != null) / normalizationFactor * multiplier, 0)); - intensity = (float)Math.Min(50, Math.Round((psm.Score - (int)psm.Score) / normalizationFactor * Math.Pow(multiplier, 2), 0)); + intensity = (float)(psm.Score - (int)psm.Score); chargeDifference = -Math.Abs(ChargeStateMode - psm.ScanPrecursorCharge); deltaScore = (float)Math.Round(psm.DeltaScore / normalizationFactor * multiplier, 0); notch = notchToUse; From 0c3f584e92272a922183805e808d8506f18fb573 Mon Sep 17 00:00:00 2001 From: trishorts Date: Tue, 3 Sep 2024 11:06:12 -0500 Subject: [PATCH 05/25] piu --- .../FdrAnalysis/PEPAnalysisEngine.cs | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs index 70e90f70a..b8a9bcefd 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs @@ -17,6 +17,7 @@ using Omics.Modifications; using Omics; using Easy.Common.Extensions; +using MathNet.Numerics.LinearAlgebra.Solvers; namespace EngineLayer { @@ -1042,15 +1043,25 @@ public static float GetFraggerHyperScore(SpectralMatch psm, IBioPolymerWithSetMo var peptideFragmentIons = psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide]; var nIons = peptideFragmentIons.Where(f => f.NeutralTheoreticalProduct.Terminus == FragmentationTerminus.N).ToList(); var cIons = peptideFragmentIons.Where(f => f.NeutralTheoreticalProduct.Terminus == FragmentationTerminus.C).ToList(); - double nIonIntensitySum = nIons.Sum(f => f.Intensity); - double cIonIntensitySum = cIons.Sum(f => f.Intensity); - float nIon = GetLog10Factorial((int)nIons.Count); - float cIon = GetLog10Factorial((int)cIons.Count); + float nIonIntensitySum = 0; + if(nIons.Any()) + { + nIons.Sum(f => f.Intensity); + } + float cIonIntensitySum = 0; + if (cIons.Any()) + { + cIons.Sum(f => f.Intensity); + } + float nIon = GetLog10Factorial((int)nIons.Count) ?? 0; + float cIon = GetLog10Factorial((int)cIons.Count) ?? 0; + var log10IntensitySum = Math.Log10(nIonIntensitySum * cIonIntensitySum); + log10IntensitySum = log10IntensitySum ?? log10IntensitySum ?? 0; - return (float)((nIon + cIon + Math.Log10(nIonIntensitySum * cIonIntensitySum))); + return (float)((nIon + cIon + log10IntensitySum)); } - public static float GetLog10Factorial(int n) + public static float? GetLog10Factorial(int n) { double log10Factorial = 0.0; for (int i = 1; i <= n; i++) From bf7b078bcb59be1984b85af828233a15532d9b34 Mon Sep 17 00:00:00 2001 From: trishorts Date: Mon, 9 Sep 2024 14:05:58 -0500 Subject: [PATCH 06/25] length specific hyperscore --- .../EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs | 10 ++++++---- MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs | 10 +++++----- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs index b8a9bcefd..cefb90a67 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs @@ -503,6 +503,7 @@ public PsmData CreateOnePsmDataEntry(string searchType, SpectralMatch psm, IBioP absoluteFragmentMassError = (float)Math.Min(100.0, Math.Round(10.0 * Math.Abs(GetAverageFragmentMassError(psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide]) - FileSpecificMedianFragmentMassErrors[Path.GetFileName(psm.FullFilePath)]))); } + ambiguity = Math.Min((float)(psm.BioPolymersWithSetModsToMatchingFragments.Keys.Count - 1), 10); //ambiguity = 10; // I'm pretty sure that you shouldn't train on ambiguity and its skewing the results longestSeq = (float)Math.Round(SpectralMatch.GetLongestIonSeriesBidirectional(psm.BioPolymersWithSetModsToMatchingFragments, selectedPeptide) / normalizationFactor * multiplier, 0); @@ -514,7 +515,8 @@ public PsmData CreateOnePsmDataEntry(string searchType, SpectralMatch psm, IBioP peaksInPrecursorEnvelope = psm.PrecursorScanEnvelopePeakCount; mostAbundantPrecursorPeakIntensity = (float)Math.Round((float)psm.PrecursorScanIntensity / normalizationFactor * multiplier, 0); fractionalIntensity = (float)psm.PrecursorFractionalIntensity; - fraggerHyperScore = GetFraggerHyperScore(psm, selectedPeptide); + float[] fragerHyperScoreByLength = new float[75]; + fragerHyperScoreByLength[selectedPeptide.BaseSequence.Length - 1] = GetFraggerHyperScore(psm, selectedPeptide); if (PsmHasSpectralAngle(psm)) { @@ -632,7 +634,7 @@ public PsmData CreateOnePsmDataEntry(string searchType, SpectralMatch psm, IBioP MostAbundantPrecursorPeakIntensity = mostAbundantPrecursorPeakIntensity, PrecursorFractionalIntensity = fractionalIntensity, InternalIonCount = internalMatchingFragmentCount, - FraggerHyperScore = fraggerHyperScore + FraggerHyperScorebyLength = fraggerHyperScore }; return psm.PsmData_forPEPandPercolator; @@ -1055,8 +1057,8 @@ public static float GetFraggerHyperScore(SpectralMatch psm, IBioPolymerWithSetMo } float nIon = GetLog10Factorial((int)nIons.Count) ?? 0; float cIon = GetLog10Factorial((int)cIons.Count) ?? 0; - var log10IntensitySum = Math.Log10(nIonIntensitySum * cIonIntensitySum); - log10IntensitySum = log10IntensitySum ?? log10IntensitySum ?? 0; + double log10IntensitySum = 0; + log10IntensitySum = Math.Log10(nIonIntensitySum * cIonIntensitySum); return (float)((nIon + cIon + log10IntensitySum)); } diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs index acdc11b75..2124f9a6d 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs @@ -15,7 +15,7 @@ public class PsmData "TotalMatchingFragmentCount", "Intensity", "PrecursorChargeDiffToMode", "DeltaScore", "Notch", "ModsCount", "AbsoluteAverageFragmentMassErrorFromMedian", "MissedCleavagesCount", "Ambiguity", "LongestFragmentIonSeries", "ComplementaryIonCount", "HydrophobicityZScore", - "IsVariantPeptide", "IsDeadEnd", "IsLoop", "SpectralAngle", "HasSpectralAngle", "FraggerHyperScore" + "IsVariantPeptide", "IsDeadEnd", "IsLoop", "SpectralAngle", "HasSpectralAngle", "FraggerHyperScorebyLength" } }, @@ -26,7 +26,7 @@ public class PsmData "Notch", "ModsCount", "AbsoluteAverageFragmentMassErrorFromMedian", "Ambiguity", "LongestFragmentIonSeries", "ComplementaryIonCount", "SpectralAngle", "HasSpectralAngle", "PeaksInPrecursorEnvelope", "ChimeraCount", - "MostAbundantPrecursorPeakIntensity", "PrecursorFractionalIntensity", "InternalIonCount", "FraggerHyperScore" + "MostAbundantPrecursorPeakIntensity", "PrecursorFractionalIntensity", "InternalIonCount", "FraggerHyperScorebyLength" } }, @@ -75,7 +75,7 @@ public class PsmData { "MostAbundantPrecursorPeakIntensity", 1 }, { "PrecursorFractionalIntensity", 1 }, { "InternalIonCount", 1}, - { "FraggerHyperScore", 1}, + { "FraggerHyperScorebyLength", 1}, }.ToImmutableDictionary(); public string ToString(string searchType) @@ -181,7 +181,7 @@ public string ToString(string searchType) [LoadColumn(28)] public float InternalIonCount { get; set; } - [LoadColumn(29)] - public float FraggerHyperScore { get; set; } + [LoadColumn(29,103)] + public float FraggerHyperScorebyLength { get; set; } } } \ No newline at end of file From d08d03d5503529003812c27f38f2188c7965d934 Mon Sep 17 00:00:00 2001 From: trishorts Date: Mon, 9 Sep 2024 14:07:09 -0500 Subject: [PATCH 07/25] length 150 --- MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs | 2 +- MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs index cefb90a67..19bc76d74 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs @@ -515,7 +515,7 @@ public PsmData CreateOnePsmDataEntry(string searchType, SpectralMatch psm, IBioP peaksInPrecursorEnvelope = psm.PrecursorScanEnvelopePeakCount; mostAbundantPrecursorPeakIntensity = (float)Math.Round((float)psm.PrecursorScanIntensity / normalizationFactor * multiplier, 0); fractionalIntensity = (float)psm.PrecursorFractionalIntensity; - float[] fragerHyperScoreByLength = new float[75]; + float[] fragerHyperScoreByLength = new float[150]; fragerHyperScoreByLength[selectedPeptide.BaseSequence.Length - 1] = GetFraggerHyperScore(psm, selectedPeptide); if (PsmHasSpectralAngle(psm)) diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs index 2124f9a6d..3a6a57064 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs @@ -181,7 +181,7 @@ public string ToString(string searchType) [LoadColumn(28)] public float InternalIonCount { get; set; } - [LoadColumn(29,103)] + [LoadColumn(29,178)] public float FraggerHyperScorebyLength { get; set; } } } \ No newline at end of file From 7a8a3035c1126eb0b54315e12c58ccbdf01832d0 Mon Sep 17 00:00:00 2001 From: trishorts Date: Tue, 10 Sep 2024 10:43:58 -0500 Subject: [PATCH 08/25] sfad --- .../FdrAnalysis/PEPAnalysisEngine.cs | 39 ++++++++++++++----- .../EngineLayer/FdrAnalysis/PsmData.cs | 17 ++++---- 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs index 19bc76d74..910ec5780 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs @@ -465,7 +465,7 @@ public PsmData CreateOnePsmDataEntry(string searchType, SpectralMatch psm, IBioP float peaksInPrecursorEnvelope = 0; float mostAbundantPrecursorPeakIntensity = 0; float fractionalIntensity = 0; - float fraggerHyperScore = 0; + float[] fragerHyperScoreByLength = new float[150]; float missedCleavages = 0; float longestSeq = 0; @@ -515,7 +515,6 @@ public PsmData CreateOnePsmDataEntry(string searchType, SpectralMatch psm, IBioP peaksInPrecursorEnvelope = psm.PrecursorScanEnvelopePeakCount; mostAbundantPrecursorPeakIntensity = (float)Math.Round((float)psm.PrecursorScanIntensity / normalizationFactor * multiplier, 0); fractionalIntensity = (float)psm.PrecursorFractionalIntensity; - float[] fragerHyperScoreByLength = new float[150]; fragerHyperScoreByLength[selectedPeptide.BaseSequence.Length - 1] = GetFraggerHyperScore(psm, selectedPeptide); if (PsmHasSpectralAngle(psm)) @@ -634,7 +633,7 @@ public PsmData CreateOnePsmDataEntry(string searchType, SpectralMatch psm, IBioP MostAbundantPrecursorPeakIntensity = mostAbundantPrecursorPeakIntensity, PrecursorFractionalIntensity = fractionalIntensity, InternalIonCount = internalMatchingFragmentCount, - FraggerHyperScorebyLength = fraggerHyperScore + FraggerHyperScorebyLength = fragerHyperScoreByLength }; return psm.PsmData_forPEPandPercolator; @@ -1040,6 +1039,14 @@ public static float GetAverageFragmentMassError(IEnumerable return massErrors.Average(); } + /// + /// Taken from Nat. Methods.https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5409104/ + /// "MSFragger: ultrafast and comprehensive peptide identification in shotgun proteomics" + /// Andy T. Kong,1,2 Felipe V. Leprevost,2 Dmitry M. Avtonomov,2 Dattatreya Mellacheruvu,2 and Alexey I. Nesvizhskii1,2,* + /// + /// + /// + /// public static float GetFraggerHyperScore(SpectralMatch psm, IBioPolymerWithSetMods selectedPeptide) { var peptideFragmentIons = psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide]; @@ -1048,19 +1055,31 @@ public static float GetFraggerHyperScore(SpectralMatch psm, IBioPolymerWithSetMo float nIonIntensitySum = 0; if(nIons.Any()) { - nIons.Sum(f => f.Intensity); + nIonIntensitySum = (float)nIons.Sum(f => f.Intensity); } float cIonIntensitySum = 0; if (cIons.Any()) { - cIons.Sum(f => f.Intensity); + cIonIntensitySum = (float)cIons.Sum(f => f.Intensity); + } + float matched_n_IonCountFactorial = 0; + if(nIons.Count > 0) + { + matched_n_IonCountFactorial = GetLog10Factorial((int)nIons.Count).Value; + } + float matched_c_IonCountFactorial = 0; + if (nIons.Count > 0) + { + matched_c_IonCountFactorial = GetLog10Factorial((int)cIons.Count).Value; } - float nIon = GetLog10Factorial((int)nIons.Count) ?? 0; - float cIon = GetLog10Factorial((int)cIons.Count) ?? 0; - double log10IntensitySum = 0; - log10IntensitySum = Math.Log10(nIonIntensitySum * cIonIntensitySum); - return (float)((nIon + cIon + log10IntensitySum)); + double log10IntensitySum = 0.1; + if(nIonIntensitySum > 0 && cIonIntensitySum > 0) + { + log10IntensitySum = Math.Log10(nIonIntensitySum * cIonIntensitySum); + } + + return (float)((matched_n_IonCountFactorial + matched_c_IonCountFactorial + log10IntensitySum));; } public static float? GetLog10Factorial(int n) diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs index 3a6a57064..d2719fee4 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs @@ -83,13 +83,13 @@ public string ToString(string searchType) StringBuilder sb = new StringBuilder(); var variablesToOutput = PsmData.trainingInfos[searchType]; - foreach (var variable in variablesToOutput) - { - var property = typeof(PsmData).GetProperty(variable).GetValue(this, null); - var floatValue = (float)property; - sb.Append("\t"); - sb.Append(floatValue.ToString()); - } + //foreach (var variable in variablesToOutput) + //{ + // var property = typeof(PsmData).GetProperty(variable).GetValue(this, null); + // var floatValue = (float)property; + // sb.Append("\t"); + // sb.Append(floatValue.ToString()); + //} return sb.ToString(); } @@ -182,6 +182,7 @@ public string ToString(string searchType) public float InternalIonCount { get; set; } [LoadColumn(29,178)] - public float FraggerHyperScorebyLength { get; set; } + [VectorType(150)] + public float[] FraggerHyperScorebyLength { get; set; } } } \ No newline at end of file From 79282d4aef9344e6a1c7dc9c5fa167bc5fedd81e Mon Sep 17 00:00:00 2001 From: trishorts Date: Tue, 10 Sep 2024 13:57:42 -0500 Subject: [PATCH 09/25] works --- .../FdrAnalysis/PEPAnalysisEngine.cs | 4 +-- .../EngineLayer/FdrAnalysis/PsmData.cs | 26 ++++++++++++++----- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs index 910ec5780..1279dee51 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs @@ -109,7 +109,7 @@ public string ComputePEPValuesForAllPSMs() MLContext mlContext = new MLContext(); TransformerChain>>[] trainedModels = new TransformerChain>>[numGroups]; - var trainer = mlContext.BinaryClassification.Trainers.FastTree(labelColumnName: "Label", featureColumnName: "Features", numberOfTrees: 400); + var trainer = mlContext.BinaryClassification.Trainers.FastTree(labelColumnName: "Label", featureColumnName: "Features", numberOfTrees: 400, numberOfLeaves: 20, minimumExampleCountPerLeaf: 10); var pipeline = mlContext.Transforms.Concatenate("Features", TrainingVariables) .Append(trainer); @@ -1078,7 +1078,7 @@ public static float GetFraggerHyperScore(SpectralMatch psm, IBioPolymerWithSetMo { log10IntensitySum = Math.Log10(nIonIntensitySum * cIonIntensitySum); } - + return (float)((matched_n_IonCountFactorial + matched_c_IonCountFactorial + log10IntensitySum));; } diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs index d2719fee4..b286327f5 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs @@ -83,13 +83,25 @@ public string ToString(string searchType) StringBuilder sb = new StringBuilder(); var variablesToOutput = PsmData.trainingInfos[searchType]; - //foreach (var variable in variablesToOutput) - //{ - // var property = typeof(PsmData).GetProperty(variable).GetValue(this, null); - // var floatValue = (float)property; - // sb.Append("\t"); - // sb.Append(floatValue.ToString()); - //} + foreach (var variable in variablesToOutput) + { + var property = typeof(PsmData).GetProperty(variable).GetValue(this, null); + if(property is float[]) + { + foreach (var value in (float[])property) + { + sb.Append("\t"); + sb.Append(value.ToString()); + } + continue; + } + else + { + var floatValue = (float)property; + sb.Append("\t"); + sb.Append(floatValue.ToString()); + } + } return sb.ToString(); } From 36f489f438e8c90f3824c51f6e3a514f56139df8 Mon Sep 17 00:00:00 2001 From: trishorts Date: Tue, 10 Sep 2024 14:49:34 -0500 Subject: [PATCH 10/25] correctly formated percolator output --- .../SearchTask/PostSearchAnalysisTask.cs | 43 ++++++++++++++++--- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index 69e73a402..46fb89539 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -1778,19 +1778,39 @@ private static void WritePsmsForPercolator(List psmList, string w { using (StreamWriter output = new StreamWriter(writtenFileForPercolator)) { - string searchType; + string searchType = "standard"; if (psmList.Where(p => p != null).Any() && psmList[0].DigestionParams.Protease.Name != null && psmList[0].DigestionParams.Protease.Name == "top-down") { searchType = "top-down"; } - else + + string header = "SpecId\tLabel\tScanNr"; + + header += "\tPeptide\tProteins"; + + StringBuilder sb = new StringBuilder(); + var variablesToOutput = PsmData.trainingInfos[searchType]; + + foreach (var variable in variablesToOutput) { - searchType = "standard"; + if (variable is "FraggerHyperScorebyLength") + { + for(int i = 0; i < 150; i++) + { + sb.Append("\t"); + sb.Append("Length " + (i + 1).ToString()); + } + continue; + } + else + { + sb.Append("\t"); + sb.Append(variable.ToString()); + } } - string header = "SpecId\tLabel\tScanNr\t"; - header += String.Join("\t", PsmData.trainingInfos[searchType]); - header += "\tPeptide\tProteins"; + header += sb.ToString(); + output.WriteLine(header); @@ -1799,6 +1819,15 @@ private static void WritePsmsForPercolator(List psmList, string w foreach (var headerVariable in PsmData.trainingInfos[searchType]) { + if(headerVariable is "FraggerHyperScorebyLength") + { + for (int i = 0; i < 150; i++) + { + directions.Append("\t"); + directions.Append(PsmData.assumedAttributeDirection[headerVariable]); + } + continue; + } directions.Append("\t"); directions.Append(PsmData.assumedAttributeDirection[headerVariable]); } @@ -1814,9 +1843,9 @@ private static void WritePsmsForPercolator(List psmList, string w output.Write(idNumber.ToString()); output.Write('\t' + (peptide.Peptide.Parent.IsDecoy ? -1 : 1).ToString()); output.Write('\t' + psm.ScanNumber.ToString()); - output.Write(psm.PsmData_forPEPandPercolator.ToString(searchType)); output.Write('\t' + (peptide.Peptide.PreviousResidue + "." + peptide.Peptide.FullSequence + "." + peptide.Peptide.NextResidue).ToString()); output.Write('\t' + (peptide.Peptide.Parent.Accession).ToString()); + output.Write(psm.PsmData_forPEPandPercolator.ToString(searchType)); output.WriteLine(); } idNumber++; From 719f33dcf6675d37c74bbf059db34488fcfba652 Mon Sep 17 00:00:00 2001 From: trishorts Date: Wed, 11 Sep 2024 10:41:42 -0500 Subject: [PATCH 11/25] adding unit tests --- MetaMorpheus/Test/FdrTest.cs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/MetaMorpheus/Test/FdrTest.cs b/MetaMorpheus/Test/FdrTest.cs index 73bb1af82..975558b1c 100644 --- a/MetaMorpheus/Test/FdrTest.cs +++ b/MetaMorpheus/Test/FdrTest.cs @@ -791,5 +791,19 @@ public static void TestPsmData() string topDownToString = "\t0\t1\t2\t3\t4\t5\t6\t8\t9\t10\t21\t22\t23\t24\t25\t26\t27"; Assert.AreEqual(topDownToString, pd.ToString("top-down")); } + + [TestCase(5, 2.07918119f)] + [TestCase(0, 0.0f)] + [TestCase(-5, 0.0f)] + [Test] + public static void GetLog10Factorial_ReturnsCorrectValue(int n, float? expected) + { + // Act + float? result = PepAnalysisEngine.GetLog10Factorial(n); + + // Assert + Assert.AreEqual(expected, result); + } + } } \ No newline at end of file From 6b86df7d84924ffd29e8d2ffbc0d1f6227c9dcba Mon Sep 17 00:00:00 2001 From: trishorts Date: Wed, 11 Sep 2024 11:16:27 -0500 Subject: [PATCH 12/25] unit test 2 --- MetaMorpheus/Test/FdrTest.cs | 41 ++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/MetaMorpheus/Test/FdrTest.cs b/MetaMorpheus/Test/FdrTest.cs index 975558b1c..13c5e9cfc 100644 --- a/MetaMorpheus/Test/FdrTest.cs +++ b/MetaMorpheus/Test/FdrTest.cs @@ -24,6 +24,7 @@ using static iText.Svg.SvgConstants; using System.Reflection; using UsefulProteomicsDatabases.Generated; +using Easy.Common.Extensions; namespace Test { @@ -805,5 +806,45 @@ public static void GetLog10Factorial_ReturnsCorrectValue(int n, float? expected) Assert.AreEqual(expected, result); } + [Test] + public static void TestGetFraggerHyperScore() + { + MassDiffAcceptor searchModes = new DotMassDiffAcceptor(null, new List { 0, 1.0029 }, new PpmTolerance(5)); + + var p = new Protein("PEPTIDE", "accession"); + var d = p.Digest(new DigestionParams(), new List(), new List()).ToList(); + PeptideWithSetModifications pep = d.First(); + + CommonParameters commonParameters = new CommonParameters(); + + var digested = p.Digest(commonParameters.DigestionParams, new List(), new List()).ToList(); + + TestDataFile t = new TestDataFile(new List { pep}); + + MsDataScan mzLibScan1 = t.GetOneBasedScan(2); + Ms2ScanWithSpecificMass scan1 = new Ms2ScanWithSpecificMass(mzLibScan1, pep.MonoisotopicMass.ToMz(1), 1, null, new CommonParameters()); + + var peptideFragmentIons = new List + { + new MatchedFragmentIon(new Product(ProductType.b, FragmentationTerminus.N, 100, 1, 1, 0), 100, 100, 1), + new MatchedFragmentIon(new Product(ProductType.b, FragmentationTerminus.N, 200, 2, 2, 0), 200, 200, 2), + new MatchedFragmentIon(new Product(ProductType.b, FragmentationTerminus.N, 300, 3, 3, 0), 300, 300, 3), + new MatchedFragmentIon(new Product(ProductType.y, FragmentationTerminus.C, 100, 1, 1, 0), 100, 100, 1), + new MatchedFragmentIon(new Product(ProductType.y, FragmentationTerminus.C, 200, 2, 2, 0), 200, 200, 1), + new MatchedFragmentIon(new Product(ProductType.y, FragmentationTerminus.C, 300, 3, 3, 0), 300, 300, 1) + }; + + SpectralMatch psm1 = new PeptideSpectralMatch(pep, 0, 3, 0, scan1, commonParameters, peptideFragmentIons); + + psm1.ResolveAllAmbiguities(); + + // Act + + float hyperScore = PepAnalysisEngine.GetFraggerHyperScore(psm1, psm1.BestMatchingBioPolymersWithSetMods.First().Peptide); + + + // Assert + Assert.AreEqual(7.112605f, hyperScore, 0.000001f); + } } } \ No newline at end of file From ddd7460301c1df1f56fc9d01a09d6f393f9bf2bc Mon Sep 17 00:00:00 2001 From: trishorts Date: Wed, 11 Sep 2024 11:35:10 -0500 Subject: [PATCH 13/25] fix unit test 1 --- .../Test/PostSearchAnalysisTaskTests.cs | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs b/MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs index f01117297..b0c96e575 100644 --- a/MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs +++ b/MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs @@ -78,27 +78,27 @@ public static void AllResultsAndResultsTxtContainsCorrectValues_PepQValue_Bottom string outputFolder = testCase.OutputDirectory; var allResultsFile = Path.Combine(outputFolder, "allResults.txt"); var allResults = File.ReadAllLines(allResultsFile); - Assert.AreEqual("All target PSMs with pep q-value <= 0.01: 382", allResults[10]); - Assert.AreEqual("All target peptides with pep q-value <= 0.01: 153", allResults[11]); - Assert.AreEqual("All target protein groups with q-value <= 0.01 (1% FDR): 140", allResults[12]); - Assert.AreEqual("TaGe_SA_A549_3_snip - Target PSMs with pep q-value <= 0.01: 190", allResults[14]); - Assert.AreEqual("TaGe_SA_A549_3_snip - Target peptides with pep q-value <= 0.01: 153", allResults[15]); - Assert.AreEqual("TaGe_SA_A549_3_snip - Target protein groups within 1 % FDR: 140", allResults[16]); - Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target PSMs with pep q-value <= 0.01: 190", allResults[18]); - Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target peptides with pep q-value <= 0.01: 153", allResults[19]); - Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target protein groups within 1 % FDR: 140", allResults[20]); + Assert.AreEqual("All target PSMs with pep q-value <= 0.01: 378", allResults[10]); + Assert.AreEqual("All target peptides with pep q-value <= 0.01: 150", allResults[11]); + Assert.AreEqual("All target protein groups with q-value <= 0.01 (1% FDR): 137", allResults[12]); + Assert.AreEqual("TaGe_SA_A549_3_snip - Target PSMs with pep q-value <= 0.01: 188", allResults[14]); + Assert.AreEqual("TaGe_SA_A549_3_snip - Target peptides with pep q-value <= 0.01: 150", allResults[15]); + Assert.AreEqual("TaGe_SA_A549_3_snip - Target protein groups within 1 % FDR: 137", allResults[16]); + Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target PSMs with pep q-value <= 0.01: 188", allResults[18]); + Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target peptides with pep q-value <= 0.01: 150", allResults[19]); + Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target protein groups within 1 % FDR: 137", allResults[20]); var resultsFile = Path.Combine(outputFolder, @"postSearchAnalysisTaskTestOutput\results.txt"); var results = File.ReadAllLines(resultsFile); - Assert.AreEqual("All target PSMs with pep q-value <= 0.01: 382", results[5]); - Assert.AreEqual("All target peptides with pep q-value <= 0.01: 153", results[6]); - Assert.AreEqual("All target protein groups with q-value <= 0.01 (1% FDR): 140", results[7]); - Assert.AreEqual("TaGe_SA_A549_3_snip - Target PSMs with pep q-value <= 0.01: 190", results[9]); - Assert.AreEqual("TaGe_SA_A549_3_snip - Target peptides with pep q-value <= 0.01: 153", results[10]); - Assert.AreEqual("TaGe_SA_A549_3_snip - Target protein groups within 1 % FDR: 140", results[11]); - Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target PSMs with pep q-value <= 0.01: 190", results[13]); - Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target peptides with pep q-value <= 0.01: 153", results[14]); - Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target protein groups within 1 % FDR: 140", results[15]); + Assert.AreEqual("All target PSMs with pep q-value <= 0.01: 378", results[5]); + Assert.AreEqual("All target peptides with pep q-value <= 0.01: 150", results[6]); + Assert.AreEqual("All target protein groups with q-value <= 0.01 (1% FDR): 137", results[7]); + Assert.AreEqual("TaGe_SA_A549_3_snip - Target PSMs with pep q-value <= 0.01: 188", results[9]); + Assert.AreEqual("TaGe_SA_A549_3_snip - Target peptides with pep q-value <= 0.01: 150", results[10]); + Assert.AreEqual("TaGe_SA_A549_3_snip - Target protein groups within 1 % FDR: 137", results[11]); + Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target PSMs with pep q-value <= 0.01: 188", results[13]); + Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target peptides with pep q-value <= 0.01: 150", results[14]); + Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target protein groups within 1 % FDR: 137", results[15]); } /// From 3782c5ee7471ac835f29eb3abf931be0fa7e3cac Mon Sep 17 00:00:00 2001 From: trishorts Date: Wed, 11 Sep 2024 11:37:42 -0500 Subject: [PATCH 14/25] fix unit test 2 --- MetaMorpheus/Test/FdrTest.cs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/MetaMorpheus/Test/FdrTest.cs b/MetaMorpheus/Test/FdrTest.cs index 13c5e9cfc..a295134f6 100644 --- a/MetaMorpheus/Test/FdrTest.cs +++ b/MetaMorpheus/Test/FdrTest.cs @@ -716,7 +716,7 @@ public static void TestPsmData() "TotalMatchingFragmentCount", "Intensity", "PrecursorChargeDiffToMode", "DeltaScore", "Notch", "ModsCount", "AbsoluteAverageFragmentMassErrorFromMedian", "MissedCleavagesCount", "Ambiguity", "LongestFragmentIonSeries", "ComplementaryIonCount", "HydrophobicityZScore", "IsVariantPeptide", - "IsDeadEnd", "IsLoop", "SpectralAngle", "HasSpectralAngle" + "IsDeadEnd", "IsLoop", "SpectralAngle", "HasSpectralAngle", "FraggerHyperScorebyLength" }; Assert.AreEqual(expectedTrainingInfoStandard, trainingInfoStandard); @@ -727,7 +727,7 @@ public static void TestPsmData() "TotalMatchingFragmentCount", "Intensity", "PrecursorChargeDiffToMode", "DeltaScore", "Notch", "ModsCount", "AbsoluteAverageFragmentMassErrorFromMedian", "Ambiguity", "LongestFragmentIonSeries", "ComplementaryIonCount", "SpectralAngle", "HasSpectralAngle", "PeaksInPrecursorEnvelope", - "ChimeraCount", "MostAbundantPrecursorPeakIntensity", "PrecursorFractionalIntensity", "InternalIonCount" + "ChimeraCount", "MostAbundantPrecursorPeakIntensity", "PrecursorFractionalIntensity", "InternalIonCount", "FraggerHyperScorebyLength" }; Assert.AreEqual(expectedTrainingInfoTopDown, trainingInfoTopDown); @@ -736,7 +736,7 @@ public static void TestPsmData() "TotalMatchingFragmentCount", "Intensity", "PrecursorChargeDiffToMode", "DeltaScore", "LongestFragmentIonSeries", "ComplementaryIonCount", "AlphaIntensity", "BetaIntensity", "LongestFragmentIonSeries_Alpha", "LongestFragmentIonSeries_Beta", "PeaksInPrecursorEnvelope", - "MostAbundantPrecursorPeakIntensity", "PrecursorFractionalIntensity", "InternalIonCount" + "MostAbundantPrecursorPeakIntensity", "PrecursorFractionalIntensity", "InternalIonCount", "FraggerHyperScorebyLength" }; List negativeAttributes = new List { @@ -784,12 +784,13 @@ public static void TestPsmData() MostAbundantPrecursorPeakIntensity = 25, PrecursorFractionalIntensity = 26, InternalIonCount = 27, + FraggerHyperScorebyLength = new float[] { 28 } }; - string standardToString = "\t0\t1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t11\t12\t17\t18\t21\t22"; + string standardToString = "\t0\t1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t11\t12\t17\t18\t21\t22\t28"; Assert.AreEqual(standardToString, pd.ToString("standard")); - string topDownToString = "\t0\t1\t2\t3\t4\t5\t6\t8\t9\t10\t21\t22\t23\t24\t25\t26\t27"; + string topDownToString = "\t0\t1\t2\t3\t4\t5\t6\t8\t9\t10\t21\t22\t23\t24\t25\t26\t27\t28"; Assert.AreEqual(topDownToString, pd.ToString("top-down")); } From 983f54cf492f43cdb0afc992a03ee55ca73b7a2d Mon Sep 17 00:00:00 2001 From: trishorts Date: Wed, 11 Sep 2024 11:45:28 -0500 Subject: [PATCH 15/25] fix unit test for psm intensity in psmdata we don't have normalization any more --- MetaMorpheus/Test/FdrTest.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MetaMorpheus/Test/FdrTest.cs b/MetaMorpheus/Test/FdrTest.cs index a295134f6..89278db8d 100644 --- a/MetaMorpheus/Test/FdrTest.cs +++ b/MetaMorpheus/Test/FdrTest.cs @@ -248,7 +248,7 @@ public static void TestComputePEPValue() double normalizationFactor = (double)pwsm.BaseSequence.Length; float maxPsmDeltaScore = (float)Math.Round(maxScorePsm.DeltaScore / normalizationFactor * 10.0, 0); Assert.That(maxPsmDeltaScore, Is.EqualTo(maxPsmData.DeltaScore).Within(0.05)); - float maxPsmIntensity = Math.Min(50, (float)Math.Round((maxScorePsm.Score - (int)maxScorePsm.Score) / normalizationFactor * 100.0, 0)); + float maxPsmIntensity = (float)(maxScorePsm.Score - (int)maxScorePsm.Score); Assert.That(maxPsmIntensity, Is.EqualTo(maxPsmData.Intensity).Within(0.05)); Assert.That(maxPsmData.HydrophobicityZScore, Is.EqualTo(52.0).Within(0.05)); Assert.That(maxScorePsm.BestMatchingBioPolymersWithSetMods.Select(p => p.Peptide).First().MissedCleavages, Is.EqualTo(maxPsmData.MissedCleavagesCount)); @@ -480,7 +480,7 @@ public static void TestComputePEPValueTopDown() double normalizationFactor = 1; float maxPsmDeltaScore = (float)Math.Round(maxScorePsm.DeltaScore / normalizationFactor * 10.0, 0); Assert.That(maxPsmDeltaScore, Is.EqualTo(maxPsmData.DeltaScore).Within(0.05)); - float maxPsmIntensity = (float)Math.Min(50, Math.Round((maxScorePsm.Score - (int)maxScorePsm.Score) / normalizationFactor * 100.0, 0)); + float maxPsmIntensity = (float)(maxScorePsm.Score - (int)maxScorePsm.Score); Assert.That(maxPsmIntensity, Is.EqualTo(maxPsmData.Intensity).Within(0.05)); Assert.AreEqual(maxPsmData.HydrophobicityZScore, float.NaN); Assert.That(maxScorePsm.BestMatchingBioPolymersWithSetMods.Select(p => p.Peptide).First().MissedCleavages, Is.EqualTo(maxPsmData.MissedCleavagesCount)); From 8eb52e0c4b90d4e6c62664e90f09b54da769b7b8 Mon Sep 17 00:00:00 2001 From: trishorts Date: Wed, 11 Sep 2024 11:51:23 -0500 Subject: [PATCH 16/25] fix last unit test --- MetaMorpheus/Test/XLTest.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MetaMorpheus/Test/XLTest.cs b/MetaMorpheus/Test/XLTest.cs index 5de9a1e65..ee931fcd8 100644 --- a/MetaMorpheus/Test/XLTest.cs +++ b/MetaMorpheus/Test/XLTest.cs @@ -726,7 +726,7 @@ public static void XlTest_MoreComprehensive() Assert.That(singleCsmPsmData.ComplementaryIonCount, Is.EqualTo(2).Within(0.1)); Assert.That(singleCsmPsmData.DeltaScore, Is.EqualTo(8).Within(0.1)); Assert.That(singleCsmPsmData.HydrophobicityZScore, Is.EqualTo(5).Within(0.1)); - Assert.That(singleCsmPsmData.Intensity, Is.EqualTo(0).Within(0.1)); + Assert.That(singleCsmPsmData.Intensity, Is.EqualTo(0.168412149f).Within(0.1)); Assert.AreEqual(singleCsmPsmData.IsDeadEnd, 0); Assert.AreEqual(singleCsmPsmData.IsInter, 0); Assert.AreEqual(singleCsmPsmData.IsIntra, 0); @@ -740,7 +740,7 @@ public static void XlTest_MoreComprehensive() Assert.AreEqual(singleCsmPsmData.ModsCount, 1); Assert.AreEqual(singleCsmPsmData.Notch, 0); Assert.AreEqual(singleCsmPsmData.PrecursorChargeDiffToMode, -1); - Assert.That(singleCsmPsmData.TotalMatchingFragmentCount, Is.EqualTo(8).Within(0.1)); + Assert.That(singleCsmPsmData.TotalMatchingFragmentCount, Is.EqualTo(30).Within(0.1)); CrosslinkSpectralMatch loopCsm = firstCsmsFromListsOfCsms.Where(c => c.CrossType == PsmCrossType.Loop).OrderBy(c => -c.Score).First(); var loopCsmPsmData = pepEngine.CreateOnePsmDataEntry("standard", loopCsm, loopCsm.BestMatchingBioPolymersWithSetMods.First().Peptide, loopCsm.BestMatchingBioPolymersWithSetMods.First().Notch, !loopCsm.BestMatchingBioPolymersWithSetMods.First().Peptide.Parent.IsDecoy); Assert.That(loopCsmPsmData.AbsoluteAverageFragmentMassErrorFromMedian, Is.EqualTo(6).Within(0.1)); @@ -750,7 +750,7 @@ public static void XlTest_MoreComprehensive() Assert.That(loopCsmPsmData.ComplementaryIonCount, Is.EqualTo(3).Within(0.1)); Assert.That(loopCsmPsmData.DeltaScore, Is.EqualTo(8).Within(0.1)); Assert.That(loopCsmPsmData.HydrophobicityZScore, Is.EqualTo(1).Within(0.1)); - Assert.That(loopCsmPsmData.Intensity, Is.EqualTo(1).Within(0.1)); + Assert.That(loopCsmPsmData.Intensity, Is.EqualTo(0.205627412f).Within(0.1)); Assert.AreEqual(loopCsmPsmData.IsDeadEnd, 0); Assert.AreEqual(loopCsmPsmData.IsInter, 0); Assert.AreEqual(loopCsmPsmData.IsIntra, 0); @@ -764,7 +764,7 @@ public static void XlTest_MoreComprehensive() Assert.AreEqual(loopCsmPsmData.ModsCount, 2); Assert.AreEqual(loopCsmPsmData.Notch, 0); Assert.AreEqual(loopCsmPsmData.PrecursorChargeDiffToMode, -1); - Assert.That(loopCsmPsmData.TotalMatchingFragmentCount, Is.EqualTo(8).Within(0.1)); + Assert.That(loopCsmPsmData.TotalMatchingFragmentCount, Is.EqualTo(22).Within(0.1)); unnasignedCrossType = 0; inter = 0; From fa56b881efa37d92f44420b9cad4ae260d17b1ef Mon Sep 17 00:00:00 2001 From: trishorts Date: Wed, 11 Sep 2024 12:59:40 -0500 Subject: [PATCH 17/25] fixed percolator output second row header with default direction --- MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index 46fb89539..440ad37c6 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -1815,7 +1815,7 @@ private static void WritePsmsForPercolator(List psmList, string w output.WriteLine(header); StringBuilder directions = new StringBuilder(); - directions.Append("DefaultDirection\t-\t-"); + directions.Append("DefaultDirection\t-\t-\t\t"); foreach (var headerVariable in PsmData.trainingInfos[searchType]) { From 4c03b28ebd89c216c2520ef2b406da0cadd8baa4 Mon Sep 17 00:00:00 2001 From: trishorts Date: Wed, 11 Sep 2024 13:01:28 -0500 Subject: [PATCH 18/25] remove space between Length and the number in the percolator header --- MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index 440ad37c6..441f90137 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -1798,7 +1798,7 @@ private static void WritePsmsForPercolator(List psmList, string w for(int i = 0; i < 150; i++) { sb.Append("\t"); - sb.Append("Length " + (i + 1).ToString()); + sb.Append("Length" + (i + 1).ToString()); } continue; } From ba178bfd22c1619815cbf3675e83cd182d748a54 Mon Sep 17 00:00:00 2001 From: trishorts Date: Fri, 13 Sep 2024 11:13:37 -0500 Subject: [PATCH 19/25] correct formatting --- .../FdrAnalysis/PEPAnalysisEngine.cs | 4 +- .../EngineLayer/FdrAnalysis/PsmData.cs | 12 ++-- .../SearchTask/PostSearchAnalysisTask.cs | 65 +++++++------------ 3 files changed, 33 insertions(+), 48 deletions(-) diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs index 1279dee51..7ca0a3c24 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs @@ -465,7 +465,7 @@ public PsmData CreateOnePsmDataEntry(string searchType, SpectralMatch psm, IBioP float peaksInPrecursorEnvelope = 0; float mostAbundantPrecursorPeakIntensity = 0; float fractionalIntensity = 0; - float[] fragerHyperScoreByLength = new float[150]; + float fragerHyperScoreByLength = 0; float missedCleavages = 0; float longestSeq = 0; @@ -515,7 +515,7 @@ public PsmData CreateOnePsmDataEntry(string searchType, SpectralMatch psm, IBioP peaksInPrecursorEnvelope = psm.PrecursorScanEnvelopePeakCount; mostAbundantPrecursorPeakIntensity = (float)Math.Round((float)psm.PrecursorScanIntensity / normalizationFactor * multiplier, 0); fractionalIntensity = (float)psm.PrecursorFractionalIntensity; - fragerHyperScoreByLength[selectedPeptide.BaseSequence.Length - 1] = GetFraggerHyperScore(psm, selectedPeptide); + fragerHyperScoreByLength = GetFraggerHyperScore(psm, selectedPeptide); if (PsmHasSpectralAngle(psm)) { diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs index b286327f5..dc1c51149 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs @@ -15,7 +15,7 @@ public class PsmData "TotalMatchingFragmentCount", "Intensity", "PrecursorChargeDiffToMode", "DeltaScore", "Notch", "ModsCount", "AbsoluteAverageFragmentMassErrorFromMedian", "MissedCleavagesCount", "Ambiguity", "LongestFragmentIonSeries", "ComplementaryIonCount", "HydrophobicityZScore", - "IsVariantPeptide", "IsDeadEnd", "IsLoop", "SpectralAngle", "HasSpectralAngle", "FraggerHyperScorebyLength" + "IsVariantPeptide", "SpectralAngle", "HasSpectralAngle", "FraggerHyperScorebyLength" } }, @@ -106,6 +106,11 @@ public string ToString(string searchType) return sb.ToString(); } + //If you need to add an array instead of just one feature you can use the following code + //[LoadColumn(29, 103)] + //[VectorType(75)] + //public float[] FraggerHyperScorebyLength { get; set; } + [LoadColumn(0)] public float Intensity { get; set; } @@ -193,8 +198,7 @@ public string ToString(string searchType) [LoadColumn(28)] public float InternalIonCount { get; set; } - [LoadColumn(29,178)] - [VectorType(150)] - public float[] FraggerHyperScorebyLength { get; set; } + [LoadColumn(29)] + public float FraggerHyperScorebyLength { get; set; } } } \ No newline at end of file diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index 441f90137..d1e934972 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -1784,68 +1784,49 @@ private static void WritePsmsForPercolator(List psmList, string w searchType = "top-down"; } + //header string header = "SpecId\tLabel\tScanNr"; - header += "\tPeptide\tProteins"; - StringBuilder sb = new StringBuilder(); var variablesToOutput = PsmData.trainingInfos[searchType]; foreach (var variable in variablesToOutput) { - if (variable is "FraggerHyperScorebyLength") - { - for(int i = 0; i < 150; i++) - { - sb.Append("\t"); - sb.Append("Length" + (i + 1).ToString()); - } - continue; - } - else - { - sb.Append("\t"); - sb.Append(variable.ToString()); - } + sb.Append("\t"); + sb.Append(variable.ToString()); } - header += sb.ToString(); - - + header += "\tPeptide\tProteins"; output.WriteLine(header); - StringBuilder directions = new StringBuilder(); - directions.Append("DefaultDirection\t-\t-\t\t"); + //direction + string direction = "DefaultDirection\t\t"; - foreach (var headerVariable in PsmData.trainingInfos[searchType]) + sb = new StringBuilder(); + variablesToOutput = PsmData.trainingInfos[searchType]; + + foreach (var variable in variablesToOutput) { - if(headerVariable is "FraggerHyperScorebyLength") - { - for (int i = 0; i < 150; i++) - { - directions.Append("\t"); - directions.Append(PsmData.assumedAttributeDirection[headerVariable]); - } - continue; - } - directions.Append("\t"); - directions.Append(PsmData.assumedAttributeDirection[headerVariable]); + sb.Append("\t"); + sb.Append(PsmData.assumedAttributeDirection[variable]); } + direction += sb.ToString(); + direction += "\t\t"; + output.WriteLine(direction); - output.WriteLine(directions.ToString()); - + //psmdata lines int idNumber = 0; - psmList.OrderByDescending(p => p.Score); + psmList.OrderByDescending(p => p); foreach (SpectralMatch psm in psmList.Where(p => p.PsmData_forPEPandPercolator != null)) { foreach (var peptide in psm.BestMatchingBioPolymersWithSetMods) { - output.Write(idNumber.ToString()); - output.Write('\t' + (peptide.Peptide.Parent.IsDecoy ? -1 : 1).ToString()); - output.Write('\t' + psm.ScanNumber.ToString()); - output.Write('\t' + (peptide.Peptide.PreviousResidue + "." + peptide.Peptide.FullSequence + "." + peptide.Peptide.NextResidue).ToString()); - output.Write('\t' + (peptide.Peptide.Parent.Accession).ToString()); - output.Write(psm.PsmData_forPEPandPercolator.ToString(searchType)); + output.Write(idNumber.ToString()); //id number + output.Write('\t' + (peptide.Peptide.Parent.IsDecoy ? -1 : 1).ToString()); //label + output.Write('\t' + psm.ScanNumber.ToString()); //scan number + output.Write(psm.PsmData_forPEPandPercolator.ToString(searchType));//psmdata + output.Write('\t' + (peptide.Peptide.PreviousResidue + "." + peptide.Peptide.FullSequence + "." + peptide.Peptide.NextResidue).ToString());//peptide + output.Write('\t' + (peptide.Peptide.Parent.Accession).ToString());//proteins output.WriteLine(); } idNumber++; From 37a32b900db2bc13700e21822fcba0ecea9471d2 Mon Sep 17 00:00:00 2001 From: trishorts Date: Fri, 13 Sep 2024 11:25:58 -0500 Subject: [PATCH 20/25] fixed unit tests --- MetaMorpheus/Test/FdrTest.cs | 8 +++---- .../Test/PostSearchAnalysisTaskTests.cs | 24 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/MetaMorpheus/Test/FdrTest.cs b/MetaMorpheus/Test/FdrTest.cs index 89278db8d..e59c4d0c9 100644 --- a/MetaMorpheus/Test/FdrTest.cs +++ b/MetaMorpheus/Test/FdrTest.cs @@ -716,7 +716,7 @@ public static void TestPsmData() "TotalMatchingFragmentCount", "Intensity", "PrecursorChargeDiffToMode", "DeltaScore", "Notch", "ModsCount", "AbsoluteAverageFragmentMassErrorFromMedian", "MissedCleavagesCount", "Ambiguity", "LongestFragmentIonSeries", "ComplementaryIonCount", "HydrophobicityZScore", "IsVariantPeptide", - "IsDeadEnd", "IsLoop", "SpectralAngle", "HasSpectralAngle", "FraggerHyperScorebyLength" + "SpectralAngle", "HasSpectralAngle", "FraggerHyperScorebyLength" }; Assert.AreEqual(expectedTrainingInfoStandard, trainingInfoStandard); @@ -784,13 +784,13 @@ public static void TestPsmData() MostAbundantPrecursorPeakIntensity = 25, PrecursorFractionalIntensity = 26, InternalIonCount = 27, - FraggerHyperScorebyLength = new float[] { 28 } + FraggerHyperScorebyLength = 29 }; - string standardToString = "\t0\t1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t11\t12\t17\t18\t21\t22\t28"; + string standardToString = "\t0\t1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t11\t12\t21\t22\t29"; Assert.AreEqual(standardToString, pd.ToString("standard")); - string topDownToString = "\t0\t1\t2\t3\t4\t5\t6\t8\t9\t10\t21\t22\t23\t24\t25\t26\t27\t28"; + string topDownToString = "\t0\t1\t2\t3\t4\t5\t6\t8\t9\t10\t21\t22\t23\t24\t25\t26\t27\t29"; Assert.AreEqual(topDownToString, pd.ToString("top-down")); } diff --git a/MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs b/MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs index b0c96e575..84468da22 100644 --- a/MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs +++ b/MetaMorpheus/Test/PostSearchAnalysisTaskTests.cs @@ -78,26 +78,26 @@ public static void AllResultsAndResultsTxtContainsCorrectValues_PepQValue_Bottom string outputFolder = testCase.OutputDirectory; var allResultsFile = Path.Combine(outputFolder, "allResults.txt"); var allResults = File.ReadAllLines(allResultsFile); - Assert.AreEqual("All target PSMs with pep q-value <= 0.01: 378", allResults[10]); - Assert.AreEqual("All target peptides with pep q-value <= 0.01: 150", allResults[11]); + Assert.AreEqual("All target PSMs with pep q-value <= 0.01: 376", allResults[10]); + Assert.AreEqual("All target peptides with pep q-value <= 0.01: 149", allResults[11]); Assert.AreEqual("All target protein groups with q-value <= 0.01 (1% FDR): 137", allResults[12]); - Assert.AreEqual("TaGe_SA_A549_3_snip - Target PSMs with pep q-value <= 0.01: 188", allResults[14]); - Assert.AreEqual("TaGe_SA_A549_3_snip - Target peptides with pep q-value <= 0.01: 150", allResults[15]); + Assert.AreEqual("TaGe_SA_A549_3_snip - Target PSMs with pep q-value <= 0.01: 187", allResults[14]); + Assert.AreEqual("TaGe_SA_A549_3_snip - Target peptides with pep q-value <= 0.01: 149", allResults[15]); Assert.AreEqual("TaGe_SA_A549_3_snip - Target protein groups within 1 % FDR: 137", allResults[16]); - Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target PSMs with pep q-value <= 0.01: 188", allResults[18]); - Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target peptides with pep q-value <= 0.01: 150", allResults[19]); + Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target PSMs with pep q-value <= 0.01: 187", allResults[18]); + Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target peptides with pep q-value <= 0.01: 149", allResults[19]); Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target protein groups within 1 % FDR: 137", allResults[20]); var resultsFile = Path.Combine(outputFolder, @"postSearchAnalysisTaskTestOutput\results.txt"); var results = File.ReadAllLines(resultsFile); - Assert.AreEqual("All target PSMs with pep q-value <= 0.01: 378", results[5]); - Assert.AreEqual("All target peptides with pep q-value <= 0.01: 150", results[6]); + Assert.AreEqual("All target PSMs with pep q-value <= 0.01: 376", results[5]); + Assert.AreEqual("All target peptides with pep q-value <= 0.01: 149", results[6]); Assert.AreEqual("All target protein groups with q-value <= 0.01 (1% FDR): 137", results[7]); - Assert.AreEqual("TaGe_SA_A549_3_snip - Target PSMs with pep q-value <= 0.01: 188", results[9]); - Assert.AreEqual("TaGe_SA_A549_3_snip - Target peptides with pep q-value <= 0.01: 150", results[10]); + Assert.AreEqual("TaGe_SA_A549_3_snip - Target PSMs with pep q-value <= 0.01: 187", results[9]); + Assert.AreEqual("TaGe_SA_A549_3_snip - Target peptides with pep q-value <= 0.01: 149", results[10]); Assert.AreEqual("TaGe_SA_A549_3_snip - Target protein groups within 1 % FDR: 137", results[11]); - Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target PSMs with pep q-value <= 0.01: 188", results[13]); - Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target peptides with pep q-value <= 0.01: 150", results[14]); + Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target PSMs with pep q-value <= 0.01: 187", results[13]); + Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target peptides with pep q-value <= 0.01: 149", results[14]); Assert.AreEqual("TaGe_SA_A549_3_snip_2 - Target protein groups within 1 % FDR: 137", results[15]); } From f200d02763221688b90675a6c0b0ea24383b2e21 Mon Sep 17 00:00:00 2001 From: trishorts Date: Fri, 13 Sep 2024 13:29:56 -0500 Subject: [PATCH 21/25] fix psmdata tostring --- .../EngineLayer/FdrAnalysis/PsmData.cs | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs index dc1c51149..e6bf3e2aa 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs @@ -86,21 +86,9 @@ public string ToString(string searchType) foreach (var variable in variablesToOutput) { var property = typeof(PsmData).GetProperty(variable).GetValue(this, null); - if(property is float[]) - { - foreach (var value in (float[])property) - { - sb.Append("\t"); - sb.Append(value.ToString()); - } - continue; - } - else - { - var floatValue = (float)property; - sb.Append("\t"); - sb.Append(floatValue.ToString()); - } + var floatValue = (float)property; + sb.Append("\t"); + sb.Append(floatValue.ToString()); } return sb.ToString(); From aa7ea4ae915e5a48c9824b50fe8c1d3cfd0a1bc2 Mon Sep 17 00:00:00 2001 From: trishorts Date: Wed, 13 Nov 2024 10:53:06 -0600 Subject: [PATCH 22/25] add xcorr to PEP --- .../FdrAnalysis/PEPAnalysisEngine.cs | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs index 86c389116..6fb7e3d24 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs @@ -1102,6 +1102,53 @@ public static float GetFraggerHyperScore(SpectralMatch psm, IBioPolymerWithSetMo return (float)((matched_n_IonCountFactorial + matched_c_IonCountFactorial + log10IntensitySum));; } + /// + /// https://willfondrie.com/2019/02/an-intuitive-look-at-the-xcorr-score-function-in-proteomics/ + /// + /// A mass spectrum can be preprocessed by subtracting the mean intensities at all of the offsets. + /// Then a single dot product between the preprocessed mass spectrum and the theoretical peptide + /// mass spectrum yields the xcorr score, which is made possible because of the distributive + /// property of the dot product. + /// + /// Since we have already chosen the match for this scan, we can use the matched ions to calculate the + /// xcorr and skip the dot product step. + /// + /// + /// + /// + /// + public static float Xcorr(SpectralMatch psm, IBioPolymerWithSetMods selectedPeptide) + { + double xcorr = 0; + var xArray = psm.MsDataScan.MassSpectrum.XArray; + var yArray = psm.MsDataScan.MassSpectrum.YArray; + + foreach (var peptideFragmentIon in psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide]) + { + int startIndex = Array.BinarySearch(xArray, peptideFragmentIon.Mz - 75); + int endIndex = Array.BinarySearch(xArray, peptideFragmentIon.Mz + 75); + + // Ensure valid indices + startIndex = startIndex < 0 ? ~startIndex : startIndex; + endIndex = endIndex < 0 ? ~endIndex - 1 : endIndex; + + double sum = 0; + for (int i = startIndex; i <= endIndex; i++) + { + sum += yArray[i]; + } + sum -= peptideFragmentIon.Intensity; + double range = xArray[endIndex] - xArray[startIndex]; + if (range > 0) + { + sum /= range; + } + + xcorr += Math.Max(peptideFragmentIon.Intensity - sum, 0); + } + + return (float)xcorr; + } public static float? GetLog10Factorial(int n) { From 9e81ffa0cf924130972aeb9f13eb292d4c45bf4f Mon Sep 17 00:00:00 2001 From: trishorts Date: Wed, 13 Nov 2024 11:04:52 -0600 Subject: [PATCH 23/25] optimized for speed --- .../FdrAnalysis/PEPAnalysisEngine.cs | 81 ++++++++++++------- 1 file changed, 53 insertions(+), 28 deletions(-) diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs index 6fb7e3d24..00d85d846 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs @@ -1071,36 +1071,31 @@ public static float GetAverageFragmentMassError(IEnumerable public static float GetFraggerHyperScore(SpectralMatch psm, IBioPolymerWithSetMods selectedPeptide) { var peptideFragmentIons = psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide]; - var nIons = peptideFragmentIons.Where(f => f.NeutralTheoreticalProduct.Terminus == FragmentationTerminus.N).ToList(); - var cIons = peptideFragmentIons.Where(f => f.NeutralTheoreticalProduct.Terminus == FragmentationTerminus.C).ToList(); float nIonIntensitySum = 0; - if(nIons.Any()) - { - nIonIntensitySum = (float)nIons.Sum(f => f.Intensity); - } float cIonIntensitySum = 0; - if (cIons.Any()) - { - cIonIntensitySum = (float)cIons.Sum(f => f.Intensity); - } - float matched_n_IonCountFactorial = 0; - if(nIons.Count > 0) - { - matched_n_IonCountFactorial = GetLog10Factorial((int)nIons.Count).Value; - } - float matched_c_IonCountFactorial = 0; - if (nIons.Count > 0) - { - matched_c_IonCountFactorial = GetLog10Factorial((int)cIons.Count).Value; - } + int nIonCount = 0; + int cIonCount = 0; - double log10IntensitySum = 0.1; - if(nIonIntensitySum > 0 && cIonIntensitySum > 0) + foreach (var ion in peptideFragmentIons) { - log10IntensitySum = Math.Log10(nIonIntensitySum * cIonIntensitySum); + if (ion.NeutralTheoreticalProduct.Terminus == FragmentationTerminus.N) + { + nIonIntensitySum += (float)ion.Intensity; + nIonCount++; + } + else if (ion.NeutralTheoreticalProduct.Terminus == FragmentationTerminus.C) + { + cIonIntensitySum += (float)ion.Intensity; + cIonCount++; + } } - return (float)((matched_n_IonCountFactorial + matched_c_IonCountFactorial + log10IntensitySum));; + float matched_n_IonCountFactorial = nIonCount > 0 ? GetLog10Factorial(nIonCount).Value : 0; + float matched_c_IonCountFactorial = cIonCount > 0 ? GetLog10Factorial(cIonCount).Value : 0; + + double log10IntensitySum = (nIonIntensitySum > 0 && cIonIntensitySum > 0) ? Math.Log10(nIonIntensitySum * cIonIntensitySum) : 0.1; + + return matched_n_IonCountFactorial + matched_c_IonCountFactorial + (float)log10IntensitySum; } /// /// https://willfondrie.com/2019/02/an-intuitive-look-at-the-xcorr-score-function-in-proteomics/ @@ -1122,8 +1117,9 @@ public static float Xcorr(SpectralMatch psm, IBioPolymerWithSetMods selectedPept double xcorr = 0; var xArray = psm.MsDataScan.MassSpectrum.XArray; var yArray = psm.MsDataScan.MassSpectrum.YArray; + var fragments = psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide]; - foreach (var peptideFragmentIon in psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide]) + foreach (var peptideFragmentIon in fragments) { int startIndex = Array.BinarySearch(xArray, peptideFragmentIon.Mz - 75); int endIndex = Array.BinarySearch(xArray, peptideFragmentIon.Mz + 75); @@ -1132,12 +1128,14 @@ public static float Xcorr(SpectralMatch psm, IBioPolymerWithSetMods selectedPept startIndex = startIndex < 0 ? ~startIndex : startIndex; endIndex = endIndex < 0 ? ~endIndex - 1 : endIndex; + // Sum yArray values between startIndex and endIndex double sum = 0; for (int i = startIndex; i <= endIndex; i++) { sum += yArray[i]; } - sum -= peptideFragmentIon.Intensity; + sum -= peptideFragmentIon.Intensity; // Subtract the intensity of the current ion + double range = xArray[endIndex] - xArray[startIndex]; if (range > 0) { @@ -1152,11 +1150,38 @@ public static float Xcorr(SpectralMatch psm, IBioPolymerWithSetMods selectedPept public static float? GetLog10Factorial(int n) { - double log10Factorial = 0.0; - for (int i = 1; i <= n; i++) + if (n < 0) + { + throw new ArgumentOutOfRangeException(nameof(n), "Input must be non-negative."); + } + + // Use a precomputed array for small values of n + double[] precomputedLog10Factorials = new double[] + { + 0.0, // 0! + 0.0, // 1! + 0.3010, // 2! + 0.7782, // 3! + 1.2553, // 4! + 1.7324, // 5! + 2.2095, // 6! + 2.6866, // 7! + 3.1637, // 8! + 3.6408, // 9! + 4.1179 // 10! + }; + + if (n < precomputedLog10Factorials.Length) + { + return (float)precomputedLog10Factorials[n]; + } + + double log10Factorial = precomputedLog10Factorials[precomputedLog10Factorials.Length - 1]; + for (int i = precomputedLog10Factorials.Length; i <= n; i++) { log10Factorial += Math.Log10(i); } + return (float)log10Factorial; } #endregion From 7de17ef0ce8cb232f9f1750ff5057c3c949b4689 Mon Sep 17 00:00:00 2001 From: trishorts Date: Wed, 13 Nov 2024 11:45:44 -0600 Subject: [PATCH 24/25] log 10 factorial tests --- MetaMorpheus/Test/FdrTest.cs | 41 ++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/MetaMorpheus/Test/FdrTest.cs b/MetaMorpheus/Test/FdrTest.cs index 22e889f44..d04746b29 100644 --- a/MetaMorpheus/Test/FdrTest.cs +++ b/MetaMorpheus/Test/FdrTest.cs @@ -796,7 +796,7 @@ public static void GetLog10Factorial_ReturnsCorrectValue(int n, float? expected) float? result = PepAnalysisEngine.GetLog10Factorial(n); // Assert - Assert.AreEqual(expected, result); + Assert.That(expected, Is.EqualTo(result)); } [Test] @@ -837,7 +837,44 @@ public static void TestGetFraggerHyperScore() // Assert - Assert.AreEqual(7.112605f, hyperScore, 0.000001f); + Assert.That(7.112605f, Is.EqualTo(hyperScore).Within(0.000001f)); + } + + [Test] + public static void GetLog10Factorial_NegativeInput_ThrowsArgumentOutOfRangeException() + { + Assert.Throws(() => PepAnalysisEngine.GetLog10Factorial(-1)); + } + + [Test] + [TestCase(0, 0.0)] + [TestCase(1, 0.0)] + [TestCase(2, 0.3010)] + [TestCase(3, 0.7782)] + [TestCase(4, 1.2553)] + [TestCase(5, 1.7324)] + [TestCase(6, 2.2095)] + [TestCase(7, 2.6866)] + [TestCase(8, 3.1637)] + [TestCase(9, 3.6408)] + [TestCase(10, 4.1179)] + public static void GetLog10Factorial_PrecomputedValues_ReturnsExpectedResult(int n, double expected) + { + float? result = PepAnalysisEngine.GetLog10Factorial(n); + Assert.That((float)expected, Is.EqualTo(result)); + } + + [Test] + public static void GetLog10Factorial_LargeInput_ReturnsExpectedResult() + { + int n = 20; + float? result = PepAnalysisEngine.GetLog10Factorial(n); + double expected = 0.0; + for (int i = 1; i <= n; i++) + { + expected += Math.Log10(i); + } + Assert.That((float)expected, Is.EqualTo(result).Within(4)); // Allowing a small tolerance for floating-point comparison } } } \ No newline at end of file From 7e17c0e838e5751e480fb9222e4e03523f674e0a Mon Sep 17 00:00:00 2001 From: trishorts Date: Wed, 13 Nov 2024 13:46:26 -0600 Subject: [PATCH 25/25] xcorr unit tests --- MetaMorpheus/Test/FdrTest.cs | 4 +- MetaMorpheus/Test/PepAnalysisEngineTests.cs | 114 ++++++++++++++++++++ 2 files changed, 116 insertions(+), 2 deletions(-) create mode 100644 MetaMorpheus/Test/PepAnalysisEngineTests.cs diff --git a/MetaMorpheus/Test/FdrTest.cs b/MetaMorpheus/Test/FdrTest.cs index d04746b29..7454db9ee 100644 --- a/MetaMorpheus/Test/FdrTest.cs +++ b/MetaMorpheus/Test/FdrTest.cs @@ -62,9 +62,9 @@ public static void FdrTestMethod() TestDataFile t = new TestDataFile(new List { pep1, pep2, pep3 }); - MsDataScan mzLibScan1 = t.GetOneBasedScan(2); + MassSpectrometry.MsDataScan mzLibScan1 = t.GetOneBasedScan(2); Ms2ScanWithSpecificMass scan1 = new Ms2ScanWithSpecificMass(mzLibScan1, pep1.MonoisotopicMass.ToMz(1), 1, null, new CommonParameters()); - SpectralMatch psm1 = new PeptideSpectralMatch(pep1, 0, 3, 0, scan1, commonParameters, new List()); + EngineLayer.SpectralMatch psm1 = new PeptideSpectralMatch(pep1, 0, 3, 0, scan1, commonParameters, new List()); MsDataScan mzLibScan2 = t.GetOneBasedScan(4); Ms2ScanWithSpecificMass scan2 = new Ms2ScanWithSpecificMass(mzLibScan2, pep2.MonoisotopicMass.ToMz(1), 1, null, new CommonParameters()); diff --git a/MetaMorpheus/Test/PepAnalysisEngineTests.cs b/MetaMorpheus/Test/PepAnalysisEngineTests.cs new file mode 100644 index 000000000..609d5e3f1 --- /dev/null +++ b/MetaMorpheus/Test/PepAnalysisEngineTests.cs @@ -0,0 +1,114 @@ +using EngineLayer; +using MassSpectrometry; +using System.Collections.Generic; +using NUnit.Framework; +using Omics; +using Omics.Fragmentation; +using ThermoFisher.CommonCore.Data.Business; +using System.Linq; +using MzLibUtil; +using Proteomics.ProteolyticDigestion; +using Omics.Modifications; +using Omics.Digestion; +using Proteomics; + +namespace Test +{ + public class PepAnalysisEngineTests + { + [Test] + public void Xcorr_ValidInput_ReturnsExpectedResult() + { + // Arrange + var xArray = new double[] { 100, 150, 200, 250, 300 }; + var yArray = new double[] { 10, 20, 30, 40, 50 }; + + var fragments = new List + { + new MatchedFragmentIon(new Product(ProductType.b, Omics.Fragmentation.FragmentationTerminus.N, 150, 1,1,0), 150, 20, 1), + new MatchedFragmentIon(new Product(ProductType.b, Omics.Fragmentation.FragmentationTerminus.N, 250, 1,1,0), 250, 40, 1), + }; + + var psm = CreateSpectralMatch(xArray, yArray, [150, 250], [20, 40], fragments); + + var selectedPeptide = psm.BestMatchingBioPolymersWithSetMods.First().Peptide; + + // Act + float result = EngineLayer.PepAnalysisEngine.Xcorr(psm, selectedPeptide); + + // Assert + Assert.That(58.8, Is.EqualTo(result).Within(1)); // Allowing a small tolerance for floating-point comparison + } + + [Test] + public void Xcorr_EmptyFragments_ReturnsZero() + { + // Arrange + var xArray = new double[] { 100, 150, 200, 250, 300 }; + var yArray = new double[] { 10, 20, 30, 40, 50 }; + + var fragments = new List + { + }; + + var psm = CreateSpectralMatch(xArray, yArray, new double[0], new double[0], fragments); + + var selectedPeptide = psm.BestMatchingBioPolymersWithSetMods.First().Peptide; + + // Act + float result = EngineLayer.PepAnalysisEngine.Xcorr(psm, selectedPeptide); + + // Assert + Assert.That(0, Is.EqualTo(result)); + } + + private SpectralMatch CreateSpectralMatch(double[] xArray, double[] yArray, double[] fragmentMz, double[] fragmentIntensity, List matchedFragmentIons) + { + PeptideWithSetModifications pwsm = new PeptideWithSetModifications(new Protein("PEPTIDE", "ACCESSION", "ORGANISM"), new DigestionParams(), 1, 2, CleavageSpecificity.Full, "", 0, new Dictionary(), 0); + int notch = 0; + double score = 0; + int scanIndex = 1; + Ms2ScanWithSpecificMass scan = CreateMs2ScanWithSpecificMass(xArray, yArray); + CommonParameters commonParameters = new CommonParameters(); + + return new PeptideSpectralMatch(pwsm, notch, score, scanIndex, scan, commonParameters, matchedFragmentIons); + } + + private Ms2ScanWithSpecificMass CreateMs2ScanWithSpecificMass(double[] xArray, double[] yArray) + { + MsDataScan scan = CreateMsDataScan(xArray, yArray); + double precursorMonoisotopicPeakMz = 1; + int precursorCharge = 1; + string fullFilePath = ""; + CommonParameters commonParam = new CommonParameters(); + + return new Ms2ScanWithSpecificMass(scan, precursorMonoisotopicPeakMz, precursorCharge, fullFilePath, commonParam); + } + private MsDataScan CreateMsDataScan(double[] xArray, double[] yArray) + { + MzSpectrum massSpectrum = CreateMzSpectrum(xArray, yArray); + int oneBasedScanNumber = 1; + int msnOrder = 1; + bool isCentroid = true; + MassSpectrometry.Polarity polarity = MassSpectrometry.Polarity.Positive; + double retentionTime = 1.0; + MzRange scanWindowRange = new MzRange(1,500); + string scanFilter = ""; + MZAnalyzerType mzAnalyzer = MZAnalyzerType.Orbitrap; + double totalIonCurrent = 1.0; + double? injectionTime = 1.0; + double[,] noiseData = new double[1,1]; + string nativeId = ""; + + return new MsDataScan(massSpectrum, oneBasedScanNumber, msnOrder, isCentroid, polarity, retentionTime, scanWindowRange, scanFilter, mzAnalyzer, totalIonCurrent, injectionTime, noiseData, nativeId); + + } + private MzSpectrum CreateMzSpectrum(double[] xArray, double[] yArray) + { + double[] mz = xArray; + double[] intensities = yArray; + bool shouldCopy = true; + return new MzSpectrum(mz, intensities, shouldCopy); + } + } +}