Skip to content

Commit

Permalink
Changes to PEP Calculation and Filtering (#2387)
Browse files Browse the repository at this point in the history
* new style computation of pep q-value

* fixed unit tests

* separate PsmFdrInfo and PeptideFdrInfo calculations in FdrAnalysisEngine

* d

* fdh

* not working yet

* maybe better maybe not

* huh

* tr

* j

* 53

* Fixed filtering kinda

* commit before i start breaking things

* still sorta broken

* idk

* Fixed most issues, moved filtering to MetaMorpheus Task

* fix multiprotease unit test

* fix MakeSureFdrDoesntSkip

* fix TestPeptideCount

* new postsearchanalysistask results generator

* fixed results output in postsearchanalysistask

* yert

* fix pep q-value calc

* fix peptideFdrTest

* fix spectral recovery

* ity

* fix semi specific test

* fix metadraw test

* lkah

* poiu

* slice test fixed

* some tests

* some testst

* fixed most of silac unit tests

* hmm

* dsg

* uio

* kjg

* ghk

* Fixed the few remaining tests that were breaking

* Five tests breaking, mostly numbers

* Fixed results.txt writer for PEP-Q-values.

* Fixed output bug

* idk

* broken

* Finally fixed!!!

* Added QValueThresholdForPEP to common params

* Addressed Nic's comments

* Fixed tests that broke when addressing Nic's comments

* Made fields in FilteredPsms more explicit

* added comments - MRS

* More comments, better ordering

---------

Co-authored-by: Michael Shortreed <mshort@chem.wisc.edu>
  • Loading branch information
Alexander-Sol and trishorts authored Aug 5, 2024
1 parent 7fd6aee commit 4e95528
Show file tree
Hide file tree
Showing 23 changed files with 963 additions and 667 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public ClassicSearchEngine(SpectralMatch[] globalPsms, Ms2ScanWithSpecificMass[]

protected override MetaMorpheusEngineResults RunSpecific()
{
Status("Getting ms2 scans...");
Status("Getting ms2 scans...");

double proteinsSearched = 0;
int oldPercentProgress = 0;
Expand Down
8 changes: 8 additions & 0 deletions MetaMorpheus/EngineLayer/CommonParameters.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public CommonParameters(
int totalPartitions = 1,
double qValueThreshold = 0.01,
double pepQValueThreshold = 1.0,
double qValueCutoffForPepCalculation = 0.005,
double scoreCutoff = 5,
int? numberOfPeaksToKeepPerWindow = 200,
double? minimumAllowedIntensityRatioToBasePeak = 0.01,
Expand Down Expand Up @@ -67,6 +68,7 @@ public CommonParameters(
TotalPartitions = totalPartitions;
QValueThreshold = qValueThreshold;
PepQValueThreshold = pepQValueThreshold;
QValueCutoffForPepCalculation = qValueCutoffForPepCalculation;
ScoreCutoff = scoreCutoff;
NumberOfPeaksToKeepPerWindow = numberOfPeaksToKeepPerWindow;
MinimumAllowedIntensityRatioToBasePeak = minimumAllowedIntensityRatioToBasePeak;
Expand Down Expand Up @@ -157,6 +159,11 @@ public int DeconvolutionMaxAssumedChargeState
/// </summary>
public double PepQValueThreshold { get; private set; }
public double ScoreCutoff { get; private set; }
/// <summary>
/// This parameter determines which PSMs/Peptides will be used as positive training examples
/// when training the GBDT model for PEP.
/// Assigned from the constructor argument qValueCutoffForPepCalculation (default 0.005).
/// During PEP analysis, the smallest value across the file-specific parameters is used, and
/// non-decoy matches whose q-value is less than or equal to it are labeled as positives.
/// </summary>
public double QValueCutoffForPepCalculation { get; private set; }
public DigestionParams DigestionParams { get; private set; }
public bool ReportAllAmbiguity { get; private set; }
public int? NumberOfPeaksToKeepPerWindow { get; private set; }
Expand Down Expand Up @@ -225,6 +232,7 @@ public CommonParameters CloneWithNewTerminus(FragmentationTerminus? terminus = n
TotalPartitions,
QValueThreshold,
PepQValueThreshold,
QValueCutoffForPepCalculation,
ScoreCutoff,
NumberOfPeaksToKeepPerWindow,
MinimumAllowedIntensityRatioToBasePeak,
Expand Down
254 changes: 178 additions & 76 deletions MetaMorpheus/EngineLayer/FdrAnalysis/FdrAnalysisEngine.cs

Large diffs are not rendered by default.

15 changes: 11 additions & 4 deletions MetaMorpheus/EngineLayer/FdrAnalysis/PEPValueAnalysisGeneric.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ public static class PEP_Analysis_Cross_Validation
/// A dictionary which stores the chimeric ID string in the key and the number of chimeric identifications as the value
/// </summary>
private static Dictionary<string, int> chimeraCountDictionary = new Dictionary<string, int>();

/// <summary>
/// Selects which FdrInfo supplies the q-value used to label positive training examples:
/// true passes peptideLevel = true to SpectralMatch.GetFdrInfo, false passes peptideLevel = false.
/// Reset to true at the start of ComputePEPValuesForAllPSMsGeneric and set to false there
/// when training falls back to using PSMs instead of peptides.
/// NOTE(review): this is mutable public static state shared by every caller — confirm that
/// PEP analysis is never run concurrently.
/// </summary>
public static bool UsePeptideLevelQValueForTraining = true;
/// <summary>
/// Largest q-value at which a non-decoy match is still labeled as a positive training example.
/// Overwritten in ComputePEPValuesForAllPSMsGeneric with the minimum
/// QValueCutoffForPepCalculation found among the file-specific parameters.
/// </summary>
public static double QValueCutoff = 0.005;


/// <summary>
/// This method is used to compute the PEP values for all PSMs in a dataset.
Expand All @@ -52,6 +54,8 @@ public static string ComputePEPValuesForAllPSMsGeneric(List<SpectralMatch> psms,
.Select(b => b.FirstOrDefault()).ToList();
List<int> countOfPeptidesInEachFile = peptides.GroupBy(b => b.FullFilePath).Select(b => b.Count()).ToList();
bool allFilesContainPeptides = (countOfPeptidesInEachFile.Count == fileSpecificParameters.Count); //rare condition where each file has psms but some files don't have peptides. probably only happens in unit tests.
UsePeptideLevelQValueForTraining = true;
QValueCutoff = fileSpecificParameters.Select(t => t.fileSpecificParameters.QValueCutoffForPepCalculation).Min();

int chargeStateMode = 0;
Dictionary<string, float> fileSpecificMedianFragmentMassErrors = new Dictionary<string, float>();
Expand All @@ -67,10 +71,12 @@ public static string ComputePEPValuesForAllPSMsGeneric(List<SpectralMatch> psms,
else
{
//there are too few psms to do any meaningful training if we used only peptides. So, we will train using psms instead.
UsePeptideLevelQValueForTraining = false;
allPeptideIndices = Enumerable.Range(0, psms.Count).ToList();
chargeStateMode = GetChargeStateMode(psms);
fileSpecificMedianFragmentMassErrors = GetFileSpecificMedianFragmentMassError(psms);
}


//These two dictionaries contain the average and standard deviations of hydrophobicities measured in 1 minute increments across each raw
//file separately. An individually measured hydrophobicity calculated for a specific PSM sequence is compared to these values by computing
Expand Down Expand Up @@ -378,7 +384,8 @@ public static void RemoveBestMatchingPeptidesWithLowPEP(SpectralMatch psm, List<
psm.RemoveThisAmbiguousPeptide(notches[i], pwsmList[i]);
ambiguousPeptidesRemovedCount++;
}
psm.FdrInfo.PEP = 1 - pepValuePredictions.Max();
psm.PsmFdrInfo.PEP = 1 - pepValuePredictions.Max();
psm.PeptideFdrInfo.PEP = 1 - pepValuePredictions.Max();
}

/// <summary>
Expand Down Expand Up @@ -712,7 +719,7 @@ public static IEnumerable<PsmData> CreatePsmData(string searchType, List<(string
label = false;
newPsmData = CreateOnePsmDataEntry(searchType, fileSpecificParameters, psm, timeDependantHydrophobicityAverageAndDeviation_unmodified, timeDependantHydrophobicityAverageAndDeviation_modified, fileSpecificMedianFragmentMassErrors, chargeStateMode, csm.BestMatchingBioPolymersWithSetMods.First().Peptide, 0, label);
}
else if (!csm.IsDecoy && !csm.BetaPeptide.IsDecoy && psm.FdrInfo.QValue <= 0.005)
else if (!csm.IsDecoy && !csm.BetaPeptide.IsDecoy && psm.GetFdrInfo(UsePeptideLevelQValueForTraining).QValue <= QValueCutoff)
{
label = true;
newPsmData = CreateOnePsmDataEntry(searchType, fileSpecificParameters, psm, timeDependantHydrophobicityAverageAndDeviation_unmodified, timeDependantHydrophobicityAverageAndDeviation_modified, fileSpecificMedianFragmentMassErrors, chargeStateMode, csm.BestMatchingBioPolymersWithSetMods.First().Peptide, 0, label);
Expand All @@ -736,7 +743,7 @@ public static IEnumerable<PsmData> CreatePsmData(string searchType, List<(string
label = false;
newPsmData = CreateOnePsmDataEntry(searchType, fileSpecificParameters, psm, timeDependantHydrophobicityAverageAndDeviation_unmodified, timeDependantHydrophobicityAverageAndDeviation_modified, fileSpecificMedianFragmentMassErrors, chargeStateMode, peptideWithSetMods, notch, label);
}
else if (!peptideWithSetMods.Parent.IsDecoy && psm.FdrInfo.QValue <= 0.005)
else if (!peptideWithSetMods.Parent.IsDecoy && psm.GetFdrInfo(UsePeptideLevelQValueForTraining).QValue <= QValueCutoff)
{
label = true;
newPsmData = CreateOnePsmDataEntry(searchType, fileSpecificParameters, psm, timeDependantHydrophobicityAverageAndDeviation_unmodified, timeDependantHydrophobicityAverageAndDeviation_modified, fileSpecificMedianFragmentMassErrors, chargeStateMode, peptideWithSetMods, notch, label);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ public static List<SpectralMatch> ResolveFdrCategorySpecificPsms(List<SpectralMa
{
if (AllPsms[i] != null)
{
ranking[i] = AllPsms[i].Where(x => x != null).Count(x => x.FdrInfo.QValue <= 0.01); //set ranking as number of psms above 1% FDR
ranking[i] = AllPsms[i].Where(x => x != null).Count(x => x.PsmFdrInfo.QValue <= 0.01); //set ranking as number of psms above 1% FDR
indexesOfInterest.Add(i);
}
}
Expand Down Expand Up @@ -515,9 +515,9 @@ public static List<SpectralMatch> ResolveFdrCategorySpecificPsms(List<SpectralMa
}
else
{
if (majorPsm.FdrInfo.QValue > minorPsm.FdrInfo.QValue)
if (majorPsm.PsmFdrInfo.QValue > minorPsm.PsmFdrInfo.QValue)
{
minorPsm.FdrInfo.QValue = majorPsm.FdrInfo.QValue;
minorPsm.PsmFdrInfo.QValue = majorPsm.PsmFdrInfo.QValue;
}
minorPsmIndex++;
}
Expand All @@ -527,9 +527,9 @@ public static List<SpectralMatch> ResolveFdrCategorySpecificPsms(List<SpectralMa
{
SpectralMatch majorPsm = majorCategoryPsms[majorPsmIndex - 1]; //-1 because it's out of index right now
SpectralMatch minorPsm = minorCategoryPsms[minorPsmIndex];
if (majorPsm.FdrInfo.QValue > minorPsm.FdrInfo.QValue)
if (majorPsm.PsmFdrInfo.QValue > minorPsm.PsmFdrInfo.QValue)
{
minorPsm.FdrInfo.QValue = majorPsm.FdrInfo.QValue;
minorPsm.PsmFdrInfo.QValue = majorPsm.PsmFdrInfo.QValue;
}
minorPsmIndex++;
}
Expand All @@ -548,7 +548,7 @@ public static List<SpectralMatch> ResolveFdrCategorySpecificPsms(List<SpectralMa
SpectralMatch currentPsm = AllPsms[index][i];
if (currentPsm != null)
{
double currentQValue = currentPsm.FdrInfo.QValue;
double currentQValue = currentPsm.PsmFdrInfo.QValue;
if (currentQValue < lowestQ //if the new one is better
|| (currentQValue == lowestQ && currentPsm.Score > bestPsm.Score))
{
Expand Down Expand Up @@ -587,7 +587,7 @@ public static List<SpectralMatch> ResolveFdrCategorySpecificPsms(List<SpectralMa
}
}

return bestPsmsList.OrderBy(b => b.FdrInfo.QValue).ThenByDescending(b => b.Score).ToList();
return bestPsmsList.OrderBy(b => b.PsmFdrInfo.QValue).ThenByDescending(b => b.Score).ToList();
}

public static List<Modification> GetVariableTerminalMods(FragmentationTerminus fragmentationTerminus, List<Modification> variableModifications)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ public ProteinParsimonyEngine(List<SpectralMatch> allPsms, bool modPeptidesAreDi
// KEEP contaminants for use in parsimony!
if (modPeptidesAreDifferent)
{
_fdrFilteredPsms = allPsms.Where(p => p.FullSequence != null && p.FdrInfo.QValue <= FdrCutoffForParsimony && p.FdrInfo.QValueNotch <= FdrCutoffForParsimony).ToList();
_fdrFilteredPsms = allPsms.Where(p => p.FullSequence != null && p.PsmFdrInfo.QValue <= FdrCutoffForParsimony && p.PsmFdrInfo.QValueNotch <= FdrCutoffForParsimony).ToList();
}
else
{
_fdrFilteredPsms = allPsms.Where(p => p.BaseSequence != null && p.FdrInfo.QValue <= FdrCutoffForParsimony && p.FdrInfo.QValueNotch <= FdrCutoffForParsimony).ToList();
_fdrFilteredPsms = allPsms.Where(p => p.BaseSequence != null && p.PsmFdrInfo.QValue <= FdrCutoffForParsimony && p.PsmFdrInfo.QValueNotch <= FdrCutoffForParsimony).ToList();
}

// peptides to use in parsimony = peptides observed in high-confidence PSMs (including decoys)
Expand Down
21 changes: 11 additions & 10 deletions MetaMorpheus/EngineLayer/PsmTsv/PsmTsvWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ internal static void AddMatchedIonsData(Dictionary<string, string> s, List<Match
s[PsmTsvHeader.MatchedIonCounts] = nullPsm ? " " : matchedIons.Count.ToString();
}

internal static void AddMatchScoreData(Dictionary<string, string> s, SpectralMatch peptide)
internal static void AddMatchScoreData(Dictionary<string, string> s, SpectralMatch peptide, bool writePeptideLevelFdr = false)
{
string spectralAngle = peptide == null ? " " : peptide.SpectralAngle.ToString("F4");
string localizedScores = " ";
Expand All @@ -339,17 +339,18 @@ internal static void AddMatchScoreData(Dictionary<string, string> s, SpectralMat
string PEP = " ";
string PEP_Qvalue = " ";

if (peptide != null && peptide.FdrInfo != null)
if (peptide != null && peptide.GetFdrInfo(writePeptideLevelFdr) != null)
{
cumulativeTarget = peptide.FdrInfo.CumulativeTarget.ToString(CultureInfo.InvariantCulture);
cumulativeDecoy = peptide.FdrInfo.CumulativeDecoy.ToString(CultureInfo.InvariantCulture);
qValue = peptide.FdrInfo.QValue.ToString("F6", CultureInfo.InvariantCulture);
cumulativeTargetNotch = peptide.FdrInfo.CumulativeTargetNotch.ToString(CultureInfo.InvariantCulture);
cumulativeDecoyNotch = peptide.FdrInfo.CumulativeDecoyNotch.ToString(CultureInfo.InvariantCulture);
qValueNotch = peptide.FdrInfo.QValueNotch.ToString("F6", CultureInfo.InvariantCulture);
PEP = peptide.FdrInfo.PEP.ToString();
PEP_Qvalue = peptide.FdrInfo.PEP_QValue.ToString();
cumulativeTarget = peptide.GetFdrInfo(writePeptideLevelFdr).CumulativeTarget.ToString(CultureInfo.InvariantCulture);
cumulativeDecoy = peptide.GetFdrInfo(writePeptideLevelFdr).CumulativeDecoy.ToString(CultureInfo.InvariantCulture);
qValue = peptide.GetFdrInfo(writePeptideLevelFdr).QValue.ToString("F6", CultureInfo.InvariantCulture);
cumulativeTargetNotch = peptide.GetFdrInfo(writePeptideLevelFdr).CumulativeTargetNotch.ToString(CultureInfo.InvariantCulture);
cumulativeDecoyNotch = peptide.GetFdrInfo(writePeptideLevelFdr).CumulativeDecoyNotch.ToString(CultureInfo.InvariantCulture);
qValueNotch = peptide.GetFdrInfo(writePeptideLevelFdr).QValueNotch.ToString("F6", CultureInfo.InvariantCulture);
PEP = peptide.GetFdrInfo(writePeptideLevelFdr).PEP.ToString();
PEP_Qvalue = peptide.GetFdrInfo(writePeptideLevelFdr).PEP_QValue.ToString();
}

s[PsmTsvHeader.CumulativeTarget] = cumulativeTarget;
s[PsmTsvHeader.CumulativeDecoy] = cumulativeDecoy;
s[PsmTsvHeader.QValue] = qValue;
Expand Down
23 changes: 18 additions & 5 deletions MetaMorpheus/EngineLayer/SpectralMatch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,20 @@ protected SpectralMatch(IBioPolymerWithSetMods peptide, int notch, double score,
public string FullFilePath { get; private set; }
public int ScanIndex { get; }
public int NumDifferentMatchingPeptides { get { return _BestMatchingBioPolymersWithSetMods.Count; } }
public FdrInfo FdrInfo { get; private set; }

/// <summary>
/// Pass-through to <see cref="PsmFdrInfo"/>: reading this property returns PsmFdrInfo,
/// and assigning it assigns PsmFdrInfo.
/// </summary>
public FdrInfo FdrInfo
{
    get { return PsmFdrInfo; }
    set { PsmFdrInfo = value; }
}
/// <summary>
/// The FdrInfo returned by GetFdrInfo when peptideLevel is false; also exposed through the FdrInfo property.
/// Presumably filled in by PSM-level FDR analysis — the code that assigns it is not shown here.
/// </summary>
public FdrInfo PsmFdrInfo { get; set; }
/// <summary>
/// The FdrInfo returned by GetFdrInfo when peptideLevel is true.
/// Presumably filled in by peptide-level FDR analysis — the code that assigns it is not shown here.
/// </summary>
public FdrInfo PeptideFdrInfo { get; set; }
/// <summary>
/// Chooses between the two FdrInfo objects stored on this match.
/// </summary>
/// <param name="peptideLevel">True to return PeptideFdrInfo; false to return PsmFdrInfo.</param>
/// <returns>
/// The selected FdrInfo, exactly as currently stored. No null handling is done here;
/// callers such as the TSV writer check the result for null before using it.
/// </returns>
public FdrInfo GetFdrInfo(bool peptideLevel)
{
    if (peptideLevel)
    {
        return PeptideFdrInfo;
    }

    return PsmFdrInfo;
}

public PsmData PsmData_forPEPandPercolator { get; set; }

public double Score { get; private set; }
Expand Down Expand Up @@ -266,18 +279,18 @@ public override string ToString()
return ToString(new Dictionary<string, int>());
}

public string ToString(IReadOnlyDictionary<string, int> ModstoWritePruned)
public string ToString(IReadOnlyDictionary<string, int> ModstoWritePruned, bool writePeptideLevelFdr = false)
{
return string.Join("\t", DataDictionary(this, ModstoWritePruned).Values);
return string.Join("\t", DataDictionary(this, ModstoWritePruned, writePeptideLevelFdr).Values);
}

public static Dictionary<string, string> DataDictionary(SpectralMatch psm, IReadOnlyDictionary<string, int> ModsToWritePruned)
public static Dictionary<string, string> DataDictionary(SpectralMatch psm, IReadOnlyDictionary<string, int> ModsToWritePruned, bool writePeptideLevelFdr = false)
{
Dictionary<string, string> s = new Dictionary<string, string>();
PsmTsvWriter.AddBasicMatchData(s, psm);
PsmTsvWriter.AddPeptideSequenceData(s, psm, ModsToWritePruned);
PsmTsvWriter.AddMatchedIonsData(s, psm?.MatchedFragmentIons);
PsmTsvWriter.AddMatchScoreData(s, psm);
PsmTsvWriter.AddMatchScoreData(s, psm, writePeptideLevelFdr);
return s;
}

Expand Down
Loading

0 comments on commit 4e95528

Please sign in to comment.