diff --git a/.github/workflows/dotnet.yml b/.github/workflows/dotnet.yml
index f89904dc3..dca2c6509 100644
--- a/.github/workflows/dotnet.yml
+++ b/.github/workflows/dotnet.yml
@@ -38,3 +38,65 @@ jobs:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
verbose: true
files: mzLib/Test*/TestResults/*/coverage.cobertura.xml
+ integration:
+ runs-on: windows-latest
+ timeout-minutes: 15
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up .NET
+ uses: actions/setup-dotnet@v1
+ with:
+ dotnet-version: 8.0.x
+ - name: Restore dependencies
+ run: cd mzLib && dotnet restore
+ - name: Build
+ run: cd mzLib && dotnet build --no-restore --configuration Release
+ - name: Change mzLib version, pack, add source
+ run: |
+ cd mzLib;
+ (Get-Content mzLib.nuspec) -replace "\(.*)\", "9.9.9" | Set-Content mzLib.nuspec;
+ $mzlibMatch = Select-String -Path mzLib.nuspec -Pattern "(?<=\)(.*)(?=\ RoundedDouble(myNumber as double?, places);
public static double? RoundedDouble(this double? myNumber, int places = 9)
{
if (myNumber != null)
diff --git a/mzLib/Development/Deconvolution/StandardDeconvolutionTest.cs b/mzLib/Development/DeconvolutionDevelopment/StandardDeconvolutionTest.cs
similarity index 56%
rename from mzLib/Development/Deconvolution/StandardDeconvolutionTest.cs
rename to mzLib/Development/DeconvolutionDevelopment/StandardDeconvolutionTest.cs
index ea02b6dd4..002e0f9b5 100644
--- a/mzLib/Development/Deconvolution/StandardDeconvolutionTest.cs
+++ b/mzLib/Development/DeconvolutionDevelopment/StandardDeconvolutionTest.cs
@@ -38,117 +38,152 @@ static StandardDeconvolutionTest()
Loaders.LoadElements();
// define paths to spectra
- var ubiquitinPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Deconvolution", "TestData",
+ var ubiquitinPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DeconvolutionDevelopment", "TestData",
"Averaged_221110_UbiqOnly.mzML");
- var hghPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Deconvolution", "TestData",
+ var hghPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DeconvolutionDevelopment", "TestData",
"Averaged_221110_HGHOnly.mzML");
- var cytoPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Deconvolution", "TestData",
+ var cytoPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DeconvolutionDevelopment", "TestData",
"Averaged_221110_CytoOnly.mzML");
// set up deconvoluters to be utilized by test cases
- DeconvolutionParameters classicTopDownDeconvolutionParams = new ClassicDeconvolutionParameters(1, 60, 4, 3);
- DeconvolutionParameters classicBottomUpDeconvolutionParams = new ClassicDeconvolutionParameters(1, 12, 4, 3);
+ List topDownDeconvolutionParametersToTest =
+ [
+ new ClassicDeconvolutionParameters(1, 60, 4, 3),
+ new IsoDecDeconvolutionParameters()
+ ];
- // Add Individual peak test cases
- List singlePeakDeconvolutionTestCases = new()
+ List bottomUpDeconvolutionParametersToTest =
+ [
+ new ClassicDeconvolutionParameters(1, 12, 4, 3),
+ new IsoDecDeconvolutionParameters()
+ ];
+
+
+
+ // Add Individual peak test cases for top down
+ List singlePeakDeconvolutionTestCases = new();
+ foreach (var deconParams in topDownDeconvolutionParametersToTest)
{
// uniquitin, direct injection
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection PolyUbiquitin, Averaged",
- ubiquitinPath, 1, 10038.4, 8, 1254.8, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection PolyUbiquitin, Averaged",
- ubiquitinPath, 1, 10039.41, 9, 1115.49, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection PolyUbiquitin, Averaged",
- ubiquitinPath, 1, 10041.4, 10, 1004.14, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection PolyUbiquitin, Averaged",
- ubiquitinPath, 1, 10041.46, 11, 912.86, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection PolyUbiquitin, Averaged",
- ubiquitinPath, 1, 10043.4, 12, 836.95, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection PolyUbiquitin, Averaged",
- ubiquitinPath, 1, 10043.41, 13, 772.57, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection PolyUbiquitin, Averaged",
- ubiquitinPath, 1, 10044.44, 14, 717.46, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection PolyUbiquitin, Averaged",
- ubiquitinPath, 1, 10045.5, 15, 669.70, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection PolyUbiquitin, Averaged",
- ubiquitinPath, 1, 10045.44, 16, 627.84, 20),
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection PolyUbiquitin, Averaged",
+ ubiquitinPath, 1, 10038.4, 8, 1254.8, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection PolyUbiquitin, Averaged",
+ ubiquitinPath, 1, 10039.41, 9, 1115.49, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection PolyUbiquitin, Averaged",
+ ubiquitinPath, 1, 10041.4, 10, 1004.14, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection PolyUbiquitin, Averaged",
+ ubiquitinPath, 1, 10041.46, 11, 912.86, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection PolyUbiquitin, Averaged",
+ ubiquitinPath, 1, 10043.4, 12, 836.95, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection PolyUbiquitin, Averaged",
+ ubiquitinPath, 1, 10043.41, 13, 772.57, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection PolyUbiquitin, Averaged",
+ ubiquitinPath, 1, 10044.44, 14, 717.46, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection PolyUbiquitin, Averaged",
+ ubiquitinPath, 1, 10045.5, 15, 669.70, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection PolyUbiquitin, Averaged",
+ ubiquitinPath, 1, 10045.44, 16, 627.84, 20));
// hgh, direct injection
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams,
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
"Direct Injection Human Growth Hormone, Averaged",
- hghPath, 1, 22139.41, 11, 2012.29, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams,
+ hghPath, 1, 22139.41, 11, 2012.29, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
"Direct Injection Human Growth Hormone, Averaged",
- hghPath, 1, 22136.28, 12, 1844.69, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams,
+ hghPath, 1, 22136.28, 12, 1844.69, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
"Direct Injection Human Growth Hormone, Averaged",
- hghPath, 1, 22137.31, 13, 1702.87, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams,
+ hghPath, 1, 22137.31, 13, 1702.87, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
"Direct Injection Human Growth Hormone, Averaged",
- hghPath, 1, 22139.32, 14, 1581.38, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams,
+ hghPath, 1, 22139.32, 14, 1581.38, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
"Direct Injection Human Growth Hormone, Averaged",
- hghPath, 1, 22139.25, 15, 1475.95, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams,
+ hghPath, 1, 22139.25, 15, 1475.95, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
"Direct Injectio Human Growth Hormone, Averaged",
- hghPath, 1, 22140.32, 16, 1383.77, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams,
+ hghPath, 1, 22140.32, 16, 1383.77, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
"Direct Injection Human Growth Hormone, Averaged",
- hghPath, 1, 22141.31, 17, 1302.43, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams,
+ hghPath, 1, 22141.31, 17, 1302.43, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
"Direct Injection Human Growth Hormone, Averaged",
- hghPath, 1, 22142.34, 18, 1230.13, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams,
+ hghPath, 1, 22142.34, 18, 1230.13, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
"Direct Injection Human Growth Hormone, Averaged",
- hghPath, 1, 22143.36, 19, 1165.44, 20),
+ hghPath, 1, 22143.36, 19, 1165.44, 20));
// cytochrome c, direct injection
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection Cytochrome C, Averaged",
- cytoPath, 1, 12367.44, 9, 1374.16, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection Cytochrome C, Averaged",
- cytoPath, 1, 12367.4, 10, 1236.74, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection Cytochrome C, Averaged",
- cytoPath, 1, 12368.4, 11, 1124.40, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection Cytochrome C, Averaged",
- cytoPath, 1, 12370.44, 12, 1030.87, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection Cytochrome C, Averaged",
- cytoPath, 1, 12371.45, 13, 951.65, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection Cytochrome C, Averaged",
- cytoPath, 1, 12373.48, 14, 883.82, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection Cytochrome C, Averaged",
- cytoPath, 1, 12373.5, 15, 824.90, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection Cytochrome C, Averaged",
- cytoPath, 1, 12374.56, 16, 773.41, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection Cytochrome C, Averaged",
- cytoPath, 1, 12374.47, 17, 727.91, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection Cytochrome C, Averaged",
- cytoPath, 1, 12376.44, 18, 687.58, 20),
- new SinglePeakDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection Cytochrome C, Averaged",
- cytoPath, 1, 12360.6, 20, 619.03, 20)
- };
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection Cytochrome C, Averaged",
+ cytoPath, 1, 12367.44, 9, 1374.16, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection Cytochrome C, Averaged",
+ cytoPath, 1, 12367.4, 10, 1236.74, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection Cytochrome C, Averaged",
+ cytoPath, 1, 12368.4, 11, 1124.40, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection Cytochrome C, Averaged",
+ cytoPath, 1, 12370.44, 12, 1030.87, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection Cytochrome C, Averaged",
+ cytoPath, 1, 12371.45, 13, 951.65, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection Cytochrome C, Averaged",
+ cytoPath, 1, 12373.48, 14, 883.82, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection Cytochrome C, Averaged",
+ cytoPath, 1, 12373.5, 15, 824.90, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection Cytochrome C, Averaged",
+ cytoPath, 1, 12374.56, 16, 773.41, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection Cytochrome C, Averaged",
+ cytoPath, 1, 12374.47, 17, 727.91, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection Cytochrome C, Averaged",
+ cytoPath, 1, 12376.44, 18, 687.58, 20));
+ singlePeakDeconvolutionTestCases.Add(new SinglePeakDeconvolutionTestCase(deconParams,
+ "Direct Injection Cytochrome C, Averaged",
+ cytoPath, 1, 12360.6, 20, 619.03, 20));
+ }
_singlePeakTestCases = singlePeakDeconvolutionTestCases;
- // Add whole spectrum test cases
- List wholeSpectrumDeconvolutionTestCases = new()
+ // Add whole spectrum test cases for top down
+ List wholeSpectrumDeconvolutionTestCases = new();
+ foreach (var deconParams in topDownDeconvolutionParametersToTest)
{
- new WholeSpectrumDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection PolyUbiquitin, Averaged",
+ wholeSpectrumDeconvolutionTestCases.Add(new WholeSpectrumDeconvolutionTestCase(deconParams, "Direct Injection PolyUbiquitin, Averaged",
ubiquitinPath, 1, 20,
new[] { 10038.4, 10039.41, 10041.4, 10041.46, 10043.4, 10043.41, 10044.44, 10045.5, 10045.44, },
new[] { 8, 9, 10, 11, 12, 13, 14, 15, 16 },
- new[] { 1254.8, 1115.49, 1004.14, 912.86, 836.95, 772.57, 717.46, 669.70, 627.84 }),
+ new[] { 1254.8, 1115.49, 1004.14, 912.86, 836.95, 772.57, 717.46, 669.70, 627.84 }));
- new WholeSpectrumDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection Human Growth Hormone, Averaged",
+ wholeSpectrumDeconvolutionTestCases.Add(new WholeSpectrumDeconvolutionTestCase(deconParams, "Direct Injection Human Growth Hormone, Averaged",
hghPath, 1, 20,
new []{22139.41, 22136.28, 22137.31, 22139.32, 22139.25, 22140.32, 22141.31, 22142.34, 22143.36},
new []{11, 12, 13, 14, 15, 16, 17, 18, 19},
- new []{2012.29, 1844.69, 1702.87, 1581.38, 1475.95, 1383.77, 1302.43, 1230.13, 1165.44}),
+ new []{2012.29, 1844.69, 1702.87, 1581.38, 1475.95, 1383.77, 1302.43, 1230.13, 1165.44}));
- new WholeSpectrumDeconvolutionTestCase(classicTopDownDeconvolutionParams, "Direct Injection Cytochrome C, Averaged",
+ wholeSpectrumDeconvolutionTestCases.Add(new WholeSpectrumDeconvolutionTestCase(deconParams, "Direct Injection Cytochrome C, Averaged",
cytoPath, 1, 20,
new []{12367.44, 12367.4, 12368.4, 12370.44, 12371.45, 12373.48, 12373.5, 12374.56, 12374.47, 12376.44, 12360.6},
new []{9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20},
- new []{1374.16, 1236.74, 1124.40, 1030.87, 951.65, 883.82, 824.90, 773.41, 727.91, 687.58, 619.03}),
+ new []{1374.16, 1236.74, 1124.40, 1030.87, 951.65, 883.82, 824.90, 773.41, 727.91, 687.58, 619.03}));
};
_wholeSpectrumDeconvolutionTestCases = wholeSpectrumDeconvolutionTestCases;
+
+ // TODO: Add cases for bottom up deconvolution
}
#endregion
diff --git a/mzLib/Development/Deconvolution/TestCases/SinglePeakDeconvolutionTestCase.cs b/mzLib/Development/DeconvolutionDevelopment/TestCases/SinglePeakDeconvolutionTestCase.cs
similarity index 98%
rename from mzLib/Development/Deconvolution/TestCases/SinglePeakDeconvolutionTestCase.cs
rename to mzLib/Development/DeconvolutionDevelopment/TestCases/SinglePeakDeconvolutionTestCase.cs
index 2bcf25db8..4e63f2392 100644
--- a/mzLib/Development/Deconvolution/TestCases/SinglePeakDeconvolutionTestCase.cs
+++ b/mzLib/Development/DeconvolutionDevelopment/TestCases/SinglePeakDeconvolutionTestCase.cs
@@ -23,7 +23,7 @@ public class SinglePeakDeconvolutionTestCase
public SinglePeakDeconvolutionTestCase(DeconvolutionParameters deconParameters, string sampleInformation, string spectrumPath, int scanNumber,
double expectedMostAbundantObservedIsotopicMass, int expectedIonChargeState, double selectedIonMz, double precursorPpmMassTolerance)
{
-
+ DeconvolutionParameters = deconParameters;
SampleInformation = sampleInformation;
ExpectedMostAbundantObservedIsotopicMass = expectedMostAbundantObservedIsotopicMass;
ExpectedIonChargeState = expectedIonChargeState;
diff --git a/mzLib/Development/Deconvolution/TestCases/TestDevelopmentTestCases.cs b/mzLib/Development/DeconvolutionDevelopment/TestCases/TestDevelopmentTestCases.cs
similarity index 100%
rename from mzLib/Development/Deconvolution/TestCases/TestDevelopmentTestCases.cs
rename to mzLib/Development/DeconvolutionDevelopment/TestCases/TestDevelopmentTestCases.cs
diff --git a/mzLib/Development/Deconvolution/TestCases/WholeSpectrumDeconvolutionTestCase.cs b/mzLib/Development/DeconvolutionDevelopment/TestCases/WholeSpectrumDeconvolutionTestCase.cs
similarity index 100%
rename from mzLib/Development/Deconvolution/TestCases/WholeSpectrumDeconvolutionTestCase.cs
rename to mzLib/Development/DeconvolutionDevelopment/TestCases/WholeSpectrumDeconvolutionTestCase.cs
diff --git a/mzLib/Development/Deconvolution/TestData/Averaged_221110_CytoOnly.mzML b/mzLib/Development/DeconvolutionDevelopment/TestData/Averaged_221110_CytoOnly.mzML
similarity index 100%
rename from mzLib/Development/Deconvolution/TestData/Averaged_221110_CytoOnly.mzML
rename to mzLib/Development/DeconvolutionDevelopment/TestData/Averaged_221110_CytoOnly.mzML
diff --git a/mzLib/Development/Deconvolution/TestData/Averaged_221110_HGHOnly.mzML b/mzLib/Development/DeconvolutionDevelopment/TestData/Averaged_221110_HGHOnly.mzML
similarity index 100%
rename from mzLib/Development/Deconvolution/TestData/Averaged_221110_HGHOnly.mzML
rename to mzLib/Development/DeconvolutionDevelopment/TestData/Averaged_221110_HGHOnly.mzML
diff --git a/mzLib/Development/Deconvolution/TestData/Averaged_221110_UbiqOnly.mzML b/mzLib/Development/DeconvolutionDevelopment/TestData/Averaged_221110_UbiqOnly.mzML
similarity index 100%
rename from mzLib/Development/Deconvolution/TestData/Averaged_221110_UbiqOnly.mzML
rename to mzLib/Development/DeconvolutionDevelopment/TestData/Averaged_221110_UbiqOnly.mzML
diff --git a/mzLib/Development/Development.csproj b/mzLib/Development/Development.csproj
index 0f0eaf199..cd65a3163 100644
--- a/mzLib/Development/Development.csproj
+++ b/mzLib/Development/Development.csproj
@@ -23,13 +23,13 @@
-
+
PreserveNewest
-
+
PreserveNewest
-
+
PreserveNewest
diff --git a/mzLib/FlashLFQ/ChromatographicPeak.cs b/mzLib/FlashLFQ/ChromatographicPeak.cs
index 9041eab66..d7bac2195 100644
--- a/mzLib/FlashLFQ/ChromatographicPeak.cs
+++ b/mzLib/FlashLFQ/ChromatographicPeak.cs
@@ -1,9 +1,9 @@
-using MzLibUtil;
-using System;
+using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using ClassExtensions = Chemistry.ClassExtensions;
+using FlashLFQ.PEP;
namespace FlashLFQ
{
@@ -16,20 +16,23 @@ public class ChromatographicPeak
public int ScanCount => IsotopicEnvelopes.Count;
public double SplitRT;
public readonly bool IsMbrPeak;
- public double PredictedRetentionTime { get; init; }
-
+ public double MbrScore;
+ public double PpmScore { get; set; }
+ public double IntensityScore { get; set; }
+ public double RtScore { get; set; }
+ public double ScanCountScore { get; set; }
+ public double IsotopicDistributionScore { get; set; }
///
- /// A score bounded by 100 and 0, with more confident MBR-detections receiving higher scores
+ /// Stores the pearson correlation between the apex isotopic envelope and the theoretical isotopic distribution
///
- public double MbrScore { get; private set; }
-
- /// The four scores below are bounded by 0 and 1, with higher scores being better
- public double PpmScore { get; private set; }
- public double IntensityScore { get; private set; }
- public double RtScore { get; private set; }
- public double ScanCountScore { get; private set; }
-
- public ChromatographicPeak(Identification id, bool isMbrPeak, SpectraFileInfo fileInfo)
+ public double IsotopicPearsonCorrelation => Apex?.PearsonCorrelation ?? -1;
+ public double RtPredictionError { get; set; }
+ public List ChargeList { get; set; }
+ internal double MbrQValue { get; set; }
+ public ChromatographicPeakData PepPeakData { get; set; }
+ public double? MbrPep { get; set; }
+
+ public ChromatographicPeak(Identification id, bool isMbrPeak, SpectraFileInfo fileInfo, bool randomRt = false)
{
SplitRT = 0;
NumChargeStatesObserved = 0;
@@ -40,12 +43,14 @@ public ChromatographicPeak(Identification id, bool isMbrPeak, SpectraFileInfo fi
IsotopicEnvelopes = new List();
IsMbrPeak = isMbrPeak;
SpectraFileInfo = fileInfo;
+ RandomRt = randomRt;
}
- public ChromatographicPeak(Identification id, bool isMbrPeak, SpectraFileInfo fileInfo, double predictedRetentionTime) :
- this(id, isMbrPeak, fileInfo)
+ public bool Equals(ChromatographicPeak peak)
{
- PredictedRetentionTime = predictedRetentionTime;
+ return SpectraFileInfo.Equals(peak.SpectraFileInfo)
+ && Identifications.First().ModifiedSequence.Equals(peak.Identifications.First().ModifiedSequence)
+ && ApexRetentionTime == peak.ApexRetentionTime;
}
public IsotopicEnvelope Apex { get; private set; }
@@ -54,13 +59,18 @@ public ChromatographicPeak(Identification id, bool isMbrPeak, SpectraFileInfo fi
public int NumIdentificationsByBaseSeq { get; private set; }
public int NumIdentificationsByFullSeq { get; private set; }
public double MassError { get; private set; }
+ ///
+ /// Bool that describes whether the retention time of this peak was randomized
+ /// If true, implies that this peak is a decoy peak identified by the MBR algorithm
+ ///
+ public bool RandomRt { get; }
+ public bool DecoyPeptide => Identifications.First().IsDecoy;
public void CalculateIntensityForThisFeature(bool integrate)
{
if (IsotopicEnvelopes.Any())
{
- double maxIntensity = IsotopicEnvelopes.Max(p => p.Intensity);
- Apex = IsotopicEnvelopes.First(p => p.Intensity == maxIntensity);
+ Apex = IsotopicEnvelopes.MaxBy(p => p.Intensity);
if (integrate)
{
@@ -123,25 +133,6 @@ public void ResolveIdentifications()
this.NumIdentificationsByBaseSeq = Identifications.Select(v => v.BaseSequence).Distinct().Count();
this.NumIdentificationsByFullSeq = Identifications.Select(v => v.ModifiedSequence).Distinct().Count();
}
-
- ///
- /// Calculates four component scores and one overarching Mbr score for an MBR peak.
- /// MBR Score is equal to 100 * the geometric mean of the four component scores.
- ///
- /// An MbrScorer specific to the file where this peak was found
- /// The donor peak used as the basis for the MBR identification.
- internal void CalculateMbrScore(MbrScorer scorer, ChromatographicPeak donorPeak)
- {
- if (SpectraFileInfo != scorer.AcceptorFile) throw new MzLibException("Error when performing match-between-runs: Mismatch between scorer and peak.");
-
- IntensityScore = scorer.CalculateIntensityScore(this, donorPeak);
- RtScore = scorer.CalculateRetentionTimeScore(this, donorPeak);
- PpmScore = scorer.CalculatePpmErrorScore(this);
- ScanCountScore = scorer.CalculateScanCountScore(this);
-
- MbrScore = 100 * Math.Pow(IntensityScore * RtScore * PpmScore * ScanCountScore, 0.25);
- }
-
public static string TabSeparatedHeader
{
get
@@ -164,20 +155,18 @@ public static string TabSeparatedHeader
sb.Append("Peak Charge" + "\t");
sb.Append("Num Charge States Observed" + "\t");
sb.Append("Peak Detection Type" + "\t");
- sb.Append("MBR Score" + "\t");
- sb.Append("Ppm Score" + "\t");
- sb.Append("Intensity Score" + "\t");
- sb.Append("Rt Score" + "\t");
- sb.Append("Scan Count Score" + "\t");
+ sb.Append("PIP Q-Value" + "\t");
+ sb.Append("PIP PEP" + "\t");
sb.Append("PSMs Mapped" + "\t");
sb.Append("Base Sequences Mapped" + "\t");
sb.Append("Full Sequences Mapped" + "\t");
sb.Append("Peak Split Valley RT" + "\t");
- sb.Append("Peak Apex Mass Error (ppm)");
+ sb.Append("Peak Apex Mass Error (ppm)" + "\t");
+ sb.Append("Decoy Peptide" + "\t");
+ sb.Append("Random RT");
return sb.ToString();
}
}
-
public override string ToString()
{
StringBuilder sb = new StringBuilder();
@@ -260,17 +249,16 @@ public override string ToString()
sb.Append("" + "MSMS" + "\t");
}
- sb.Append("" + (IsMbrPeak ? MbrScore.ToString() : "") + "\t");
- sb.Append("" + (IsMbrPeak ? PpmScore.ToString() : "") + "\t");
- sb.Append("" + (IsMbrPeak ? IntensityScore.ToString() : "") + "\t");
- sb.Append("" + (IsMbrPeak ? RtScore.ToString() : "") + "\t");
- sb.Append("" + (IsMbrPeak ? ScanCountScore.ToString() : "") + "\t");
+ sb.Append("" + (IsMbrPeak ? MbrQValue.ToString() : "") + "\t");
+ sb.Append("" + (IsMbrPeak ? MbrPep.ToString() : "") + "\t");
sb.Append("" + Identifications.Count + "\t");
sb.Append("" + NumIdentificationsByBaseSeq + "\t");
sb.Append("" + NumIdentificationsByFullSeq + "\t");
sb.Append("" + SplitRT + "\t");
sb.Append("" + MassError);
+ sb.Append("\t" + DecoyPeptide);
+ sb.Append("\t" + RandomRt);
return sb.ToString();
}
diff --git a/mzLib/FlashLFQ/FlashLFQ.csproj b/mzLib/FlashLFQ/FlashLFQ.csproj
index 097bf6131..d5c466967 100644
--- a/mzLib/FlashLFQ/FlashLFQ.csproj
+++ b/mzLib/FlashLFQ/FlashLFQ.csproj
@@ -13,6 +13,8 @@
+
+
diff --git a/mzLib/FlashLFQ/FlashLFQResults.cs b/mzLib/FlashLFQ/FlashLFQResults.cs
index 9e5f8e0e4..7e7f7bfb5 100644
--- a/mzLib/FlashLFQ/FlashLFQResults.cs
+++ b/mzLib/FlashLFQ/FlashLFQResults.cs
@@ -1,5 +1,7 @@
using Easy.Common.Extensions;
using MathNet.Numerics.Statistics;
+using MzLibUtil;
+using Proteomics;
using System;
using System.Collections.Generic;
using System.IO;
@@ -14,28 +16,28 @@ public class FlashLfqResults
public readonly Dictionary PeptideModifiedSequences;
public readonly Dictionary ProteinGroups;
public readonly Dictionary> Peaks;
+ public Dictionary ModInfo { get; private set; }
private readonly HashSet _peptideModifiedSequencesToQuantify;
+ public string PepResultString { get; set; }
+ public double MbrQValueThreshold { get; set; }
- public FlashLfqResults(List spectraFiles, List identifications, HashSet peptides = null)
+ public FlashLfqResults(List spectraFiles, List identifications, double mbrQValueThreshold = 0.05,
+ HashSet peptideModifiedSequencesToQuantify = null)
{
SpectraFiles = spectraFiles;
PeptideModifiedSequences = new Dictionary();
ProteinGroups = new Dictionary();
Peaks = new Dictionary>();
- if(peptides == null || !peptides.Any())
- {
- peptides = identifications.Select(id => id.ModifiedSequence).ToHashSet();
- }
- _peptideModifiedSequencesToQuantify = peptides;
+ MbrQValueThreshold = mbrQValueThreshold;
+ _peptideModifiedSequencesToQuantify = peptideModifiedSequencesToQuantify ?? identifications.Where(id => !id.IsDecoy).Select(id => id.ModifiedSequence).ToHashSet();
foreach (SpectraFileInfo file in spectraFiles)
{
Peaks.Add(file, new List());
}
-
// Only quantify peptides within the set of valid peptide modified (full) sequences. This is done to enable pepitde-level FDR control of reported results
- foreach (Identification id in identifications.Where(id => peptides.Contains(id.ModifiedSequence)))
+ foreach (Identification id in identifications.Where(id => !id.IsDecoy & _peptideModifiedSequencesToQuantify.Contains(id.ModifiedSequence)))
{
if (!PeptideModifiedSequences.TryGetValue(id.ModifiedSequence, out Peptide peptide))
{
@@ -59,6 +61,17 @@ public FlashLfqResults(List spectraFiles, List
}
}
+ public void ReNormalizeResults(bool integrate = false, int maxThreads = 10, bool useSharedPeptides = false)
+ {
+ foreach(var peak in Peaks.SelectMany(p => p.Value))
+ {
+ peak.CalculateIntensityForThisFeature(integrate);
+ }
+ new IntensityNormalizationEngine(this, integrate, silent: true, maxThreads).NormalizeResults();
+ CalculatePeptideResults(quantifyAmbiguousPeptides: false);
+ CalculateProteinResultsMedianPolish(useSharedPeptides: useSharedPeptides);
+ }
+
public void MergeResultsWith(FlashLfqResults mergeFrom)
{
this.SpectraFiles.AddRange(mergeFrom.SpectraFiles);
@@ -128,6 +141,8 @@ public void CalculatePeptideResults(bool quantifyAmbiguousPeptides)
{
var groupedPeaks = filePeaks.Value
.Where(p => p.NumIdentificationsByFullSeq == 1)
+ .Where(p => !p.Identifications.First().IsDecoy)
+ .Where(p => !p.IsMbrPeak || (p.MbrQValue < MbrQValueThreshold && !p.RandomRt))
.GroupBy(p => p.Identifications.First().ModifiedSequence)
.Where(group => _peptideModifiedSequencesToQuantify.Contains(group.Key))
.ToList();
@@ -163,11 +178,15 @@ public void CalculatePeptideResults(bool quantifyAmbiguousPeptides)
// report ambiguous quantification
var ambiguousPeaks = filePeaks.Value
.Where(p => p.NumIdentificationsByFullSeq > 1)
+ .Where(p => !p.Identifications.First().IsDecoy)
+ .Where(p => !p.IsMbrPeak || (p.MbrQValue < MbrQValueThreshold && !p.RandomRt))
.ToList();
foreach (ChromatographicPeak ambiguousPeak in ambiguousPeaks)
{
- foreach (Identification id in ambiguousPeak.Identifications)
+ foreach (Identification id in ambiguousPeak.Identifications.Where(id => !id.IsDecoy))
{
+ if (!_peptideModifiedSequencesToQuantify.Contains(id.ModifiedSequence)) continue; // Ignore the ids/sequences we don't want to quantify
+
string sequence = id.ModifiedSequence;
double alreadyRecordedIntensity = PeptideModifiedSequences[sequence].GetIntensity(filePeaks.Key);
@@ -224,7 +243,7 @@ private void HandleAmbiguityInFractions()
foreach (SpectraFileInfo file in sample)
{
- foreach (ChromatographicPeak peak in Peaks[file])
+ foreach (ChromatographicPeak peak in Peaks[file].Where(p => !p.IsMbrPeak || p.MbrQValue < MbrQValueThreshold))
{
foreach (Identification id in peak.Identifications)
{
@@ -331,6 +350,29 @@ public void CalculateProteinResultsTop3(bool useSharedPeptides)
}
}
}
+ ///
+ /// Calculate peptide level ptm occupancy with either all peptides to be quantified (by intensity) or a subset of FlashLFQ-identified peptides with an arbitrary peptide-level quantifier.
+ ///
+ /// Dictionary where keys are string-typed peptide full sequences in PeptideModifiedSequences and the value is a double-typed quantifier of that peptide.
+ /// If true, the index of modifications at the N-terminus will be 0 (zero-based indexing). Otherwise, it is the index of the first amino acid (one-based indexing).
+ /// If true, the index of modifications at the C-terminus will be one more than the index of the last amino acid. Otherwise, it is the index of the last amino acid.
+ /// Dictionary with the key being the amino acid position of the mod and the value being the string representing the mod
+ public void CalculatePTMOccupancy(Dictionary quantifiedPeptides=null, bool modOnNTerminus=true, bool modOnCTerminus=true)
+ {
+ quantifiedPeptides = quantifiedPeptides ?? new Dictionary { };
+
+ var peptides = _peptideModifiedSequencesToQuantify
+ .Where(pep => PeptideModifiedSequences.ContainsKey(pep))
+ .Select(pep => Tuple.Create(
+ PeptideModifiedSequences[pep].Sequence,
+ PeptideModifiedSequences[pep].BaseSequence,
+ PeptideModifiedSequences[pep].ProteinGroups.Select(pg => pg.ProteinGroupName).ToList(),
+ quantifiedPeptides.GetValueOrDefault(pep, PeptideModifiedSequences[pep].GetTotalIntensity()))).ToList();
+
+ PositionFrequencyAnalysis pfa = new PositionFrequencyAnalysis();
+ pfa.PeptidePTMOccupancy(peptides, modOnNTerminus, modOnCTerminus);
+ ModInfo = pfa.Occupancy;
+ }
///
/// This method uses the median polish algorithm to calculate protein quantities in each biological replicate.
@@ -549,7 +591,7 @@ public void CalculateProteinResultsMedianPolish(bool useSharedPeptides)
}
}
- public void WriteResults(string peaksOutputPath, string modPeptideOutputPath, string proteinOutputPath, string bayesianProteinQuantOutput, bool silent = true)
+ public void WriteResults(string peaksOutputPath, string modPeptideOutputPath, string proteinOutputPath, string bayesianProteinQuantOutput, bool silent)
{
if (!silent)
{
diff --git a/mzLib/FlashLFQ/FlashLfqEngine.cs b/mzLib/FlashLFQ/FlashLfqEngine.cs
index 712eae6a1..b052ca9fd 100644
--- a/mzLib/FlashLFQ/FlashLfqEngine.cs
+++ b/mzLib/FlashLFQ/FlashLfqEngine.cs
@@ -12,11 +12,21 @@
using UsefulProteomicsDatabases;
using System.Runtime.CompilerServices;
using Easy.Common.Extensions;
+using FlashLFQ.PEP;
+using System.IO;
+using System.Threading;
[assembly: InternalsVisibleTo("TestFlashLFQ")]
namespace FlashLFQ
{
+ public enum DonorCriterion
+ {
+ Score,
+ Intensity,
+ Neighbors
+ }
+
public class FlashLfqEngine
{
// settings
@@ -38,9 +48,24 @@ public class FlashLfqEngine
public readonly bool MatchBetweenRuns;
public readonly double MbrRtWindow;
public readonly double MbrPpmTolerance;
- public readonly bool RequireMsmsIdInCondition;
+ public readonly double MbrDetectionQValueThreshold;
private int _numberOfAnchorPeptidesForMbr = 3; // the number of anchor peptides used for local alignment when predicting retention times of MBR acceptor peptides
+ // New MBR Settings
+ public readonly double RtWindowIncrease = 0;
+ public readonly double MbrAlignmentWindow = 2.5;
+ public readonly double PepTrainingFraction = 0.25;
+ ///
+ /// Specifies how the donor peak for MBR is selected.
+ /// 'Score' selects the donor peak associated with the highest scoring PSM
+ /// 'Intensity' selects the donor peak with the max intensity
+ /// 'Neighbors' selects the donor peak with the most neighboring peaks
+ ///
+ public DonorCriterion DonorCriterion { get; init; }
+ public readonly double DonorQValueThreshold;
+ public readonly bool RequireMsmsIdInCondition;
+ private int _randomSeed = 42;
+
// settings for the Bayesian protein quantification engine
public readonly bool BayesianProteinQuant;
@@ -61,7 +86,7 @@ public class FlashLfqEngine
/// Other peptides may appear in the QuantifiedPeaks output, but this list is used to enable
/// peptide-level FDR filtering
///
- public HashSet PeptidesModifiedSequencesToQuantify { get; init; }
+ public HashSet PeptideModifiedSequencesToQuantify { get; init; }
///
/// Dictionary linking a modified sequence to a List of tuples containing
/// the mass shifts (isotope mass - monoisotopic mass) and normalized abundances for the
@@ -72,6 +97,7 @@ public class FlashLfqEngine
private FlashLfqResults _results;
internal Dictionary _ms1Scans;
internal PeakIndexingEngine _peakIndexingEngine;
+ internal Dictionary> DonorFileToPeakDict { get; private set; }
///
/// Create an instance of FlashLFQ that will quantify peptides based on their precursor intensity in MS1 spectra
@@ -95,8 +121,9 @@ public FlashLfqEngine(
// MBR settings
bool matchBetweenRuns = false,
double matchBetweenRunsPpmTolerance = 10.0,
- double maxMbrWindow = 2.5,
+ double maxMbrWindow = 1.0,
bool requireMsmsIdInCondition = false,
+ double matchBetweenRunsFdrThreshold = 0.05,
// settings for the Bayesian protein quantification engine
bool bayesianProteinQuant = false,
@@ -106,8 +133,10 @@ public FlashLfqEngine(
int mcmcBurninSteps = 1000,
bool useSharedPeptidesForProteinQuant = false,
bool pairedSamples = false,
- List peptideSequencesToUse = null,
- int? randomSeed = null)
+ int? randomSeed = null,
+ DonorCriterion donorCriterion = DonorCriterion.Score,
+ double donorQValueThreshold = 0.01,
+ List peptideSequencesToQuantify = null)
{
Loaders.LoadElements();
@@ -122,17 +151,17 @@ public FlashLfqEngine(
.ThenBy(p => p.TechnicalReplicate).ToList();
_allIdentifications = allIdentifications;
+ PeptideModifiedSequencesToQuantify = peptideSequencesToQuantify.IsNotNullOrEmpty()
+ ? new HashSet(peptideSequencesToQuantify)
+ : allIdentifications.Select(id => id.ModifiedSequence).ToHashSet();
PpmTolerance = ppmTolerance;
IsotopePpmTolerance = isotopeTolerancePpm;
- MatchBetweenRuns = matchBetweenRuns;
- MbrPpmTolerance = matchBetweenRunsPpmTolerance;
+
Integrate = integrate;
NumIsotopesRequired = numIsotopesRequired;
QuantifyAmbiguousPeptides = quantifyAmbiguousPeptides;
Silent = silent;
IdSpecificChargeState = idSpecificChargeState;
- MbrRtWindow = maxMbrWindow;
-
RequireMsmsIdInCondition = requireMsmsIdInCondition;
Normalize = normalize;
MaxThreads = maxThreads;
@@ -143,8 +172,14 @@ public FlashLfqEngine(
McmcSteps = mcmcSteps;
McmcBurninSteps = mcmcBurninSteps;
UseSharedPeptidesForProteinQuant = useSharedPeptidesForProteinQuant;
- PeptidesModifiedSequencesToQuantify = peptideSequencesToUse.IsNotNullOrEmpty() ? new HashSet(peptideSequencesToUse)
- : allIdentifications.Select(id => id.ModifiedSequence).ToHashSet();
+
+ // MBR settings
+ MatchBetweenRuns = matchBetweenRuns;
+ MbrPpmTolerance = matchBetweenRunsPpmTolerance;
+ MbrRtWindow = maxMbrWindow;
+ DonorCriterion = donorCriterion;
+ DonorQValueThreshold = donorQValueThreshold;
+ MbrDetectionQValueThreshold = matchBetweenRunsFdrThreshold;
RandomSeed = randomSeed;
if (MaxThreads == -1 || MaxThreads >= Environment.ProcessorCount)
@@ -166,7 +201,7 @@ public FlashLfqResults Run()
{
_globalStopwatch.Start();
_ms1Scans = new Dictionary();
- _results = new FlashLfqResults(_spectraFileInfo, _allIdentifications, PeptidesModifiedSequencesToQuantify);
+ _results = new FlashLfqResults(_spectraFileInfo, _allIdentifications, MbrDetectionQValueThreshold, PeptideModifiedSequencesToQuantify);
// build m/z index keys
CalculateTheoreticalIsotopeDistributions();
@@ -206,6 +241,8 @@ public FlashLfqResults Run()
// do MBR
if (MatchBetweenRuns)
{
+ Console.WriteLine("Find the best donors for match-between-runs");
+ FindPeptideDonorFiles();
foreach (var spectraFile in _spectraFileInfo)
{
if (!Silent)
@@ -214,7 +251,6 @@ public FlashLfqResults Run()
}
QuantifyMatchBetweenRunsPeaks(spectraFile);
-
_peakIndexingEngine.ClearIndex();
if (!Silent)
@@ -222,6 +258,14 @@ public FlashLfqResults Run()
Console.WriteLine("Finished MBR for " + spectraFile.FilenameWithoutExtension);
}
}
+
+ Console.WriteLine("Computing PEP for MBR Transfers");
+ bool pepSuccesful = RunPEPAnalysis();
+
+ foreach (var spectraFile in _spectraFileInfo)
+ {
+ CalculateFdrForMbrPeaks(spectraFile, pepSuccesful);
+ }
}
// normalize
@@ -236,6 +280,9 @@ public FlashLfqResults Run()
// do top3 protein quantification
_results.CalculateProteinResultsMedianPolish(UseSharedPeptidesForProteinQuant);
+ // calculate ptm occupancy at the peptide level
+ _results.CalculatePTMOccupancy();
+
// do Bayesian protein fold-change analysis
if (BayesianProteinQuant)
{
@@ -489,52 +536,59 @@ private void QuantifyMs2IdentifiedPeptides(SpectraFileInfo fileInfo)
_results.Peaks[fileInfo].AddRange(chromatographicPeaks.ToList());
}
+ #region MatchBetweenRuns
///
/// Used by the match-between-runs algorithm to determine systematic retention time drifts between
/// chromatographic runs.
///
- private RetentionTimeCalibDataPoint[] GetRtCalSpline(SpectraFileInfo donor, SpectraFileInfo acceptor, MbrScorer scorer)
- {
+ private RetentionTimeCalibDataPoint[] GetRtCalSpline(SpectraFileInfo donor, SpectraFileInfo acceptor, MbrScorer scorer,
+ out List donorFileBestMsmsPeaksOrderedByMass)
+ {
Dictionary donorFileBestMsmsPeaks = new();
Dictionary acceptorFileBestMsmsPeaks = new();
List rtCalibrationCurve = new();
List anchorPeptideRtDiffs = new(); // anchor peptides are peptides that were MS2 detected in both the donor and acceptor runs
+ Dictionary> donorFileAllMsmsPeaks = _results.Peaks[donor]
+ .Where(peak => peak.NumIdentificationsByFullSeq == 1
+ && !peak.IsMbrPeak
+ && peak.IsotopicEnvelopes.Any()
+ && peak.Identifications.Min(id => id.QValue) < DonorQValueThreshold)
+ .GroupBy(peak => peak.Identifications.First().ModifiedSequence)
+ .ToDictionary(group => group.Key, group => group.ToList());
- // get all peaks, not counting ambiguous peaks
- IEnumerable donorPeaks = _results.Peaks[donor].Where(p => p.Apex != null && !p.IsMbrPeak && p.NumIdentificationsByFullSeq == 1);
- IEnumerable acceptorPeaks = _results.Peaks[acceptor].Where(p => p.Apex != null && !p.IsMbrPeak && p.NumIdentificationsByFullSeq == 1);
-
- // get the best (most intense) peak for each peptide in the acceptor file
- foreach (ChromatographicPeak acceptorPeak in acceptorPeaks)
+ // iterate through each unique donor sequence
+ foreach (var sequencePeakListKvp in donorFileAllMsmsPeaks)
{
- if (acceptorFileBestMsmsPeaks.TryGetValue(acceptorPeak.Identifications.First().ModifiedSequence, out ChromatographicPeak currentBestPeak))
- {
- if (currentBestPeak.Intensity > acceptorPeak.Intensity)
- {
- acceptorFileBestMsmsPeaks[acceptorPeak.Identifications.First().ModifiedSequence] = acceptorPeak;
- }
- }
- else
- {
- acceptorFileBestMsmsPeaks.Add(acceptorPeak.Identifications.First().ModifiedSequence, acceptorPeak);
- }
+ List peaksForPeptide = sequencePeakListKvp.Value;
+ if (!peaksForPeptide.Any())
+ continue;
+
+ ChromatographicPeak bestPeak = ChooseBestPeak(peaksForPeptide);
+
+ if (bestPeak == null) continue;
+ donorFileBestMsmsPeaks.Add(sequencePeakListKvp.Key, bestPeak);
}
- // get the best (most intense) peak for each peptide in the donor file
- foreach (ChromatographicPeak donorPeak in donorPeaks)
+ Dictionary> acceptorFileAllMsmsPeaks = _results.Peaks[acceptor]
+ .Where(peak => peak.NumIdentificationsByFullSeq == 1
+ && !peak.IsMbrPeak
+ && peak.IsotopicEnvelopes.Any()
+ && peak.Identifications.Min(id => id.QValue) < DonorQValueThreshold)
+ .GroupBy(peak => peak.Identifications.First().ModifiedSequence)
+ .ToDictionary(group => group.Key, group => group.ToList());
+
+ // iterate through each acceptor sequence
+ foreach (var sequencePeakListKvp in acceptorFileAllMsmsPeaks)
{
- if (donorFileBestMsmsPeaks.TryGetValue(donorPeak.Identifications.First().ModifiedSequence, out ChromatographicPeak currentBestPeak))
- {
- if (currentBestPeak.Intensity > donorPeak.Intensity)
- {
- donorFileBestMsmsPeaks[donorPeak.Identifications.First().ModifiedSequence] = donorPeak;
- }
- }
- else
- {
- donorFileBestMsmsPeaks.Add(donorPeak.Identifications.First().ModifiedSequence, donorPeak);
- }
+ List peaksForPeptide = sequencePeakListKvp.Value;
+ if (!peaksForPeptide.Any())
+ continue;
+
+ ChromatographicPeak bestPeak = ChooseBestPeak(peaksForPeptide);
+
+ if (bestPeak == null) continue;
+ acceptorFileBestMsmsPeaks.Add(sequencePeakListKvp.Key, bestPeak);
}
// create RT calibration curve
@@ -545,7 +599,7 @@ private RetentionTimeCalibDataPoint[] GetRtCalSpline(SpectraFileInfo donor, Spec
if (donorFileBestMsmsPeaks.TryGetValue(peak.Key, out ChromatographicPeak donorFilePeak))
{
rtCalibrationCurve.Add(new RetentionTimeCalibDataPoint(donorFilePeak, acceptorFilePeak));
- if(donorFilePeak.ApexRetentionTime > 0 && acceptorFilePeak.ApexRetentionTime > 0)
+ if (donorFilePeak.ApexRetentionTime > 0 && acceptorFilePeak.ApexRetentionTime > 0)
{
anchorPeptideRtDiffs.Add(donorFilePeak.ApexRetentionTime - acceptorFilePeak.ApexRetentionTime);
}
@@ -553,10 +607,89 @@ private RetentionTimeCalibDataPoint[] GetRtCalSpline(SpectraFileInfo donor, Spec
}
scorer.AddRtPredErrorDistribution(donor, anchorPeptideRtDiffs, _numberOfAnchorPeptidesForMbr);
+ donorFileBestMsmsPeaksOrderedByMass = donorFileBestMsmsPeaks.Select(kvp => kvp.Value).OrderBy(p => p.Identifications.First().PeakfindingMass).ToList();
return rtCalibrationCurve.OrderBy(p => p.DonorFilePeak.Apex.IndexedPeak.RetentionTime).ToArray();
}
+ ///
+ /// For every MSMS identified peptide, selects one file that will be used as the donor
+ /// by finding files that contain the most peaks in the local neighborhood,
+ /// then writes the results to the DonorFileToPeakDict.
+ /// WARNING! Strong assumption that this is called BEFORE MBR peaks are identified/assigned to the results
+ ///
+ private void FindPeptideDonorFiles()
+ {
+ DonorFileToPeakDict = new Dictionary>();
+
+ Dictionary> seqPeakDict = _results.Peaks
+ .SelectMany(kvp => kvp.Value)
+ .Where(peak => peak.NumIdentificationsByFullSeq == 1
+ && peak.IsotopicEnvelopes.Any()
+ && peak.Identifications.Min(id => id.QValue) < DonorQValueThreshold)
+ .GroupBy(peak => peak.Identifications.First().ModifiedSequence)
+ .Where(group => PeptideModifiedSequencesToQuantify.Contains(group.Key))
+ .ToDictionary(group => group.Key, group => group.ToList());
+
+ // iterate through each unique sequence
+ foreach (var sequencePeakListKvp in seqPeakDict)
+ {
+ List peaksForPeptide = sequencePeakListKvp.Value;
+ if (!peaksForPeptide.Any())
+ continue;
+
+ ChromatographicPeak bestPeak = ChooseBestPeak(peaksForPeptide);
+
+ if (bestPeak == null) continue;
+ if (DonorFileToPeakDict.ContainsKey(bestPeak.SpectraFileInfo))
+ {
+ DonorFileToPeakDict[bestPeak.SpectraFileInfo].Add(bestPeak);
+ }
+ else
+ {
+ DonorFileToPeakDict.Add(bestPeak.SpectraFileInfo, new List { bestPeak });
+ }
+ }
+ }
+
+ internal ChromatographicPeak ChooseBestPeak(List peaks)
+ {
+ ChromatographicPeak bestPeak = null;
+ switch (DonorCriterion)
+ {
+ case DonorCriterion.Score: // Select best peak by the PSM score
+ bestPeak = peaks.MaxBy(peak => peak.Identifications.Max(id => id.PsmScore));
+ if (bestPeak.Identifications.First().PsmScore > 0)
+ break;
+ else // if every ID has a score of zero, let it fall through to the default case
+ goto default;
+ case DonorCriterion.Neighbors: // Select peak with the most neighboring peaks
+ int maxPeaks = 0;
+ foreach (var donorPeak in peaks)
+ {
+ // Count the number of neighboring peaks with unique peptides
+ int neighboringPeaksCount = _results.Peaks[donorPeak.SpectraFileInfo]
+ .Where(peak => Math.Abs(peak.ApexRetentionTime - donorPeak.ApexRetentionTime) < MbrAlignmentWindow)
+ .Select(peak => peak.Identifications.First().ModifiedSequence)
+ .Distinct()
+ .Count();
+
+ if (neighboringPeaksCount > maxPeaks)
+ {
+ maxPeaks = neighboringPeaksCount;
+ bestPeak = donorPeak;
+ }
+ }
+ break;
+ case DonorCriterion.Intensity: // Select the peak with the highest intensity
+ default:
+ bestPeak = peaks.MaxBy(peak => peak.Intensity);
+ break;
+ }
+
+ return bestPeak;
+ }
+
///
/// Used by MBR. Predicts the retention time of a peak in an acceptor file based on the
/// retention time of the peak in the donor file. This is done with a local alignment
@@ -601,21 +734,21 @@ internal RtInfo PredictRetentionTime(
int numberOfForwardAnchors = 0;
// gather nearby data points
- for (int r = index+1; r < rtCalibrationCurve.Length; r++)
+ for (int r = index + 1; r < rtCalibrationCurve.Length; r++)
{
double rtDiff = rtCalibrationCurve[r].DonorFilePeak.Apex.IndexedPeak.RetentionTime - donorPeak.Apex.IndexedPeak.RetentionTime;
- if (rtCalibrationCurve[r].AcceptorFilePeak != null
+ if (rtCalibrationCurve[r].AcceptorFilePeak != null
&& rtCalibrationCurve[r].AcceptorFilePeak.ApexRetentionTime > 0)
{
- if(Math.Abs(rtDiff) > 0.5) // If the rtDiff is too large, it's no longer local alignment
+ if (Math.Abs(rtDiff) > 0.5) // If the rtDiff is too large, it's no longer local alignment
{
break;
}
nearbyCalibrationPoints.Add(rtCalibrationCurve[r]);
numberOfForwardAnchors++;
- if(numberOfForwardAnchors >= _numberOfAnchorPeptidesForMbr) // We only want a handful of anchor points
+ if (numberOfForwardAnchors >= _numberOfAnchorPeptidesForMbr) // We only want a handful of anchor points
{
- break;
+ break;
}
}
}
@@ -642,20 +775,25 @@ internal RtInfo PredictRetentionTime(
if (!nearbyCalibrationPoints.Any())
{
- return null;
+ // If there are no nearby calibration points, return the donor peak's RT and a width of 15 seconds
+ return new RtInfo(predictedRt: donorPeak.Apex.IndexedPeak.RetentionTime, width: 0.25);
}
// calculate difference between acceptor and donor RTs for these RT region
List rtDiffs = nearbyCalibrationPoints
- .Select(p => p.DonorFilePeak.ApexRetentionTime - p.AcceptorFilePeak.ApexRetentionTime)
+ .Select(p => p.DonorFilePeak.ApexRetentionTime - p.AcceptorFilePeak.ApexRetentionTime)
.ToList();
-
+
double medianRtDiff = rtDiffs.Median();
- double rtRange = rtDiffs.InterquartileRange() * 4.5;
- // IQR * 4.5 is roughly equivalent to 6 StdDevs, so search window extends ~3 std.devs from either side of predicted RT
- // IQR is less affected by outliers than StdDev
+ if(rtDiffs.Count == 1)
+ {
+ // If there is exactly one calibration point, shift the donor RT by that single offset and return a width of 15 seconds
+ return new RtInfo(predictedRt: donorPeak.Apex.IndexedPeak.RetentionTime - medianRtDiff, width: 0.25);
+ }
+
+ double rtRange = rtDiffs.StandardDeviation() * 6;
- rtRange = Math.Min(rtRange, MbrRtWindow);
+ rtRange = Math.Min(rtRange, MbrRtWindow);
return new RtInfo(predictedRt: donorPeak.Apex.IndexedPeak.RetentionTime - medianRtDiff, width: rtRange);
}
@@ -672,7 +810,8 @@ private MbrScorer BuildMbrScorer(List acceptorFileIdentifie
var apexToAcceptorFilePeakDict = new Dictionary();
List ppmErrors = new List();
foreach (var peak in acceptorFileIdentifiedPeaks.Where(p => p.Apex != null
- && PeptidesModifiedSequencesToQuantify.Contains(p.Identifications.First().ModifiedSequence)))
+ && PeptideModifiedSequencesToQuantify.Contains(p.Identifications.First().ModifiedSequence)
+ && p.Identifications.First().QValue < DonorQValueThreshold))
{
if (!apexToAcceptorFilePeakDict.ContainsKey(peak.Apex.IndexedPeak))
{
@@ -702,6 +841,56 @@ private MbrScorer BuildMbrScorer(List acceptorFileIdentifie
return new MbrScorer(apexToAcceptorFilePeakDict, acceptorFileIdentifiedPeaks, ppmDistribution, logIntensityDistribution);
}
+ ///
+ /// Returns a pseudo-randomly selected peak that does not have the same mass as the donor
+ ///
+ ///
+ /// Will search for a peak at least 5 Da away from the peakfinding mass
+ ///
+ internal ChromatographicPeak GetRandomPeak(
+ List peaksOrderedByMass,
+ double donorPeakRetentionTime,
+ double retentionTimeMinDiff,
+ Identification donorIdentification)
+ {
+ double minDiff = 5 * PeriodicTable.GetElement("H").PrincipalIsotope.AtomicMass;
+ double maxDiff = 11 * PeriodicTable.GetElement("H").PrincipalIsotope.AtomicMass;
+ double donorPeakPeakfindingMass = donorIdentification.PeakfindingMass;
+
+ // Theoretically we could do a binary search but we're just going to iterate through the whole list of donor peaks
+ List randomPeakCandidates = peaksOrderedByMass
+ .Where(p =>
+ p.ApexRetentionTime > 0
+ && Math.Abs(p.ApexRetentionTime - donorPeakRetentionTime) > retentionTimeMinDiff
+ && p.Identifications.First().BaseSequence != donorIdentification.BaseSequence
+ && Math.Abs(p.Identifications.First().PeakfindingMass - donorPeakPeakfindingMass) > minDiff
+ && Math.Abs(p.Identifications.First().PeakfindingMass - donorPeakPeakfindingMass) < maxDiff)
+ .ToList();
+
+ while (!randomPeakCandidates.Any() & maxDiff < 1e5)
+ {
+ // Increase the search space by a factor of 10 and try again
+ maxDiff *= 10;
+ randomPeakCandidates = peaksOrderedByMass
+ .Where(p =>
+ p.ApexRetentionTime > 0
+ && Math.Abs(p.ApexRetentionTime - donorPeakRetentionTime) > retentionTimeMinDiff
+ && p.Identifications.First().BaseSequence != donorIdentification.BaseSequence
+ && Math.Abs(p.Identifications.First().PeakfindingMass - donorPeakPeakfindingMass) > minDiff
+ && Math.Abs(p.Identifications.First().PeakfindingMass - donorPeakPeakfindingMass) < maxDiff)
+ .ToList();
+ }
+
+ if (!randomPeakCandidates.Any())
+ {
+ return null;
+ }
+
+ // Generates a pseudo-random number based on the donor peak finding mass + retention time
+ int pseudoRandomNumber = (int)(1e5 * (donorIdentification.PeakfindingMass % 1.0) * (donorIdentification.Ms2RetentionTimeInMinutes % 1.0)) % randomPeakCandidates.Count;
+ return randomPeakCandidates[pseudoRandomNumber];
+ }
+
///
/// This method maps identified peaks from other chromatographic runs ("donors") onto
/// the defined chromatographic run ("acceptor"). The goal is to reduce the number of missing
@@ -722,7 +911,7 @@ private void QuantifyMatchBetweenRunsPeaks(SpectraFileInfo idAcceptorFile)
// these are the analytes already identified in this run. we don't need to try to match them from other runs
var acceptorFileIdentifiedSequences = new HashSet(acceptorFileIdentifiedPeaks
- .Where(peak => peak.IsotopicEnvelopes.Any())
+ .Where(peak => peak.IsotopicEnvelopes.Any() && peak.Identifications.Min(id => id.QValue) < 0.01)
.SelectMany(p => p.Identifications.Select(d => d.ModifiedSequence)));
MbrScorer scorer = BuildMbrScorer(acceptorFileIdentifiedPeaks, out var mbrTol);
@@ -748,24 +937,24 @@ private void QuantifyMatchBetweenRunsPeaks(SpectraFileInfo idAcceptorFile)
}
// this stores the results of MBR
- var matchBetweenRunsIdentifiedPeaks = new Dictionary>>();
+ ConcurrentDictionary>> matchBetweenRunsIdentifiedPeaks = new();
// map each donor file onto this file
- foreach (SpectraFileInfo idDonorFile in _spectraFileInfo)
+ foreach (var donorFilePeakListKvp in DonorFileToPeakDict)
{
- if (idAcceptorFile.Equals(idDonorFile))
+ if (idAcceptorFile.Equals(donorFilePeakListKvp.Key))
{
continue;
}
// this is the list of peaks identified in the other file but not in this one ("ID donor peaks")
- List idDonorPeaks = _results.Peaks[idDonorFile].Where(p =>
- !p.IsMbrPeak
- && p.NumIdentificationsByFullSeq == 1
- && p.IsotopicEnvelopes.Any()
- && PeptidesModifiedSequencesToQuantify.Contains(p.Identifications.First().ModifiedSequence) // Only do MBR for peptides that we want to quantify
- && !acceptorFileIdentifiedSequences.Contains(p.Identifications.First().ModifiedSequence)
- && (!RequireMsmsIdInCondition || p.Identifications.Any(v => v.ProteinGroups.Any(g => thisFilesMsmsIdentifiedProteins.Contains(g))))).ToList();
+ List idDonorPeaks = donorFilePeakListKvp.Value
+ .Where(p =>
+ !acceptorFileIdentifiedSequences.Contains(p.Identifications.First().ModifiedSequence)
+ && (!RequireMsmsIdInCondition
+ || p.Identifications.Any(v => v.ProteinGroups.Any(g => thisFilesMsmsIdentifiedProteins.Contains(g))))
+ && this.PeptideModifiedSequencesToQuantify.Contains(p.Identifications.First().ModifiedSequence))
+ .ToList();
if (!idDonorPeaks.Any())
{
@@ -773,7 +962,7 @@ private void QuantifyMatchBetweenRunsPeaks(SpectraFileInfo idAcceptorFile)
}
bool donorSampleIsFractionated = _results.SpectraFiles
- .Where(p => p.Condition == idDonorFile.Condition && p.BiologicalReplicate == idDonorFile.BiologicalReplicate)
+ .Where(p => p.Condition == donorFilePeakListKvp.Key.Condition && p.BiologicalReplicate == donorFilePeakListKvp.Key.BiologicalReplicate)
.Select(p => p.Fraction)
.Distinct()
.Count() > 1;
@@ -781,21 +970,22 @@ private void QuantifyMatchBetweenRunsPeaks(SpectraFileInfo idAcceptorFile)
// We're only interested in the fold change if the conditions are different. Otherwise, we score based off of the intensities
// of the acceptor file
if (_spectraFileInfo.Select(p => p.Condition).Distinct().Count() > 1
- && idDonorFile.Condition != idAcceptorFile.Condition)
+ && donorFilePeakListKvp.Key.Condition != idAcceptorFile.Condition)
{
scorer.CalculateFoldChangeBetweenFiles(idDonorPeaks);
}
// generate RT calibration curve
- RetentionTimeCalibDataPoint[] rtCalibrationCurve = GetRtCalSpline(idDonorFile, idAcceptorFile, scorer);
+ RetentionTimeCalibDataPoint[] rtCalibrationCurve = GetRtCalSpline(donorFilePeakListKvp.Key, idAcceptorFile, scorer, out var donorPeaksMassOrdered);
+
+ // break if MBR transfers can't be scored
+ if (!scorer.IsValid(donorFilePeakListKvp.Key)) continue;
// Loop through every MSMS id in the donor file
Parallel.ForEach(Partitioner.Create(0, idDonorPeaks.Count),
new ParallelOptions { MaxDegreeOfParallelism = MaxThreads },
(range, loopState) =>
{
- var matchBetweenRunsIdentifiedPeaksThreadSpecific = new Dictionary>>();
-
for (int i = range.Item1; i < range.Item2; i++)
{
ChromatographicPeak donorPeak = idDonorPeaks[i];
@@ -803,49 +993,91 @@ private void QuantifyMatchBetweenRunsPeaks(SpectraFileInfo idAcceptorFile)
RtInfo rtInfo = PredictRetentionTime(rtCalibrationCurve, donorPeak, idAcceptorFile, acceptorSampleIsFractionated, donorSampleIsFractionated);
if (rtInfo == null) continue;
- FindAllAcceptorPeaks(idAcceptorFile, scorer, rtInfo, mbrTol, donorPeak, matchBetweenRunsIdentifiedPeaksThreadSpecific);
- }
+ // Look for MBR target (predicted-RT peak)
+ FindAllAcceptorPeaks(idAcceptorFile, scorer, rtInfo, mbrTol, donorPeak, out var bestAcceptor);
+ AddPeakToConcurrentDict(matchBetweenRunsIdentifiedPeaks, bestAcceptor, donorPeak.Identifications.First());
+
+ //Draw a random donor that has an rt sufficiently far enough away
+ double minimumRtDifference = rtInfo.Width*2;
+ ChromatographicPeak randomDonor = GetRandomPeak(donorPeaksMassOrdered,
+ donorPeak.ApexRetentionTime,
+ minimumRtDifference,
+ donorPeak.Identifications.First());
+
+ // Look for MBR decoy (random-RT peak)
+ ChromatographicPeak bestDecoy = null;
+ RtInfo decoyRtInfo = null;
+ if (randomDonor != null)
+ {
+ decoyRtInfo = PredictRetentionTime(rtCalibrationCurve, randomDonor, idAcceptorFile, acceptorSampleIsFractionated, donorSampleIsFractionated);
+ if (decoyRtInfo != null)
+ {
+ //Find a decoy peak using the randomly drawn retention time
+ FindAllAcceptorPeaks(idAcceptorFile, scorer, rtInfo, mbrTol, donorPeak, out bestDecoy,
+ randomRt: decoyRtInfo.PredictedRt);
+ AddPeakToConcurrentDict(matchBetweenRunsIdentifiedPeaks, bestDecoy, donorPeak.Identifications.First());
+ }
+ }
- lock (matchBetweenRunsIdentifiedPeaks)
- {
- foreach (var kvp in matchBetweenRunsIdentifiedPeaksThreadSpecific)
+ double windowWidth = Math.Max(0.5, rtInfo.Width);
+ // If the search turned up empty, try again with a wider search window
+ while (bestAcceptor == null && bestDecoy == null)
{
- if (matchBetweenRunsIdentifiedPeaks.TryGetValue(kvp.Key, out var list))
+ windowWidth = Math.Min(windowWidth, MbrRtWindow);
+ rtInfo.Width = windowWidth;
+ FindAllAcceptorPeaks(idAcceptorFile, scorer, rtInfo, mbrTol, donorPeak, out bestAcceptor);
+ AddPeakToConcurrentDict(matchBetweenRunsIdentifiedPeaks, bestAcceptor, donorPeak.Identifications.First());
+
+ if(decoyRtInfo != null)
+ {
+ decoyRtInfo.Width = windowWidth;
+ FindAllAcceptorPeaks(idAcceptorFile, scorer, rtInfo, mbrTol, donorPeak, out bestDecoy,
+ randomRt: decoyRtInfo.PredictedRt);
+ AddPeakToConcurrentDict(matchBetweenRunsIdentifiedPeaks, bestDecoy, donorPeak.Identifications.First());
+ }
+ if (windowWidth >= MbrRtWindow)
{
- foreach (var peak in kvp.Value)
- {
- if (list.TryGetValue(peak.Key, out List existing))
- {
- foreach (var acceptorPeak in peak.Value)
- {
- var samePeakSameSequence = existing
- .FirstOrDefault(p => p.Identifications.First().ModifiedSequence == acceptorPeak.Identifications.First().ModifiedSequence);
-
- if (samePeakSameSequence != null)
- {
- samePeakSameSequence.Identifications.Add(acceptorPeak.Identifications.First());
- }
- else
- {
- existing.Add(acceptorPeak);
- }
- }
- }
- else
- {
- list.Add(peak.Key, peak.Value);
- }
- }
+ break;
}
else
{
- matchBetweenRunsIdentifiedPeaks.Add(kvp.Key, kvp.Value);
+ windowWidth += 0.5;
}
}
+
}
});
}
+ // Eliminate duplicate peaks (not sure where they come from)
+ foreach (var seqDictionaryKvp in matchBetweenRunsIdentifiedPeaks)
+ {
+ // Each isotopic envelope is linked to a list of ChromatographicPeaks
+ // Here, we remove instances where the same envelope is associated with multiple chromatographic peaks but the peaks correspond to the same donor peptide
+ // The root cause of these duplicate peaks has not yet been determined
+ // If multiple peaks are associated with the same envelope, and they have different associated peptide identifications, then they're kept separate.
+ foreach (var envelopePeakListKvp in seqDictionaryKvp.Value)
+ {
+ List bestPeaks = new();
+ foreach (var peakGroup in envelopePeakListKvp.Value.GroupBy(peak => peak.Identifications.First().ModifiedSequence))
+ {
+ bestPeaks.Add(peakGroup.MaxBy(peak => peak.MbrScore));
+ }
+ envelopePeakListKvp.Value.Clear();
+ envelopePeakListKvp.Value.AddRange(bestPeaks);
+ }
+ }
+
+ // Create a dictionary that stores imsPeak associated with an ms/ms identified peptide
+ Dictionary> msmsImsPeaks = _results.Peaks[idAcceptorFile]
+ .Where(peak =>
+ !peak.DecoyPeptide
+ && peak.Apex?.IndexedPeak != null
+ && PeptideModifiedSequencesToQuantify.Contains(peak.Identifications.First().ModifiedSequence))
+ .Select(peak => peak.Apex.IndexedPeak)
+ .GroupBy(imsPeak => imsPeak.ZeroBasedMs1ScanIndex)
+ .ToDictionary(g => g.Key, g => g.ToList());
+
// take the best result (highest scoring) for each peptide after we've matched from all the donor files
foreach (var mbrIdentifiedPeptide in matchBetweenRunsIdentifiedPeaks.Where(p => !acceptorFileIdentifiedSequences.Contains(p.Key)))
{
@@ -855,36 +1087,101 @@ private void QuantifyMatchBetweenRunsPeaks(SpectraFileInfo idAcceptorFile)
continue;
}
- List peakHypotheses = mbrIdentifiedPeptide.Value.SelectMany(p => p.Value).OrderByDescending(p => p.MbrScore).ToList();
-
- ChromatographicPeak best = peakHypotheses.First();
-
- peakHypotheses.Remove(best);
-
- if (peakHypotheses.Count > 0)
+ foreach (var peakHypothesisGroup in mbrIdentifiedPeptide.Value.SelectMany(kvp => kvp.Value).OrderByDescending(p => p.MbrScore).GroupBy(p => p.RandomRt))
{
- double start = best.IsotopicEnvelopes.Min(p => p.IndexedPeak.RetentionTime);
- double end = best.IsotopicEnvelopes.Max(p => p.IndexedPeak.RetentionTime);
+ var peakHypotheses = peakHypothesisGroup.ToList();
+ ChromatographicPeak best = peakHypotheses.First();
+ peakHypotheses.Remove(best);
- List peaksToRemoveFromHypotheses = new List();
- foreach (ChromatographicPeak peak in peakHypotheses.Where(p => p.Apex.ChargeState != best.Apex.ChargeState))
+ // Discard any peaks that are already associated with an ms/ms identified peptide
+ while (best?.Apex?.IndexedPeak != null && msmsImsPeaks.TryGetValue(best.Apex.IndexedPeak.ZeroBasedMs1ScanIndex, out var peakList))
{
- if (peak.Apex.IndexedPeak.RetentionTime > start && peak.Apex.IndexedPeak.RetentionTime < end)
+ if (peakList.Contains(best.Apex.IndexedPeak))
+ {
+ if (!peakHypotheses.Any())
+ {
+ best = null;
+ break;
+ }
+ best = peakHypotheses.First();
+ peakHypotheses.Remove(best);
+ }
+ else
{
- best.MergeFeatureWith(peak, Integrate);
+ break;
+ }
+ }
+ if (best == null) continue;
- peaksToRemoveFromHypotheses.Add(peak);
+ // merge peaks with different charge states
+ if (peakHypotheses.Count > 0)
+ {
+ double start = best.IsotopicEnvelopes.Min(p => p.IndexedPeak.RetentionTime);
+ double end = best.IsotopicEnvelopes.Max(p => p.IndexedPeak.RetentionTime);
+
+ _results.Peaks[idAcceptorFile].Add(best);
+ foreach (ChromatographicPeak peak in peakHypotheses.Where(p => p.Apex.ChargeState != best.Apex.ChargeState))
+ {
+ if (peak.Apex.IndexedPeak.RetentionTime >= start
+ && peak.Apex.IndexedPeak.RetentionTime <= end)
+ //&& Math.Abs(peak.MbrScore - best.MbrScore) / best.MbrScore < 0.25)// 25% difference is a rough heuristic, but I don't want super shitty peaks being able to supercede the intensity of a good peak!
+ {
+ if (msmsImsPeaks.TryGetValue(peak.Apex.IndexedPeak.ZeroBasedMs1ScanIndex, out var peakList) && peakList.Contains(peak.Apex.IndexedPeak))
+ {
+ continue; // If the peak is already accounted for, skip it.
+ }
+ else
+ {
+ best.MergeFeatureWith(peak, Integrate);
+ }
+ }
}
}
+ _results.Peaks[idAcceptorFile].Add(best);
}
-
- _results.Peaks[idAcceptorFile].Add(best);
}
-
RunErrorChecking(idAcceptorFile);
}
+ ///
+ /// A concurrent dictionary is used to keep track of MBR peaks that have been identified in the acceptor file. This function updates that dictionary
+ ///
+ /// concurrent dictionary. Key = Peptide sequence. Value = ConcurrentDictionary mapping where keys are isotopic envelopes and values are list of associated peaks
+ /// Peak to add to the dictionary
+ /// The donor ID associated with the MBR peaks
+ private void AddPeakToConcurrentDict(ConcurrentDictionary>> matchBetweenRunsIdentifiedPeaks,
+ ChromatographicPeak peakToSave,
+ Identification donorIdentification)
+ {
+ if(peakToSave == null)
+ {
+ return;
+ }
+ // save the peak hypothesis
+ matchBetweenRunsIdentifiedPeaks.AddOrUpdate
+ (
+ // new key
+ key: donorIdentification.ModifiedSequence,
+ // if we are adding a value for the first time, we simply create a new dictionary with one entry
+ addValueFactory: (sequenceKey) =>
+ new ConcurrentDictionary>(
+ new Dictionary>
+ {
+ { peakToSave.Apex, new List { peakToSave } }
+ }),
+ // if the key (sequence) already exists, we have to add the new peak to the existing dictionary
+ updateValueFactory: (sequenceKey, envelopePeakListDict) =>
+ {
+ envelopePeakListDict.AddOrUpdate(
+ key: peakToSave.Apex,
+ addValueFactory: (envelopeKey) => new List { peakToSave }, // if the key (envelope) doesnt exist, just create a new list
+ updateValueFactory: (envelopeKey, peakList) => { peakList.Add(peakToSave); return peakList; }); // if the key (envelope) already exists, add the peak to the associated list
+ return envelopePeakListDict;
+ }
+ );
+ }
+
///
/// Finds MBR acceptor peaks by looping through every possible peak for every possible charge state
/// in a given retention time range. Identified peaks are added to the matchBetweenRunsIdentifiedPeaks dictionary.
@@ -900,21 +1197,24 @@ internal void FindAllAcceptorPeaks(
RtInfo rtInfo,
Tolerance fileSpecificTol,
ChromatographicPeak donorPeak,
- Dictionary>> matchBetweenRunsIdentifiedPeaksThreadSpecific)
+ out ChromatographicPeak bestAcceptor,
+ double? randomRt = null)
{
// get the MS1 scan info for this region so we can look up indexed peaks
Ms1ScanInfo[] ms1ScanInfos = _ms1Scans[idAcceptorFile];
Ms1ScanInfo start = ms1ScanInfos[0];
Ms1ScanInfo end = ms1ScanInfos[ms1ScanInfos.Length - 1];
+ double rtStartHypothesis = randomRt == null ? rtInfo.RtStartHypothesis : (double)randomRt - (rtInfo.Width / 2.0);
+ double rtEndHypothesis = randomRt == null ? rtInfo.RtEndHypothesis : (double)randomRt + (rtInfo.Width / 2.0);
for (int j = 0; j < ms1ScanInfos.Length; j++)
{
Ms1ScanInfo scan = ms1ScanInfos[j];
- if (scan.RetentionTime <= rtInfo.RtStartHypothesis)
+ if (scan.RetentionTime <= rtStartHypothesis)
{
start = scan;
}
- if (scan.RetentionTime >= rtInfo.RtEndHypothesis)
+ if (scan.RetentionTime >= rtEndHypothesis)
{
end = scan;
break;
@@ -930,6 +1230,7 @@ internal void FindAllAcceptorPeaks(
}
Identification donorIdentification = donorPeak.Identifications.First();
+ bestAcceptor = null;
foreach (int z in chargesToMatch)
{
@@ -951,36 +1252,13 @@ internal void FindAllAcceptorPeaks(
while (chargeEnvelopes.Any())
{
ChromatographicPeak acceptorPeak = FindIndividualAcceptorPeak(idAcceptorFile, scorer, donorPeak,
- fileSpecificTol, rtInfo, z, chargeEnvelopes);
+ fileSpecificTol, rtInfo, z, chargeEnvelopes, randomRt);
if (acceptorPeak == null)
- continue;
-
- // save the peak hypothesis
- if (matchBetweenRunsIdentifiedPeaksThreadSpecific.TryGetValue(donorIdentification.ModifiedSequence, out var mbrPeaks))
- {
- if (mbrPeaks.TryGetValue(acceptorPeak.Apex, out List existing))
- {
- var samePeakSameSequence = existing
- .FirstOrDefault(p => p.Identifications.First().ModifiedSequence == acceptorPeak.Identifications.First().ModifiedSequence);
-
- if (samePeakSameSequence != null)
- {
- samePeakSameSequence.Identifications.Add(donorIdentification);
- }
- else
- {
- existing.Add(acceptorPeak);
- }
- }
- else
- {
- mbrPeaks.Add(acceptorPeak.Apex, new List { acceptorPeak });
- }
- }
- else
+ continue;
+ if (bestAcceptor == null || bestAcceptor.MbrScore < acceptorPeak.MbrScore)
{
- matchBetweenRunsIdentifiedPeaksThreadSpecific.Add(donorIdentification.ModifiedSequence, new Dictionary>());
- matchBetweenRunsIdentifiedPeaksThreadSpecific[donorIdentification.ModifiedSequence].Add(acceptorPeak.Apex, new List { acceptorPeak });
+ acceptorPeak.ChargeList = chargesToMatch;
+ bestAcceptor = acceptorPeak;
}
}
}
@@ -1004,10 +1282,11 @@ internal ChromatographicPeak FindIndividualAcceptorPeak(
Tolerance mbrTol,
RtInfo rtInfo,
int z,
- List chargeEnvelopes)
+ List chargeEnvelopes,
+ double? randomRt = null)
{
- var donorId = donorPeak.Identifications.First();
- var acceptorPeak = new ChromatographicPeak(donorId, true, idAcceptorFile, predictedRetentionTime: rtInfo.PredictedRt);
+ var donorId = donorPeak.Identifications.OrderBy(p => p.QValue).First();
+ var acceptorPeak = new ChromatographicPeak(donorId, true, idAcceptorFile, randomRt != null);
// Grab the first scan/envelope from charge envelopes. This should be the most intense envelope in the list
IsotopicEnvelope seedEnv = chargeEnvelopes.First();
@@ -1030,7 +1309,7 @@ internal ChromatographicPeak FindIndividualAcceptorPeak(
return null;
}
- acceptorPeak.CalculateMbrScore(scorer, donorPeak);
+ acceptorPeak.MbrScore = scorer.ScoreMbr(acceptorPeak, donorPeak, randomRt ?? rtInfo.PredictedRt);
return acceptorPeak;
}
@@ -1076,9 +1355,9 @@ private void RunErrorChecking(SpectraFileInfo spectraFile)
{
if (!tryPeak.IsMbrPeak && !storedPeak.IsMbrPeak)
{
- if (PeptidesModifiedSequencesToQuantify.Contains(tryPeak.Identifications.First().ModifiedSequence))
+ if (PeptideModifiedSequencesToQuantify.Contains(tryPeak.Identifications.First().ModifiedSequence))
{
- if (PeptidesModifiedSequencesToQuantify.Contains(storedPeak.Identifications.First().ModifiedSequence))
+ if (PeptideModifiedSequencesToQuantify.Contains(storedPeak.Identifications.First().ModifiedSequence))
{
storedPeak.MergeFeatureWith(tryPeak, Integrate);
}
@@ -1096,14 +1375,20 @@ private void RunErrorChecking(SpectraFileInfo spectraFile)
}
else if (tryPeak.IsMbrPeak && !storedPeak.IsMbrPeak)
{
- if(PeptidesModifiedSequencesToQuantify.Contains(storedPeak.Identifications.First().ModifiedSequence))
+ // Default to MSMS peaks over MBR Peaks.
+ // Most of these have already been eliminated
+ // However, sometimes merging MBR peaks with different charge states reveals that
+ // The MBR peak conflicts with an MSMS peak
+ // Removing the peak when this happens is a conservative step.
+ // Sometimes the MSMS peak is a decoy, or has a peptide-level Q-value above the cutoff (i.e., the modified sequence isn't in PeptideModifiedSequencesToQuantify).
+ // In this case, we keep the MBR peak.
+ if (storedPeak.DecoyPeptide || !PeptideModifiedSequencesToQuantify.Contains(storedPeak.Identifications.First().ModifiedSequence))
{
- continue;
+ errorCheckedPeaksGroupedByApex[tryPeak.Apex.IndexedPeak] = tryPeak;
}
else
{
- // If the stored peak id isn't in the list of peptides to quantify, overwrite it
- errorCheckedPeaksGroupedByApex[tryPeak.Apex.IndexedPeak] = tryPeak;
+ continue;
}
}
else if (tryPeak.IsMbrPeak && storedPeak.IsMbrPeak)
@@ -1129,6 +1414,140 @@ private void RunErrorChecking(SpectraFileInfo spectraFile)
_results.Peaks[spectraFile] = errorCheckedPeaks;
}
+ private bool RunPEPAnalysis()
+ {
+ List mbrPeaks = _results.Peaks.SelectMany(kvp => kvp.Value)
+ .Where(peak => peak.IsMbrPeak)
+ .OrderByDescending(peak => peak.MbrScore)
+ .ToList();
+
+ if (!mbrPeaks.IsNotNullOrEmpty()) return false;
+ int decoyPeakTotal = mbrPeaks.Count(peak => peak.RandomRt);
+
+ List tempPepQs = new();
+ List tempQs = new();
+ if (mbrPeaks.Count > 100 && decoyPeakTotal > 20)
+ {
+ PepAnalysisEngine pepAnalysisEngine = new PepAnalysisEngine(mbrPeaks,
+ outputFolder: Path.GetDirectoryName(_spectraFileInfo.First().FullFilePathWithExtension),
+ maxThreads: MaxThreads,
+ pepTrainingFraction: PepTrainingFraction);
+ var pepOutput = pepAnalysisEngine.ComputePEPValuesForAllPeaks();
+
+ _results.PepResultString = pepOutput;
+
+ return true;
+ }
+ return false;
+ }
+
+ ///
+ /// Calculates the FDR for each MBR-detected peak using decoy peaks and decoy peptides,
+ /// Then filters out all peaks below a given FDR threshold
+ ///
+ private void CalculateFdrForMbrPeaks(SpectraFileInfo acceptorFile, bool usePep)
+ {
+ List mbrPeaks;
+ if (usePep)
+ {
+ // Take only the top scoring acceptor for each donor (acceptor can be target or decoy!)
+ // This may sort twice unnecessarily, but it is not guaranteed that GroupBy preserves ordering
+ mbrPeaks = _results.Peaks[acceptorFile]
+ .Where(peak => peak.IsMbrPeak)
+ .GroupBy(peak => peak.Identifications.First())
+ .Select(group => group.OrderBy(peak => peak.MbrPep).ThenByDescending(peak => peak.MbrScore).First())
+ .OrderBy(peak => peak.MbrPep)
+ .ThenByDescending(peak => peak.MbrScore)
+ .ToList();
+
+ _results.Peaks[acceptorFile] = mbrPeaks.Concat(_results.Peaks[acceptorFile].Where(peak => !peak.IsMbrPeak)).ToList();
+ }
+ else
+ {
+ // If PEP wasn't performed, things probably aren't calibrated very well, and so it's better
+ // To err on the safe side and not remove the decoys
+ mbrPeaks = _results.Peaks[acceptorFile]
+ .Where(peak => peak.IsMbrPeak)
+ .OrderByDescending(peak => peak.MbrScore)
+ .ToList();
+ }
+
+ if (!mbrPeaks.IsNotNullOrEmpty()) return;
+
+ List tempQs = new();
+ int totalPeaks = 0;
+ int decoyPeptides = 0;
+ int decoyPeaks = 0;
+ int doubleDecoys = 0;
+ for (int i = 0; i < mbrPeaks.Count; i++)
+ {
+ totalPeaks++;
+ switch (mbrPeaks[i])
+ {
+ case ChromatographicPeak p when (!p.DecoyPeptide && !p.RandomRt):
+ break;
+ case ChromatographicPeak p when (p.DecoyPeptide && !p.RandomRt):
+ decoyPeptides++;
+ break;
+ case ChromatographicPeak p when (!p.DecoyPeptide && p.RandomRt):
+ decoyPeaks++;
+ break;
+ case ChromatographicPeak p when (p.DecoyPeptide && p.RandomRt):
+ doubleDecoys++;
+ break;
+ }
+
+ // There are two parts to this score. We're summing the PEPs of peaks derived from target peptides. For peaks derived from decoy peptides,
+ // we apply the double-decoy correction, counting decoyPeptidePeaks - doubleDecoyPeaks
+ tempQs.Add(Math.Round(EstimateFdr(doubleDecoys, decoyPeptides, decoyPeaks, totalPeaks), 6));
+ }
+
+ // Set the q-value for each peak
+ double[] correctedQs = CorrectQValues(tempQs);
+ for (int i = 0; i < correctedQs.Length; i++)
+ {
+ mbrPeaks[i].MbrQValue = correctedQs[i];
+ }
+ }
+
+ private int EstimateDecoyPeptideErrors(int decoyPeptideCount, int doubleDecoyCount)
+ {
+ return Math.Max(0, decoyPeptideCount - doubleDecoyCount);
+ }
+
+ private double EstimateFdr(int doubleDecoyCount, int decoyPeptideCount, int decoyPeakCount, int totalPeakCount)
+ {
+ return (double)(1 + decoyPeakCount + EstimateDecoyPeptideErrors(decoyPeptideCount, doubleDecoyCount)) / totalPeakCount;
+ }
+
+ ///
+ /// Standard q-value correction, ensures that in a list of temporary q-values, a q-value is equal to
+ /// Min(q-values, every q-value below in the list). As you work your way down a list of q-values, the value should only increase or stay the same.
+ ///
+ ///
+ ///
+ private double[] CorrectQValues(List tempQs)
+ {
+ if (!tempQs.IsNotNullOrEmpty()) return null;
+ double[] correctedQValues = new double[tempQs.Count];
+ correctedQValues[tempQs.Count - 1] = tempQs.Last();
+ for(int i = tempQs.Count-2; i >=0; i--)
+ {
+ if (tempQs[i] > correctedQValues[i+1])
+ {
+ correctedQValues[i] = correctedQValues[i + 1];
+ }
+ else
+ {
+ correctedQValues[i] = tempQs[i];
+ }
+ }
+
+ return correctedQValues;
+ }
+
+ #endregion
+
///
/// Takes in a list of imsPeaks and finds all the isotopic peaks in each scan. If the experimental isotopic distribution
/// matches the theoretical distribution, an IsotopicEnvelope object is created from the summed intensities of each isotopic peak.
@@ -1228,7 +1647,7 @@ public List GetIsotopicEnvelopes(
}
// Check that the experimental envelope matches the theoretical
- if (CheckIsotopicEnvelopeCorrelation(massShiftToIsotopePeaks, peak, chargeState, isotopeTolerance))
+ if (CheckIsotopicEnvelopeCorrelation(massShiftToIsotopePeaks, peak, chargeState, isotopeTolerance, out var pearsonCorr))
{
// impute unobserved isotope peak intensities
// TODO: Figure out why value imputation is performed. Build a toggle?
@@ -1240,7 +1659,7 @@ public List GetIsotopicEnvelopes(
}
}
- isotopicEnvelopes.Add(new IsotopicEnvelope(peak, chargeState, experimentalIsotopeIntensities.Sum()));
+ isotopicEnvelopes.Add(new IsotopicEnvelope(peak, chargeState, experimentalIsotopeIntensities.Sum(), pearsonCorr));
}
}
@@ -1261,9 +1680,10 @@ public bool CheckIsotopicEnvelopeCorrelation(
Dictionary> massShiftToIsotopePeaks,
IndexedMassSpectralPeak peak,
int chargeState,
- Tolerance isotopeTolerance)
+ Tolerance isotopeTolerance,
+ out double pearsonCorrelation)
{
- double pearsonCorrelation = Correlation.Pearson(
+ pearsonCorrelation = Correlation.Pearson(
massShiftToIsotopePeaks[0].Select(p => p.expIntensity),
massShiftToIsotopePeaks[0].Select(p => p.theorIntensity));
diff --git a/mzLib/FlashLFQ/Identification.cs b/mzLib/FlashLFQ/Identification.cs
index 85c557c3e..2ee9bd1b4 100644
--- a/mzLib/FlashLFQ/Identification.cs
+++ b/mzLib/FlashLFQ/Identification.cs
@@ -15,11 +15,15 @@ public class Identification
public readonly ChemicalFormula OptionalChemicalFormula;
public readonly bool UseForProteinQuant;
public double PeakfindingMass;
+ public double PsmScore { get; init; }
+ public double QValue { get; init; }
+ public bool IsDecoy { get; }
public Identification(SpectraFileInfo fileInfo, string BaseSequence, string ModifiedSequence,
double monoisotopicMass,
double ms2RetentionTimeInMinutes, int chargeState, List proteinGroups,
- ChemicalFormula optionalChemicalFormula = null, bool useForProteinQuant = true)
+ ChemicalFormula optionalChemicalFormula = null, bool useForProteinQuant = true,
+ double psmScore = 0, double qValue = 0, bool decoy = false)
{
this.FileInfo = fileInfo;
this.BaseSequence = BaseSequence;
@@ -29,7 +33,10 @@ public Identification(SpectraFileInfo fileInfo, string BaseSequence, string Modi
this.PrecursorChargeState = chargeState;
this.ProteinGroups = new HashSet(proteinGroups);
this.OptionalChemicalFormula = optionalChemicalFormula;
- UseForProteinQuant = useForProteinQuant;
+ UseForProteinQuant = !decoy && useForProteinQuant; // ensure that decoy peptides aren't used for protein quant
+ QValue = qValue;
+ PsmScore = psmScore;
+ IsDecoy = decoy;
}
public override string ToString()
diff --git a/mzLib/FlashLFQ/IndexedMassSpectralPeak.cs b/mzLib/FlashLFQ/IndexedMassSpectralPeak.cs
index c9aa89042..cdadc56eb 100644
--- a/mzLib/FlashLFQ/IndexedMassSpectralPeak.cs
+++ b/mzLib/FlashLFQ/IndexedMassSpectralPeak.cs
@@ -29,7 +29,7 @@ public override bool Equals(object obj)
public override int GetHashCode()
{
- return Mz.GetHashCode();
+ return HashCode.Combine(Mz, ZeroBasedMs1ScanIndex);
}
public override string ToString()
diff --git a/mzLib/FlashLFQ/IsotopicEnvelope.cs b/mzLib/FlashLFQ/IsotopicEnvelope.cs
index 09d7207d7..938ac7850 100644
--- a/mzLib/FlashLFQ/IsotopicEnvelope.cs
+++ b/mzLib/FlashLFQ/IsotopicEnvelope.cs
@@ -11,11 +11,12 @@ public class IsotopicEnvelope
public readonly IndexedMassSpectralPeak IndexedPeak;
public readonly int ChargeState;
- public IsotopicEnvelope(IndexedMassSpectralPeak monoisotopicPeak, int chargeState, double intensity)
+ public IsotopicEnvelope(IndexedMassSpectralPeak monoisotopicPeak, int chargeState, double intensity, double pearsonCorrelation)
{
IndexedPeak = monoisotopicPeak;
ChargeState = chargeState;
Intensity = intensity / chargeState;
+ PearsonCorrelation = pearsonCorrelation;
}
///
@@ -25,6 +26,9 @@ public IsotopicEnvelope(IndexedMassSpectralPeak monoisotopicPeak, int chargeStat
///
public double Intensity { get; private set; }
+
+ public double PearsonCorrelation { get; init; }
+
public void Normalize(double normalizationFactor)
{
Intensity *= normalizationFactor;
diff --git a/mzLib/FlashLFQ/MbrScorer.cs b/mzLib/FlashLFQ/MbrScorer.cs
index a703cf3b7..72c2ee72d 100644
--- a/mzLib/FlashLFQ/MbrScorer.cs
+++ b/mzLib/FlashLFQ/MbrScorer.cs
@@ -1,8 +1,10 @@
-using MathNet.Numerics.Distributions;
+using Easy.Common.EasyComparer;
+using MathNet.Numerics.Distributions;
using MathNet.Numerics.Statistics;
using System;
using System.Collections.Generic;
using System.Data;
+using System.Data.Entity.ModelConfiguration.Conventions;
using System.Linq;
namespace FlashLFQ
@@ -13,16 +15,18 @@ namespace FlashLFQ
///
internal class MbrScorer
{
- internal SpectraFileInfo AcceptorFile { get; init; }
// Intensity and ppm distributions are specific to each acceptor file
private readonly Normal _logIntensityDistribution;
private readonly Normal _ppmDistribution;
private readonly Normal _scanCountDistribution;
+ private readonly Gamma _isotopicCorrelationDistribution;
// The logFcDistributions and rtDifference distributions are unique to each donor file - acceptor file pair
private Dictionary _logFcDistributionDictionary;
private Dictionary _rtPredictionErrorDistributionDictionary;
+
internal Dictionary ApexToAcceptorFilePeakDict { get; }
- internal List UnambiguousMsMsPeaks { get; }
+ internal List UnambiguousMsMsAcceptorPeaks { get; }
+ internal double MaxNumberOfScansObserved { get; }
///
/// Takes in an intensity distribution, a log foldchange distribution, and a ppm distribution
@@ -30,32 +34,47 @@ internal class MbrScorer
///
internal MbrScorer(
Dictionary apexToAcceptorFilePeakDict,
- List acceptorPeaks,
+ List acceptorFileMsmsPeaks,
Normal ppmDistribution,
Normal logIntensityDistribution)
{
- AcceptorFile = acceptorPeaks.First().SpectraFileInfo;
ApexToAcceptorFilePeakDict = apexToAcceptorFilePeakDict;
- UnambiguousMsMsPeaks = acceptorPeaks.Where(p => p.Apex != null && !p.IsMbrPeak && p.NumIdentificationsByFullSeq == 1).ToList();
+ UnambiguousMsMsAcceptorPeaks = acceptorFileMsmsPeaks.Where(p => p.Apex != null && !p.IsMbrPeak && p.NumIdentificationsByFullSeq == 1).ToList();
+ MaxNumberOfScansObserved = acceptorFileMsmsPeaks.Max(peak => peak.ScanCount);
_logIntensityDistribution = logIntensityDistribution;
_ppmDistribution = ppmDistribution;
+ _isotopicCorrelationDistribution = GetIsotopicEnvelopeCorrDistribution();
_logFcDistributionDictionary = new();
_rtPredictionErrorDistributionDictionary = new();
// This is kludgey, because scan counts are discrete
- List scanList = acceptorPeaks.Select(peak => (double)peak.ScanCount).ToList();
+ List scanList = UnambiguousMsMsAcceptorPeaks.Select(peak => (double)peak.ScanCount).ToList();
// build a normal distribution for the scan list of the acceptor peaks
- // InterQuartileRange / 1.35 = StandardDeviation for a normal distribution
_scanCountDistribution = new Normal(scanList.Average(), scanList.Count > 30 ? scanList.StandardDeviation() : scanList.InterquartileRange() / 1.36);
}
+ ///
+ /// This distribution represents (1 - Pearson Correlation) for isotopic envelopes of MS/MS acceptor peaks
+ ///
+ ///
+ private Gamma GetIsotopicEnvelopeCorrDistribution()
+ {
+ var pearsonCorrs = UnambiguousMsMsAcceptorPeaks.Select(p => 1 - p.IsotopicPearsonCorrelation).Where(p => p > 0).ToList();
+ if (pearsonCorrs.Count <= 1) return null;
+ double mean = pearsonCorrs.Mean();
+ double variance = pearsonCorrs.Variance();
+ var alpha = Math.Pow(mean, 2) / variance;
+ var beta = mean / variance;
+ return new Gamma(alpha, beta);
+ }
+
///
/// Takes in a list of retention time differences for anchor peptides (donor RT - acceptor RT) and uses
/// this list to calculate the distribution of prediction errors of the local RT alignment strategy employed by
/// match-between-runs for the specified donor file
///
/// List of retention time differences (doubles) calculated as donor file RT - acceptor file RT
- internal void AddRtPredErrorDistribution(SpectraFileInfo donorFile, List anchorPeptideRtDiffs, int numberOfAnchorPeptidesPerSide)
+ internal void AddRtPredErrorDistribution(SpectraFileInfo donorFile, List anchorPeptideRtDiffs, int numberOfAnchorPeptides)
{
// in MBR, we use anchor peptides on either side of the donor to predict the retention time
// here, we're going to repeat the same process, using neighboring anchor peptides to predicte the Rt shift for each
@@ -66,40 +85,88 @@ internal void AddRtPredErrorDistribution(SpectraFileInfo donorFile, List
double cumSumRtDiffs;
List rtPredictionErrors = new();
- for (int i = numberOfAnchorPeptidesPerSide; i < anchorPeptideRtDiffs.Count - (numberOfAnchorPeptidesPerSide) ; i++)
+ for (int i = numberOfAnchorPeptides; i < (anchorPeptideRtDiffs.Count - numberOfAnchorPeptides); i++)
{
cumSumRtDiffs = 0;
- for(int j = 1; j <= numberOfAnchorPeptidesPerSide; j++)
+ for(int j = 1; j <= numberOfAnchorPeptides; j++)
{
cumSumRtDiffs += anchorPeptideRtDiffs[i - j];
cumSumRtDiffs += anchorPeptideRtDiffs[i + j];
}
- double avgDiff = cumSumRtDiffs / (2 * numberOfAnchorPeptidesPerSide);
+ double avgDiff = cumSumRtDiffs / (2 * numberOfAnchorPeptides);
rtPredictionErrors.Add(avgDiff - anchorPeptideRtDiffs[i]);
}
- if(!rtPredictionErrors.Any())
+ Normal rtPredictionErrorDist = new Normal(0, 0);
+ // Default distribution. Effectively assigns an RT Score of zero if no alignment can be performed
+ // between the donor and acceptor based on shared MS/MS IDs
+
+ if(rtPredictionErrors.Any())
{
- _rtPredictionErrorDistributionDictionary.Add(donorFile, new Normal(0, 1));
- return;
+ double medianRtError = rtPredictionErrors.Median();
+ double stdDevRtError = rtPredictionErrors.StandardDeviation();
+ if(stdDevRtError >= 0.0 && !double.IsNaN(medianRtError))
+ {
+ rtPredictionErrorDist = new Normal(medianRtError, 1);
+ }
}
+
+ _rtPredictionErrorDistributionDictionary.Add(donorFile, rtPredictionErrorDist);
+ }
- double medianRtError = rtPredictionErrors.Median();
- double stdDevRtError = rtPredictionErrors.StandardDeviation();
-
- _rtPredictionErrorDistributionDictionary.Add(donorFile, new Normal(medianRtError, stdDevRtError));
+ ///
+ /// Takes in a list of retention time differences for anchor peptides (donor RT - acceptor RT) and uses
+ /// this list to calculate the distribution of prediction errors of the local RT alignment strategy employed by
+ /// match-between-runs for the specified donor file
+ ///
+ /// An MBR Score ranging between 0 and 100. Higher scores are better.
+ internal double ScoreMbr(ChromatographicPeak acceptorPeak, ChromatographicPeak donorPeak, double predictedRt)
+ {
+ acceptorPeak.IntensityScore = CalculateIntensityScore(acceptorPeak.Intensity, donorPeak);
+ acceptorPeak.RtPredictionError = predictedRt - acceptorPeak.ApexRetentionTime;
+ acceptorPeak.RtScore = CalculateScore(_rtPredictionErrorDistributionDictionary[donorPeak.SpectraFileInfo],
+ acceptorPeak.RtPredictionError);
+ acceptorPeak.PpmScore = CalculateScore(_ppmDistribution, acceptorPeak.MassError);
+ acceptorPeak.ScanCountScore = CalculateScore(_scanCountDistribution, acceptorPeak.ScanCount);
+ acceptorPeak.IsotopicDistributionScore = CalculateScore(_isotopicCorrelationDistribution, 1 - acceptorPeak.IsotopicPearsonCorrelation);
+
+ // Returns 100 times the geometric mean of the five scores (intensity, rt, ppm, scan count, isotopic distribution)
+ return 100 * Math.Pow(acceptorPeak.IntensityScore
+ * acceptorPeak.RtScore
+ * acceptorPeak.PpmScore
+ * acceptorPeak.ScanCountScore
+ * acceptorPeak.IsotopicDistributionScore, 0.20);
}
- private double CalculateScore(Normal distribution, double value)
+ // Setting a minimum score prevents the MBR score from going to zero if one component of that score is 0
+ // 3e-7 is the fraction of a normal distribution that lies at least 5 stdDev away from the mean
+ private double _minScore = 3e-7;
+
+ internal double CalculateScore(Normal distribution, double value)
{
+ // new method
double absoluteDiffFromMean = Math.Abs(distribution.Mean - value);
// Returns a value between (0, 1] where 1 means the value was equal to the distribution mean
- return 2 * distribution.CumulativeDistribution(distribution.Mean - absoluteDiffFromMean);
+ // The score represents the fraction of the distribution that lies absoluteDiffFromMean away from the mean or further
+ // i.e., what fraction of the distribution is more extreme than value
+ double score = 2 * distribution.CumulativeDistribution(distribution.Mean - absoluteDiffFromMean);
+ return (double.IsNaN(score) || score == 0) ? _minScore : score;
}
- internal double CalculateIntensityScore(ChromatographicPeak acceptorPeak, ChromatographicPeak donorPeak)
+ internal double CalculateScore(Gamma distribution, double value)
+ {
+ if (value < 0 || distribution == null)
+ {
+ return _minScore;
+ }
+
+ // For the gamma distribution, the CDF is 0 when the pearson correlation is equal to 1 (value = 0)
+ // The CDF then rapidly rises, reaching ~1 at a value of 0.3 (corresponding to a pearson correlation of 0.7)
+ return 1 - distribution.CumulativeDistribution(value);
+ }
+
+ internal double CalculateIntensityScore(double acceptorIntensity, ChromatographicPeak donorPeak)
{
- double acceptorIntensity = acceptorPeak.Intensity;
if (donorPeak != null && acceptorIntensity != 0 && donorPeak.Intensity != 0 &&
_logFcDistributionDictionary.TryGetValue(donorPeak.SpectraFileInfo, out var logFcDistribution))
{
@@ -111,38 +178,7 @@ internal double CalculateIntensityScore(ChromatographicPeak acceptorPeak, Chroma
var logIntensity = Math.Log(acceptorIntensity, 2);
return CalculateScore(_logIntensityDistribution, logIntensity);
}
- }
- ///
- /// Calculates the retention time score for a given MbrAcceptor by comparing to the
- /// distribution of all retention time prediction errors for all anchor peptides shared between
- /// the donor and acceptor files
- ///
- /// Score bounded by 0 and 1, where higher scores are better
- internal double CalculateRetentionTimeScore(ChromatographicPeak acceptorPeak, ChromatographicPeak donorPeak)
- {
- double rtPredictionError = acceptorPeak.PredictedRetentionTime - acceptorPeak.ApexRetentionTime;
- return CalculateScore(_rtPredictionErrorDistributionDictionary[donorPeak.SpectraFileInfo], rtPredictionError);
- }
-
- ///
- /// Calculates the Ppm error score for a given acceptor by comparing the ppm error for the given peak
- /// to the ppm error of all non-MBR peaks in the acceptor file
- ///
- /// Score bounded by 0 and 1, where higher scores are better
- internal double CalculatePpmErrorScore(ChromatographicPeak acceptorPeak)
- {
- return CalculateScore(_ppmDistribution, acceptorPeak.MassError);
- }
-
- ///
- /// Calculates the scan count score for a given acceptor by comparing the number of scans observed for the given peak
- /// to the ppm error of all non-MBR peaks in the acceptor file
- ///
- /// Score bounded by 0 and 1, where higher scores are better
- internal double CalculateScanCountScore(ChromatographicPeak acceptorPeak)
- {
- return CalculateScore(_scanCountDistribution, acceptorPeak.ScanCount);
}
///
@@ -162,7 +198,7 @@ internal void CalculateFoldChangeBetweenFiles(List idDonorP
var acceptorFileBestMsmsPeaks = new Dictionary();
// get the best (most intense) peak for each peptide in the acceptor file
- foreach (ChromatographicPeak acceptorPeak in UnambiguousMsMsPeaks)
+ foreach (ChromatographicPeak acceptorPeak in UnambiguousMsMsAcceptorPeaks)
{
if (acceptorFileBestMsmsPeaks.TryGetValue(acceptorPeak.Identifications.First().ModifiedSequence, out ChromatographicPeak currentBestPeak))
{
@@ -199,5 +235,18 @@ internal void CalculateFoldChangeBetweenFiles(List idDonorP
}
}
+ ///
+ /// Determines whether or not the scorer is validly parameterized and capable
+ /// of scoring MBR transfers originating from the given donorFile
+ ///
+ internal bool IsValid(SpectraFileInfo donorFile)
+ {
+ return _rtPredictionErrorDistributionDictionary.TryGetValue(donorFile, out var rtDist)
+ && rtDist != null
+ && _ppmDistribution != null
+ && _scanCountDistribution != null
+ && _logIntensityDistribution != null;
+ }
+
}
}
diff --git a/mzLib/FlashLFQ/PEP/ChromatographicPeakData.cs b/mzLib/FlashLFQ/PEP/ChromatographicPeakData.cs
new file mode 100644
index 000000000..fbb8f429d
--- /dev/null
+++ b/mzLib/FlashLFQ/PEP/ChromatographicPeakData.cs
@@ -0,0 +1,106 @@
+using Easy.Common.Extensions;
+using Microsoft.ML.Data;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Text;
+
+namespace FlashLFQ.PEP
+{
+ public class ChromatographicPeakData
+ {
+ public static readonly IImmutableDictionary trainingInfos = new Dictionary
+ {
+ { "standard", new []
+ {
+ "PpmErrorScore",
+ "IntensityScore",
+ "RtScore",
+ "ScanCountScore",
+ "IsotopicDistributionScore",
+ "PpmErrorRaw",
+ "IntensityRaw",
+ "RtPredictionErrorRaw",
+ "ScanCountRaw",
+ "IsotopicPearsonCorrelation"
+ }
+ },
+ { "reduced", new []
+ {
+ "PpmErrorRaw",
+ "IntensityRaw",
+ "RtPredictionErrorRaw",
+ "ScanCountRaw",
+ "IsotopicPearsonCorrelation"
+ }
+ },
+ }.ToImmutableDictionary();
+
+ ///
+ /// These are used for percolator. Trainer must be told the assumed direction for each attribute as it relates to being a true positive
+ /// Here, a weight of 1 indicates that the probability of being true is for higher numbers in the set.
+ /// A weight of -1 indicates that the probability of being true is for the lower numbers in the set.
+ ///
+ public static readonly IImmutableDictionary assumedAttributeDirection = new Dictionary {
+ { "PpmErrorScore", 1 },
+ { "IntensityScore", 1 },
+ { "RtScore", 1 },
+ { "ScanCountScore", 1 },
+ { "IsotopicDistributionScore", 1 },
+ { "PpmErrorRaw", -1 },
+ { "IntensityRaw", 1 },
+ { "RtPredictionErrorRaw", -1 },
+ { "ScanCountRaw", -1 },
+ { "IsotopicPearsonCorrelation", 1 }
+ }.ToImmutableDictionary();
+
+ public string ToString(string searchType)
+ {
+ StringBuilder sb = new StringBuilder();
+ var variablesToOutput = ChromatographicPeakData.trainingInfos[searchType];
+
+ foreach (var variable in variablesToOutput)
+ {
+ var property = typeof(ChromatographicPeakData).GetProperty(variable).GetValue(this, null);
+ var floatValue = (float)property;
+ sb.Append("\t");
+ sb.Append(floatValue.ToString());
+ }
+
+ return sb.ToString();
+ }
+
+ [LoadColumn(0)]
+ public float PpmErrorScore { get; set; }
+
+ [LoadColumn(1)]
+ public float IntensityScore { get; set; }
+
+ [LoadColumn(2)]
+ public float RtScore { get; set; }
+
+ [LoadColumn(3)]
+ public float ScanCountScore { get; set; }
+
+ [LoadColumn(4)]
+ public float IsotopicDistributionScore { get; set; }
+
+ [LoadColumn(5)]
+ public float PpmErrorRaw { get; set; }
+
+ [LoadColumn(6)]
+ public float IntensityRaw { get; set; }
+
+ [LoadColumn(7)]
+ public float RtPredictionErrorRaw { get; set; }
+
+ [LoadColumn(8)]
+ public float ScanCountRaw { get; set; }
+
+ [LoadColumn(9)]
+ public float IsotopicPearsonCorrelation { get; set; }
+
+ [LoadColumn(10)]
+ public bool Label { get; set; }
+
+ }
+}
\ No newline at end of file
diff --git a/mzLib/FlashLFQ/PEP/DonorGroup.cs b/mzLib/FlashLFQ/PEP/DonorGroup.cs
new file mode 100644
index 000000000..351bdee90
--- /dev/null
+++ b/mzLib/FlashLFQ/PEP/DonorGroup.cs
@@ -0,0 +1,42 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace FlashLFQ.PEP
+{
+ ///
+ /// This class represents a group of chromatographic peaks that are associated with a donor identification.
+ /// During MBR, one donor identification is associated with multiple acceptor identifications, with both
+ /// predicted retention times (good MBR transfers) and random retention times (decoy MBR transfers).
+ /// This class groups them together for the purpose of cross-validation/PEP scoring
+ ///
+ public class DonorGroup : IEnumerable
+ {
+ public Identification DonorId { get; }
+ public List TargetAcceptors { get; }
+ public List DecoyAcceptors { get; }
+
+ public DonorGroup(Identification donorId, List targetAcceptors, List decoyAcceptors)
+ {
+ DonorId = donorId;
+ TargetAcceptors = targetAcceptors;
+ DecoyAcceptors = decoyAcceptors;
+ }
+
+ public double BestTargetMbrScore => TargetAcceptors.Count == 0 ? 0 : TargetAcceptors.Max(acceptor => acceptor.MbrScore);
+
+ public IEnumerator GetEnumerator()
+ {
+ return TargetAcceptors.Concat(DecoyAcceptors).GetEnumerator();
+ }
+
+ IEnumerator IEnumerable.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+
+ }
+}
diff --git a/mzLib/FlashLFQ/PEP/PepAnalysisEngine.cs b/mzLib/FlashLFQ/PEP/PepAnalysisEngine.cs
new file mode 100644
index 000000000..eaddae3e8
--- /dev/null
+++ b/mzLib/FlashLFQ/PEP/PepAnalysisEngine.cs
@@ -0,0 +1,647 @@
+using Microsoft.ML;
+using Microsoft.ML.Data;
+using Microsoft.ML.Trainers.FastTree;
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Omics;
+using System.Collections;
+using System.Security.Policy;
+using System.Text.RegularExpressions;
+using System.Reflection;
+
+namespace FlashLFQ.PEP
+{
+ public class PepAnalysisEngine
+ {
+ public double PipScoreCutoff;
+
+ private static int _randomSeed = 42;
+
+ ///
+ /// This method contains the hyper-parameters that will be used when training the machine learning model
+ ///
+ /// Options object to be passed in to the FastTree constructor
+ public FastTreeBinaryTrainer.Options BGDTreeOptions =>
+ new FastTreeBinaryTrainer.Options
+ {
+ NumberOfThreads = 1,
+ NumberOfTrees = 100,
+ MinimumExampleCountPerLeaf = 10,
+ NumberOfLeaves = 20,
+ LearningRate = 0.2,
+ LabelColumnName = "Label",
+ FeatureColumnName = "Features",
+ Seed = _randomSeed,
+ FeatureSelectionSeed = _randomSeed,
+ RandomStart = false,
+ UnbalancedSets = true
+ };
+
+ public List Peaks { get; }
+ public string OutputFolder { get; set; }
+ public int MaxThreads { get; set; }
+ public double PepTrainingFraction { get; set; }
+
+ public PepAnalysisEngine(List peaks, string outputFolder, int maxThreads, double pepTrainingFraction = 0.25)
+ {
+ Peaks = peaks;
+ OutputFolder = outputFolder;
+ MaxThreads = maxThreads;
+ PepTrainingFraction = pepTrainingFraction;
+ }
+
+ public string ComputePEPValuesForAllPeaks()
+ {
+ string[] trainingVariables = ChromatographicPeakData.trainingInfos["standard"];
+
+ #region Construct Donor Groups
+ // Note: groups contain MBR target/decoy *peaks* (acceptors), keyed by the donor identification
+ List donors= new();
+ foreach(var donorGroup in Peaks
+ .Where(peak => peak.IsMbrPeak)
+ .OrderByDescending(peak => peak.MbrScore)
+ .GroupBy(peak => peak.Identifications.First())) //Group by donor peptide
+ {
+ var donorId = donorGroup.Key;
+ var targetAcceptors = donorGroup.Where(peak => !peak.RandomRt).ToList();
+ var decoyAcceptors = donorGroup.Where(peak => peak.RandomRt).ToList();
+ donors.Add(new DonorGroup(donorId, targetAcceptors, decoyAcceptors));
+ }
+
+ // Fix the order
+ donors = OrderDonorGroups(donors);
+
+ var peakScores = donors.SelectMany(donor => donor.Select(p => p.MbrScore)).OrderByDescending(score => score).ToList();
+ PipScoreCutoff = peakScores[(int)Math.Floor(peakScores.Count * PepTrainingFraction)]; //Select the top N percent of all peaks, only use those as positive examples
+
+ MLContext mlContext = new MLContext(_randomSeed);
+ //the number of groups used for cross-validation is hard-coded at three. Do not change this number without changing other areas of affected code.
+ const int numGroups = 3;
+
+ List[] donorGroupIndices = GetDonorGroupIndices(donors, numGroups, PipScoreCutoff);
+
+ #endregion
+
+ #region Create Groups and Model
+ IEnumerable[] ChromatographicPeakDataGroups = new IEnumerable[numGroups];
+ for (int i = 0; i < numGroups; i++)
+ {
+ ChromatographicPeakDataGroups[i] = CreateChromatographicPeakData(donors, donorGroupIndices[i], MaxThreads);
+
+ if (!ChromatographicPeakDataGroups[i].Any(p => p.Label == true)
+ || !ChromatographicPeakDataGroups[i].Any(p => p.Label == false))
+ {
+ return "Posterior error probability analysis failed. This can occur for small data sets when some sample groups are missing positive or negative training examples.";
+ }
+ }
+
+ TransformerChain>>[] trainedModels
+ = new TransformerChain>>[numGroups];
+
+ var trainer = mlContext.BinaryClassification.Trainers.FastTree(BGDTreeOptions);
+ var pipeline = mlContext.Transforms.Concatenate("Features", trainingVariables)
+ .Append(trainer);
+
+ List allMetrics = new List();
+
+ #endregion
+
+ #region Training and Cross Validation First iteration
+
+ for (int groupIndexNumber = 0; groupIndexNumber < numGroups; groupIndexNumber++)
+ {
+
+ List allGroupIndexes = Enumerable.Range(0, numGroups).ToList();
+ allGroupIndexes.RemoveAt(groupIndexNumber);
+
+ //Concat doesn't work in a loop, so the concatenation of 2 of the 3 groups is hard-coded below. If the const int numGroups value is changed, then the concat has to be changed accordingly.
+ IDataView dataView = mlContext.Data.LoadFromEnumerable(
+ ChromatographicPeakDataGroups[allGroupIndexes[0]]
+ .Concat(ChromatographicPeakDataGroups[allGroupIndexes[1]]));
+
+ trainedModels[groupIndexNumber] = pipeline.Fit(dataView);
+ var myPredictions = trainedModels[groupIndexNumber].Transform(mlContext.Data.LoadFromEnumerable(ChromatographicPeakDataGroups[groupIndexNumber]));
+ CalibratedBinaryClassificationMetrics metrics = mlContext.BinaryClassification.Evaluate(data: myPredictions, labelColumnName: "Label", scoreColumnName: "Score");
+
+ //Parallel operation of the following code requires the model to be stored and then read, once for each thread
+ //if no output directory is specified, the model cannot be stored, and we must force single-threaded operation
+ if (OutputFolder != null)
+ {
+ mlContext.Model.Save(trainedModels[groupIndexNumber], dataView.Schema, Path.Combine(OutputFolder, "model.zip"));
+ }
+
+ Compute_PEP_For_All_Peaks(donors, donorGroupIndices[groupIndexNumber], mlContext, trainedModels[groupIndexNumber], OutputFolder, MaxThreads);
+
+ allMetrics.Add(metrics);
+ }
+
+ #endregion
+ #region Iterative Training
+
+ for(int trainingIteration = 0; trainingIteration < 9; trainingIteration++)
+ {
+ ChromatographicPeakDataGroups = new IEnumerable[numGroups];
+ for (int i = 0; i < numGroups; i++)
+ {
+ ChromatographicPeakDataGroups[i] = CreateChromatographicPeakDataIteration(donors, donorGroupIndices[i], MaxThreads);
+
+ if (!ChromatographicPeakDataGroups[i].Any(p => p.Label == true)
+ || !ChromatographicPeakDataGroups[i].Any(p => p.Label == false))
+ {
+ return "Posterior error probability analysis failed. This can occur for small data sets when some sample groups are missing positive or negative training examples.";
+ }
+ }
+
+ for (int groupIndexNumber = 0; groupIndexNumber < numGroups; groupIndexNumber++)
+ {
+ List allGroupIndexes = Enumerable.Range(0, numGroups).ToList();
+ allGroupIndexes.RemoveAt(groupIndexNumber);
+
+ IDataView dataView = mlContext.Data.LoadFromEnumerable(
+ ChromatographicPeakDataGroups[allGroupIndexes[0]]
+ .Concat(ChromatographicPeakDataGroups[allGroupIndexes[1]]));
+
+ trainedModels[groupIndexNumber] = pipeline.Fit(dataView);
+ var myPredictions = trainedModels[groupIndexNumber].Transform(mlContext.Data.LoadFromEnumerable(ChromatographicPeakDataGroups[groupIndexNumber]));
+ CalibratedBinaryClassificationMetrics metrics = mlContext.BinaryClassification.Evaluate(data: myPredictions, labelColumnName: "Label", scoreColumnName: "Score");
+
+ //Parallel operation of the following code requires the model to be stored and then read, once for each thread
+ //if no output directory is specified, the model cannot be stored, and we must force single-threaded operation
+ if (OutputFolder != null)
+ {
+ mlContext.Model.Save(trainedModels[groupIndexNumber], dataView.Schema, Path.Combine(OutputFolder, "model.zip"));
+ }
+
+ Compute_PEP_For_All_Peaks(donors, donorGroupIndices[groupIndexNumber], mlContext, trainedModels[groupIndexNumber], OutputFolder, MaxThreads);
+
+ allMetrics.Add(metrics);
+ }
+ }
+ #endregion
+
+ return AggregateMetricsForOutput(allMetrics);
+ }
+
+ public static List OrderDonorGroups(List donors)
+ {
+ return donors.OrderByDescending(donor => donor.TargetAcceptors.Count)
+ .ThenByDescending(donor => donor.DecoyAcceptors.Count)
+ .ThenByDescending(donor => donor.BestTargetMbrScore)
+ .ToList();
+ }
+
+ //we add the indexes of the targets and decoys to the groups separately in the hope that we'll get at least one target and one decoy in each group.
+ //then training can possibly be more successful.
+ public static List[] GetDonorGroupIndices(List donors, int numGroups, double scoreCutoff)
+ {
+ List[] groupsOfIndices = new List[numGroups];
+ for (int i = 0; i < numGroups; i++)
+ {
+ groupsOfIndices[i] = new List();
+ }
+
+ int myIndex = 0;
+
+ while (myIndex < donors.Count)
+ {
+ int subIndex = 0;
+ while (subIndex < numGroups && myIndex < donors.Count)
+ {
+ groupsOfIndices[subIndex].Add(myIndex);
+
+ subIndex++;
+ myIndex++;
+ }
+ }
+
+ EqualizeDonorGroupIndices(donors, groupsOfIndices, scoreCutoff, numGroups);
+
+ return groupsOfIndices;
+ }
+
+ ///
+ /// Equalizes partitions used for cross-validation. The goal is to have the same number of targets and decoys in each partition
+ ///
+ /// List of all DonorGroups to be classified
+ /// An array of lists. Each list contains the indices of donor groups for a given partition
+ /// The MBR Score cutoff that determines which MBR target peaks will be used as positive training examples
+ /// Number of groups used for cross-validation, default = 3
+ public static void EqualizeDonorGroupIndices(List donors, List[] groupsOfIndices, double scoreCutoff, int numGroups = 3)
+ {
+ HashSet swappedDonors = new HashSet(); // Keep track of everything we've swapped so we don't swap it again
+ // Outer loop iterates over the groups of indices (partitions) three times
+ // after each inner loop iteration, the number of targets and decoys in each adjacent group is equal, but commonly group 1 and 3 will have a different number
+ // of targets and decoys. Looping three times should resolve this
+ for (int i = 0; i < numGroups*3 - 1; i++)
+ {
+ int groupA = i % numGroups;
+ int groupB = (i + 1) % numGroups;
+ int targetsA = 0;
+ int targetsB = 0;
+ int decoysA = 0;
+ int decoysB = 0;
+ foreach (int index in groupsOfIndices[groupA])
+ {
+ targetsA += donors[index].TargetAcceptors.Count(peak => peak.MbrScore >= scoreCutoff);
+ decoysA += donors[index].DecoyAcceptors.Count;
+ }
+ foreach (int index in groupsOfIndices[groupB])
+ {
+ targetsB += donors[index].TargetAcceptors.Count(peak => peak.MbrScore >= scoreCutoff);
+ decoysB += donors[index].DecoyAcceptors.Count;
+ }
+
+ bool stuck = false;
+ int outerIterations = 0;
+ int minIndex = groupsOfIndices[groupA].Min();
+
+ // Calculate the difference in targets and decoys between the two groups
+ int targetSurplus = targetsA - targetsB;
+ int decoySurplus = decoysA - decoysB;
+
+ while ((Math.Abs(targetSurplus) > 1 | Math.Abs(decoySurplus) > 1) && !stuck && outerIterations < 3)
+ {
+ bool swapped = false;
+ outerIterations++;
+
+ int innerIterations = 0;
+ // start from the bottom of group 1, trying to swap peaks.
+ // If group 1 has more targets than group 2, we want to swap groups to equalize the number of targets in each group
+ while (Math.Abs(targetSurplus) > 1 & !stuck & innerIterations < 3)
+ {
+ innerIterations++;
+ swapped = false;
+ // Traverse the list of donor indices in descending order, looking for a good candidate to swap
+ foreach (int donorIndexA in groupsOfIndices[groupA].Where(idx => !swappedDonors.Contains(idx)).OrderByDescending(idx => idx))
+ {
+ int donorIndexATargetCount = donors[donorIndexA].TargetAcceptors.Count(peak => peak.MbrScore > scoreCutoff);
+ switch (targetSurplus > 0)
+ {
+ case true: // i.e., too many targets
+ if (donorIndexATargetCount < 1) continue; // No targets to swap
+ foreach (int donorIndexB in groupsOfIndices[groupB].Where(idx => !swappedDonors.Contains(idx)).OrderByDescending(idx => idx))
+ {
+ if (donors[donorIndexB].TargetAcceptors.Count(peak => peak.MbrScore > scoreCutoff) < donorIndexATargetCount)
+ {
+ GroupSwap(donors, groupsOfIndices, donorIndexA, donorIndexB, groupA, groupB,
+ scoreCutoff, swappedDonors, ref targetSurplus, ref decoySurplus);
+ swapped = true;
+ break;
+ }
+ }
+ break;
+ case false: // i.e., too few targets
+ foreach (int donorIndexB in groupsOfIndices[groupB].Where(idx => !swappedDonors.Contains(idx)).OrderByDescending(idx => idx))
+ {
+ if (donors[donorIndexB].TargetAcceptors.Count(peak => peak.MbrScore > scoreCutoff) > donorIndexATargetCount)
+ {
+ GroupSwap(donors, groupsOfIndices, donorIndexA, donorIndexB, groupA, groupB,
+ scoreCutoff, swappedDonors, ref targetSurplus, ref decoySurplus);
+ swapped = true;
+ break;
+ }
+ }
+ break;
+ }
+
+ // If we reach the lowest donor index in the group without swapping, set stuck to true so that the outer loop will break
+ if (donorIndexA == minIndex)
+ {
+ stuck = true;
+ break;
+ }
+ if (swapped)
+ break;
+
+ }
+ }
+
+ innerIterations = 0;
+ // Now we'll do the decoys
+ while (Math.Abs(decoySurplus) > 1 & !stuck & innerIterations < 3)
+ {
+ innerIterations++;
+ swapped = false;
+ foreach (int donorIndexA in groupsOfIndices[groupA].Where(idx => !swappedDonors.Contains(idx)).OrderByDescending(idx => idx))
+ {
+ int donorIndexADecoyCount = donors[donorIndexA].DecoyAcceptors.Count();
+ switch (decoySurplus > 0)
+ {
+ case true: // i.e., too many decoys
+ if (donorIndexADecoyCount < 1) continue; // No decoys to swap
+ foreach (int donorIndexB in groupsOfIndices[groupB].Where(idx => !swappedDonors.Contains(idx)).OrderByDescending(idx => idx))
+ {
+ if (donors[donorIndexB].DecoyAcceptors.Count() < donorIndexADecoyCount)
+ {
+ GroupSwap(donors, groupsOfIndices, donorIndexA, donorIndexB, groupA, groupB,
+ scoreCutoff, swappedDonors, ref targetSurplus, ref decoySurplus);
+ swapped = true;
+ break;
+ }
+ }
+ break;
+ case false: // i.e., too few decoys
+ foreach (int donorIndexB in groupsOfIndices[groupB].Where(idx => !swappedDonors.Contains(idx)).OrderByDescending(idx => idx))
+ {
+ if (donors[donorIndexB].DecoyAcceptors.Count() > donorIndexADecoyCount)
+ {
+ GroupSwap(donors, groupsOfIndices, donorIndexA, donorIndexB, groupA, groupB,
+ scoreCutoff, swappedDonors, ref targetSurplus, ref decoySurplus);
+ swapped = true;
+ break;
+ }
+ }
+ break;
+ }
+
+ // If we reach the lowest donor index in the group without swapping, set stuck to true so that the outer loop will break
+ if (donorIndexA == minIndex)
+ {
+ stuck = true;
+ break;
+ }
+ if (swapped)
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ ///
+ /// Takes in a list of donor groups and a list of indices for each group, and swaps two groups of indices
+ /// Updates the targetSurplus and decoySurplus variables
+ /// Updates the swappedDonors hash set to keep track of which donors have been swapped
+ /// This is done to equalize the number of targets and decoys in each partition for cross validation
+ ///
+ public static void GroupSwap(
+ List donors,
+ List[] groupsOfIndices,
+ int donorIndexA,
+ int donorIndexB,
+ int groupsOfIndicesIndexA,
+ int groupsOfIndicesIndexB,
+ double scoreCutoff,
+ HashSet swappedDonors,
+ ref int targetSurplus,
+ ref int decoySurplus)
+ {
+ // Multiply by two because the surplus is the difference between the two groups
+ // So removing one peak from one group and adding it to the other group is a difference of two
+ targetSurplus += 2 * (
+ donors[donorIndexB].TargetAcceptors.Count(peak => peak.MbrScore >= scoreCutoff) -
+ donors[donorIndexA].TargetAcceptors.Count(peak => peak.MbrScore >= scoreCutoff));
+ decoySurplus += 2 * (
+ donors[donorIndexB].DecoyAcceptors.Count -
+ donors[donorIndexA].DecoyAcceptors.Count);
+
+ groupsOfIndices[groupsOfIndicesIndexA].Add(donorIndexB);
+ groupsOfIndices[groupsOfIndicesIndexA].Remove(donorIndexA);
+ groupsOfIndices[groupsOfIndicesIndexB].Add(donorIndexA);
+ groupsOfIndices[groupsOfIndicesIndexB].Remove(donorIndexB);
+ }
+
+ ///
+ /// Creates chromatographic peak data that will be used to train the machine learning model
+ /// Classifies peaks as positive or negative training examples
+ /// Positive training examples are peaks whose MBR scores fall within the top 25% of scores,
+ /// Negative training examples are peaks with random retention times
+ ///
+ /// The list of donor groups.
+ /// The list of donor indices.
+ /// The maximum number of threads.
+ /// The enumerable of chromatographic peak data.
+ public IEnumerable CreateChromatographicPeakData(List donors, List donorIndices, int maxThreads)
+ {
+ object ChromatographicPeakDataListLock = new object();
+ List ChromatographicPeakDataList = new List();
+ int[] threads = Enumerable.Range(0, maxThreads).ToArray();
+
+ List pipScores = new();
+ foreach(int i in donorIndices)
+ {
+ pipScores.AddRange(donors[i].Select(peak => peak.MbrScore));
+ }
+ pipScores.Sort((a, b) => b.CompareTo(a)); // This is a descending sort
+ double groupSpecificPipScoreCutoff = pipScores[(int)Math.Floor(pipScores.Count * 0.25)];
+
+ Parallel.ForEach(Partitioner.Create(0, donorIndices.Count),
+ new ParallelOptions { MaxDegreeOfParallelism = maxThreads },
+ (range, loopState) =>
+ {
+ List localChromatographicPeakDataList = new List();
+ for (int i = range.Item1; i < range.Item2; i++)
+ {
+ var donor = donors[donorIndices[i]];
+ foreach (var peak in donor)
+ {
+ ChromatographicPeakData newChromatographicPeakData = new ChromatographicPeakData();
+ if (peak.RandomRt)
+ {
+ newChromatographicPeakData = CreateOneChromatographicPeakDataEntry(peak, label: false);
+ localChromatographicPeakDataList.Add(newChromatographicPeakData);
+ }
+ else if (!peak.RandomRt & peak.MbrScore >= groupSpecificPipScoreCutoff)
+ {
+ newChromatographicPeakData = CreateOneChromatographicPeakDataEntry(peak, label: true);
+ localChromatographicPeakDataList.Add(newChromatographicPeakData);
+ }
+ }
+ }
+ lock (ChromatographicPeakDataListLock)
+ {
+ ChromatographicPeakDataList.AddRange(localChromatographicPeakDataList);
+ }
+ });
+
+ ChromatographicPeakData[] pda = ChromatographicPeakDataList.ToArray();
+
+ return pda.AsEnumerable();
+ }
+
+ ///
+ /// Creates chromatographic peak data, but uses PEP values instead of MBR scores to select the positive training examples
+ ///
+ /// The list of donor groups.
+ /// The list of donor indices.
+ /// The maximum number of threads.
+ /// The enumerable of chromatographic peak data.
+ public IEnumerable CreateChromatographicPeakDataIteration(List donors, List donorIndices, int maxThreads)
+ {
+ object ChromatographicPeakDataListLock = new object();
+ List ChromatographicPeakDataList = new List();
+ int[] threads = Enumerable.Range(0, maxThreads).ToArray();
+
+ List peps = new();
+ foreach (int i in donorIndices)
+ {
+ peps.AddRange(donors[i].Select(peak => peak.MbrPep ?? 1));
+ }
+ peps.Sort();
+ double groupSpecificPepCutoff = peps[(int)Math.Floor(peps.Count * 0.25)];
+
+ Parallel.ForEach(Partitioner.Create(0, donorIndices.Count),
+ new ParallelOptions { MaxDegreeOfParallelism = maxThreads },
+ (range, loopState) =>
+ {
+ List localChromatographicPeakDataList = new List();
+ for (int i = range.Item1; i < range.Item2; i++)
+ {
+ var donor = donors[donorIndices[i]];
+ foreach (var peak in donor)
+ {
+ ChromatographicPeakData newChromatographicPeakData = new ChromatographicPeakData();
+ if (peak.RandomRt)
+ {
+ newChromatographicPeakData = CreateOneChromatographicPeakDataEntry(peak, label: false);
+ localChromatographicPeakDataList.Add(newChromatographicPeakData);
+ }
+ else if (!peak.RandomRt & peak.MbrPep <= groupSpecificPepCutoff)
+ {
+ newChromatographicPeakData = CreateOneChromatographicPeakDataEntry(peak, label: true);
+ localChromatographicPeakDataList.Add(newChromatographicPeakData);
+ }
+ }
+ }
+ lock (ChromatographicPeakDataListLock)
+ {
+ ChromatographicPeakDataList.AddRange(localChromatographicPeakDataList);
+ }
+ });
+
+ ChromatographicPeakData[] pda = ChromatographicPeakDataList.ToArray();
+
+ return pda.AsEnumerable();
+ }
+
+ public static void Compute_PEP_For_All_Peaks(
+ List donors,
+ List donorIndices,
+ MLContext mLContext,
+ TransformerChain>> trainedModel,
+ string outputFolder, int maxThreads)
+ {
+ object lockObject = new object();
+
+ //the trained model is not threadsafe. Therefore, to use the same model on each thread, we save the model to disk. Then each thread reads its own copy of the model back from disk.
+ //If there is no output folder specified, then this can't happen. We set maxThreads equal to one and use the model that gets passed into the method.
+ if (String.IsNullOrEmpty(outputFolder))
+ {
+ maxThreads = 1;
+ }
+
+ Parallel.ForEach(Partitioner.Create(0, donorIndices.Count),
+ new ParallelOptions { MaxDegreeOfParallelism = maxThreads },
+ (range, loopState) =>
+ {
+
+ ITransformer threadSpecificTrainedModel;
+ if (maxThreads == 1)
+ {
+ threadSpecificTrainedModel = trainedModel;
+ }
+ else
+ {
+ threadSpecificTrainedModel = mLContext.Model.Load(Path.Combine(outputFolder, "model.zip"), out DataViewSchema savedModelSchema);
+ }
+
+ // one prediction engine per thread, because the prediction engine is not thread-safe
+ var threadPredictionEngine = mLContext.Model.CreatePredictionEngine(threadSpecificTrainedModel);
+
+ for (int i = range.Item1; i < range.Item2; i++)
+ {
+ DonorGroup donor = donors[donorIndices[i]];
+
+ foreach(ChromatographicPeak peak in donor)
+ {
+ ChromatographicPeakData pd = CreateOneChromatographicPeakDataEntry(peak, label: !peak.RandomRt);
+ var pepValuePrediction = threadPredictionEngine.Predict(pd);
+ peak.MbrPep = 1 - pepValuePrediction.Probability;
+ }
+ }
+ });
+ }
+
+ public static string AggregateMetricsForOutput(List allMetrics)
+ {
+ List accuracy = allMetrics.Select(m => m.Accuracy).ToList();
+ List areaUnderRocCurve = allMetrics.Select(m => m.AreaUnderRocCurve).ToList();
+ List areaUnderPrecisionRecallCurve = allMetrics.Select(m => m.AreaUnderPrecisionRecallCurve).ToList();
+ List F1Score = allMetrics.Select(m => m.F1Score).ToList();
+ List logLoss = allMetrics.Select(m => m.LogLoss).ToList();
+ List logLossReduction = allMetrics.Select(m => m.LogLossReduction).ToList();
+ List positivePrecision = allMetrics.Select(m => m.PositivePrecision).ToList();
+ List positiveRecall = allMetrics.Select(m => m.PositiveRecall).ToList();
+ List negativePrecision = allMetrics.Select(m => m.NegativePrecision).ToList();
+ List negativeRecall = allMetrics.Select(m => m.NegativeRecall).ToList();
+
+ // log-loss can stochastically take on a value of infinity.
+ // correspondingly, log-loss reduction can be negative infinity.
+ // when this happens for one or more of the metrics, it can lead to uninformative numbers.
+ // so, unless they are all infinite, we remove them from the average. If they are all infinite, we report that.
+
+ logLoss.RemoveAll(x => x == Double.PositiveInfinity);
+ logLossReduction.RemoveAll(x => x == Double.NegativeInfinity);
+
+ double logLossAverage = Double.PositiveInfinity;
+ double logLossReductionAverage = Double.NegativeInfinity;
+
+ if ((logLoss != null) && (logLoss.Any()))
+ {
+ logLossAverage = logLoss.Average();
+ }
+
+ if ((logLossReduction != null) && (logLossReduction.Any()))
+ {
+ logLossReductionAverage = logLossReduction.Average();
+ }
+
+ StringBuilder s = new StringBuilder();
+ s.AppendLine();
+ s.AppendLine("************************************************************");
+ s.AppendLine("* Metrics for Determination of PEP Using Binary Classification ");
+ s.AppendLine("*-----------------------------------------------------------");
+ s.AppendLine("* Accuracy: " + accuracy.Average().ToString());
+ s.AppendLine("* Area Under Curve: " + areaUnderRocCurve.Average().ToString());
+ s.AppendLine("* Area under Precision recall Curve: " + areaUnderPrecisionRecallCurve.Average().ToString());
+ s.AppendLine("* F1Score: " + F1Score.Average().ToString());
+ s.AppendLine("* LogLoss: " + logLossAverage.ToString());
+ s.AppendLine("* LogLossReduction: " + logLossReductionAverage.ToString());
+ s.AppendLine("* PositivePrecision: " + positivePrecision.Average().ToString());
+ s.AppendLine("* PositiveRecall: " + positiveRecall.Average().ToString());
+ s.AppendLine("* NegativePrecision: " + negativePrecision.Average().ToString());
+ s.AppendLine("* NegativeRecall: " + negativeRecall.Average().ToString());
+ s.AppendLine("************************************************************");
+ return s.ToString();
+ }
+
+ public static ChromatographicPeakData CreateOneChromatographicPeakDataEntry(ChromatographicPeak peak,bool label)
+ {
+
+ peak.PepPeakData = new ChromatographicPeakData
+ {
+ PpmErrorScore = (float)peak.PpmScore,
+ IntensityScore = (float)peak.IntensityScore,
+ RtScore = (float)peak.RtScore,
+ ScanCountScore = (float)peak.ScanCountScore,
+ IsotopicDistributionScore = (float)peak.IsotopicDistributionScore,
+
+ PpmErrorRaw = (float)Math.Abs(peak.MassError),
+ IntensityRaw = (float)Math.Log2(peak.Intensity),
+ RtPredictionErrorRaw = (float)Math.Abs(peak.RtPredictionError),
+ ScanCountRaw = (float)peak.IsotopicEnvelopes.Count,
+ IsotopicPearsonCorrelation = (float)(peak.IsotopicPearsonCorrelation),
+
+ Label = label,
+
+ };
+
+ return peak.PepPeakData;
+ }
+ }
+}
\ No newline at end of file
diff --git a/mzLib/FlashLFQ/PEP/TruePositivePrediction.cs b/mzLib/FlashLFQ/PEP/TruePositivePrediction.cs
new file mode 100644
index 000000000..5837959d8
--- /dev/null
+++ b/mzLib/FlashLFQ/PEP/TruePositivePrediction.cs
@@ -0,0 +1,18 @@
+using Microsoft.ML.Data;
+
+namespace FlashLFQ.PEP
+{
+ public class TruePositivePrediction
+ {
+ // ColumnName attribute is used to change the column name from
+ // its default value, which is the name of the field.
+ [ColumnName("PredictedLabel")]
+ public bool Prediction;
+
+ // No need to specify ColumnName attribute, because the field
+ // name "Probability" is the column name we want.
+ public float Probability;
+
+ public float Score;
+ }
+}
\ No newline at end of file
diff --git a/mzLib/FlashLFQ/Peptide.cs b/mzLib/FlashLFQ/Peptide.cs
index 39088d35a..c8bb6b64b 100644
--- a/mzLib/FlashLFQ/Peptide.cs
+++ b/mzLib/FlashLFQ/Peptide.cs
@@ -1,5 +1,7 @@
-using System.Collections.Generic;
+using Easy.Common.Extensions;
+using System.Collections.Generic;
using System.Linq;
+using System.Runtime.CompilerServices;
using System.Text;
namespace FlashLFQ
@@ -67,6 +69,18 @@ public void SetIntensity(SpectraFileInfo fileInfo, double intensity)
}
}
+ public double GetTotalIntensity()
+ {
+ if (Intensities.IsNotNullOrEmpty())
+ {
+ return Intensities.Sum(i => i.Value);
+ }
+ else
+ {
+ return 0;
+ }
+ }
+
public DetectionType GetDetectionType(SpectraFileInfo fileInfo)
{
if (DetectionTypes.TryGetValue(fileInfo, out DetectionType detectionType))
diff --git a/mzLib/FlashLFQ/RtInfo.cs b/mzLib/FlashLFQ/RtInfo.cs
index 0750e4588..ceda038e4 100644
--- a/mzLib/FlashLFQ/RtInfo.cs
+++ b/mzLib/FlashLFQ/RtInfo.cs
@@ -9,15 +9,10 @@ namespace FlashLFQ
public class RtInfo
{
public double PredictedRt { get; }
- public double Width { get; }
+ public double Width { get; set; }
public double RtStartHypothesis => PredictedRt - (Width / 2.0);
public double RtEndHypothesis => PredictedRt + (Width / 2.0);
- // These will be introduced in a later PR. For now, we're sticking with the classic version
- //private double _minimumWindowWidth = 0.5;
- //public double RtStartHypothesis => PredictedRt - Math.Max((Width / 2.0), _minimumWindowWidth/2); // the Math.Max components ensure that the width of an RT Window is at least _minimumWindowWidth wide
- //public double RtEndHypothesis => PredictedRt + Math.Max((Width / 2.0), _minimumWindowWidth/2);
-
public RtInfo(double predictedRt, double width)
{
PredictedRt = predictedRt;
diff --git a/mzLib/FlashLFQ/SpectraFileInfo.cs b/mzLib/FlashLFQ/SpectraFileInfo.cs
index 5dd5b4ab2..9cc24b6d7 100644
--- a/mzLib/FlashLFQ/SpectraFileInfo.cs
+++ b/mzLib/FlashLFQ/SpectraFileInfo.cs
@@ -1,4 +1,6 @@
-namespace FlashLFQ
+using System.IO;
+
+namespace FlashLFQ
{
public class SpectraFileInfo
{
@@ -39,5 +41,9 @@ public override int GetHashCode()
{
return FullFilePathWithExtension.GetHashCode();
}
+ public override string ToString()
+ {
+ return Path.GetFileName(FullFilePathWithExtension);
+ }
}
}
\ No newline at end of file
diff --git a/mzLib/MassSpectrometry/Deconvolution/Algorithms/ClassicDeconvolutionAlgorithm.cs b/mzLib/MassSpectrometry/Deconvolution/Algorithms/ClassicDeconvolutionAlgorithm.cs
index 4a919a7e5..8f7bb320b 100644
--- a/mzLib/MassSpectrometry/Deconvolution/Algorithms/ClassicDeconvolutionAlgorithm.cs
+++ b/mzLib/MassSpectrometry/Deconvolution/Algorithms/ClassicDeconvolutionAlgorithm.cs
@@ -1,20 +1,17 @@
using System;
using System.Collections.Generic;
using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
using Chemistry;
-using Easy.Common.Extensions;
using MathNet.Numerics.Statistics;
using MzLibUtil;
namespace MassSpectrometry
{
- public class ClassicDeconvolutionAlgorithm : DeconvolutionAlgorithm
+ internal class ClassicDeconvolutionAlgorithm : DeconvolutionAlgorithm
{
private MzSpectrum spectrum;
- public ClassicDeconvolutionAlgorithm(DeconvolutionParameters deconParameters) : base(deconParameters)
+ internal ClassicDeconvolutionAlgorithm(DeconvolutionParameters deconParameters) : base(deconParameters)
{
}
@@ -25,7 +22,7 @@ public ClassicDeconvolutionAlgorithm(DeconvolutionParameters deconParameters) :
/// spectrum to deconvolute
/// Range of peaks to deconvolute
///
- public override IEnumerable Deconvolute(MzSpectrum spectrumToDeconvolute, MzRange range)
+ internal override IEnumerable Deconvolute(MzSpectrum spectrumToDeconvolute, MzRange range)
{
var deconParams = DeconvolutionParameters as ClassicDeconvolutionParameters ?? throw new MzLibException("Deconvolution params and algorithm do not match");
spectrum = spectrumToDeconvolute;
@@ -205,7 +202,7 @@ private IsotopicEnvelope FindIsotopicEnvelope(int massIndex, double candidateFor
}
}
- return new IsotopicEnvelope(listOfObservedPeaks, monoisotopicMass, chargeState, totalIntensity, Statistics.StandardDeviation(listOfRatios), massIndex);
+ return new IsotopicEnvelope(listOfObservedPeaks, monoisotopicMass, chargeState, totalIntensity, listOfRatios.StandardDeviation());
}
private int ObserveAdjacentChargeStates(IsotopicEnvelope originalEnvelope, double mostIntensePeakMz, int massIndex, double deconvolutionTolerancePpm, double intensityRatioLimit, double minChargeToLookFor, double maxChargeToLookFor, List monoisotopicMassPredictions)
diff --git a/mzLib/MassSpectrometry/Deconvolution/Algorithms/DeconvolutionAlgorithm.cs b/mzLib/MassSpectrometry/Deconvolution/Algorithms/DeconvolutionAlgorithm.cs
index e8a052e39..efb8cd248 100644
--- a/mzLib/MassSpectrometry/Deconvolution/Algorithms/DeconvolutionAlgorithm.cs
+++ b/mzLib/MassSpectrometry/Deconvolution/Algorithms/DeconvolutionAlgorithm.cs
@@ -1,13 +1,14 @@
using System;
using System.Collections.Generic;
using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
using Chemistry;
using MzLibUtil;
namespace MassSpectrometry
{
+ ///
+ /// Parent class defining minimum requirement to be used
+ ///
public abstract class DeconvolutionAlgorithm
{
// For ClassicDeconv. If not used elsewhere, move to that class
@@ -79,6 +80,6 @@ protected DeconvolutionAlgorithm(DeconvolutionParameters deconParameters)
/// spectrum to be deconvoluted
/// Range of peaks to deconvolute
///
- public abstract IEnumerable Deconvolute(MzSpectrum spectrum, MzRange range);
+ internal abstract IEnumerable Deconvolute(MzSpectrum spectrum, MzRange range);
}
}
diff --git a/mzLib/MassSpectrometry/Deconvolution/Algorithms/ExampleNewDeconvolutionAlgorithmTemplate.cs b/mzLib/MassSpectrometry/Deconvolution/Algorithms/ExampleNewDeconvolutionAlgorithmTemplate.cs
index 18957d8d0..c70c10b63 100644
--- a/mzLib/MassSpectrometry/Deconvolution/Algorithms/ExampleNewDeconvolutionAlgorithmTemplate.cs
+++ b/mzLib/MassSpectrometry/Deconvolution/Algorithms/ExampleNewDeconvolutionAlgorithmTemplate.cs
@@ -1,22 +1,19 @@
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
using MzLibUtil;
namespace MassSpectrometry
{
[ExcludeFromCodeCoverage]
- public class ExampleNewDeconvolutionAlgorithmTemplate : DeconvolutionAlgorithm
+ internal class ExampleNewDeconvolutionAlgorithmTemplate : DeconvolutionAlgorithm
{
- public ExampleNewDeconvolutionAlgorithmTemplate(DeconvolutionParameters deconParameters) : base(deconParameters)
+ internal ExampleNewDeconvolutionAlgorithmTemplate(DeconvolutionParameters deconParameters) : base(deconParameters)
{
}
- public override IEnumerable Deconvolute(MzSpectrum spectrum, MzRange range = null)
+ internal override IEnumerable Deconvolute(MzSpectrum spectrum, MzRange range = null)
{
var deconParams = DeconvolutionParameters as ExampleNewDeconvolutionParametersTemplate ?? throw new MzLibException("Deconvolution params and algorithm do not match");
range ??= spectrum.Range;
diff --git a/mzLib/MassSpectrometry/Deconvolution/Algorithms/IsoDecAlgorithm.cs b/mzLib/MassSpectrometry/Deconvolution/Algorithms/IsoDecAlgorithm.cs
new file mode 100644
index 000000000..6c0dab348
--- /dev/null
+++ b/mzLib/MassSpectrometry/Deconvolution/Algorithms/IsoDecAlgorithm.cs
@@ -0,0 +1,159 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Runtime.InteropServices;
+using MzLibUtil;
+
+namespace MassSpectrometry
+{
+ ///
+ /// Performs deconvolution on a single spectrum or region of spectrum using the Isodec algorithm
+ ///
+ /// Isodec only needs the region of interest and does not use surrounding charge states as references.
+ /// Isodec can report multiple monoisotopic masses for a single peak if enabled by ReportMultipleMonoisos parameter
+ /// In this case, the resulting isotopic envelopes will have the same precursor ID.
+ ///
+ ///
+ internal class IsoDecAlgorithm : DeconvolutionAlgorithm
+ {
+ internal IsoDecAlgorithm(DeconvolutionParameters deconParameters) : base(deconParameters)
+ {
+
+ }
+
+ ///
+ /// Struct passed by pointer in memory to the Isodec.dll
+ ///
+ [StructLayout(LayoutKind.Sequential, Pack =1)]
+ public struct MatchedPeak
+ {
+ public float mz;
+ public int z;
+ public float monoiso;
+ public float peakmass;
+ public float avgmass;
+ public float area;
+ public float peakint;
+ [MarshalAs(UnmanagedType.ByValArray, SizeConst = 64)]
+ public float[] matchedindsiso;
+ [MarshalAs(UnmanagedType.ByValArray, SizeConst = 64)]
+ public float[] matchedindsexp;
+ [MarshalAs(UnmanagedType.ByValArray, SizeConst = 64)]
+ public float[] isomz;
+ [MarshalAs(UnmanagedType.ByValArray, SizeConst = 64)]
+ public float[] isodist;
+ [MarshalAs(UnmanagedType.ByValArray, SizeConst = 64)]
+ public float[] isomass;
+ [MarshalAs(UnmanagedType.ByValArray, SizeConst = 16)]
+ public float[] monoisos;
+ int startindex;
+ int endindex;
+ public float score;
+ public int realisolength;
+ }
+
+ ///
+ /// Calls the isodeclib.dll to perform deconvolution on the given spectrum
+ /// The isodeclib.dll requires three other dlls as dependencies: isogenmass.dll, libmmd.dll, svml_dispmd.dll
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+
+ [DllImport("isodeclib.dll", EntryPoint = "process_spectrum", CallingConvention = CallingConvention.Cdecl)]
+ protected static extern int process_spectrum(double[] cmz, float[] cintensity, int c, string fname, IntPtr matchedpeaks, IsoDecDeconvolutionParameters.IsoSettings settings);
+
+ internal override IEnumerable Deconvolute(MzSpectrum spectrum, MzRange range)
+ {
+ var deconParams = DeconvolutionParameters as IsoDecDeconvolutionParameters ?? throw new MzLibException("Deconvolution params and algorithm do not match");
+
+ var firstIndex = spectrum.GetClosestPeakIndex(range.Minimum);
+ var lastIndex = spectrum.GetClosestPeakIndex(range.Maximum);
+
+ var mzs = spectrum.XArray[firstIndex..lastIndex]
+ .Select(p => p)
+ .ToArray();
+ var intensities = spectrum.YArray[firstIndex..lastIndex]
+ .Select(p => (float)p)
+ .ToArray();
+
+ var mpArray = new byte[intensities.Length * Marshal.SizeOf(typeof(MatchedPeak))];
+ GCHandle handle = GCHandle.Alloc(mpArray, GCHandleType.Pinned);
+ try
+ {
+ IntPtr matchedPeaksPtr = (IntPtr)handle.AddrOfPinnedObject();
+ IsoDecDeconvolutionParameters.IsoSettings settings = deconParams.ToIsoSettings();
+ int result = process_spectrum(mzs, intensities, intensities.Length, null, matchedPeaksPtr, settings);
+ if (result <= 0)
+ return Enumerable.Empty();
+
+ // Handle results
+ MatchedPeak[] matchedpeaks = new MatchedPeak[result];
+ for (int i = 0; i < result; i++)
+ {
+ matchedpeaks[i] = Marshal.PtrToStructure(matchedPeaksPtr + i * Marshal.SizeOf(typeof(MatchedPeak)));
+ }
+
+ return ConvertToIsotopicEnvelopes(deconParams, matchedpeaks, spectrum);
+ }
+ finally
+ {
+ handle.Free();
+ }
+ }
+
+ ///
+ /// Converts the isodec output (MatchedPeak) to IsotopicEnvelope for return
+ ///
+ ///
+ ///
+ ///
+ ///
+ private List ConvertToIsotopicEnvelopes(IsoDecDeconvolutionParameters parameters, MatchedPeak[] matchedpeaks, MzSpectrum spectrum)
+ {
+ List result = new List();
+ int currentId = 0;
+ var tolerance = new PpmTolerance(5);
+ foreach(MatchedPeak peak in matchedpeaks)
+ {
+ List<(double,double)> peaks = new List<(double,double)> ();
+ for (int i = 0; i < peak.realisolength; i++)
+ {
+
+ List indicesWithinTolerance = spectrum.GetPeakIndicesWithinTolerance(peak.isomz[i], tolerance);
+ double maxIntensity = 0;
+ int maxIndex = -1;
+ foreach (int index in indicesWithinTolerance)
+ {
+ if (spectrum.YArray[index] > maxIntensity) { maxIntensity = spectrum.YArray[index]; maxIndex = index; }
+ }
+ if (maxIndex >= 0)
+ {
+ peaks.Add((spectrum.XArray[maxIndex], spectrum.YArray[maxIndex]));
+ }
+ else
+ {
+ peaks.Add((peak.isomz[i], 0));
+ }
+
+ }
+ int charge = peak.z;
+ if(parameters.Polarity == Polarity.Negative) { charge = -peak.z; }
+ if(parameters.ReportMulitpleMonoisos)
+ {
+ foreach (float monoiso in peak.monoisos)
+ {
+ if (monoiso > 0) { result.Add(new IsotopicEnvelope(currentId, peaks, (double)monoiso, charge, peak.peakint, peak.score)); }
+ }
+ }
+ else { result.Add(new IsotopicEnvelope(currentId, peaks, (double)peak.monoiso, charge, peak.peakint, peak.score)); }
+ currentId++;
+ }
+ return result;
+ }
+ }
+}
diff --git a/mzLib/MassSpectrometry/Deconvolution/Deconvoluter.cs b/mzLib/MassSpectrometry/Deconvolution/Deconvoluter.cs
index d419561f2..af32b78f9 100644
--- a/mzLib/MassSpectrometry/Deconvolution/Deconvoluter.cs
+++ b/mzLib/MassSpectrometry/Deconvolution/Deconvoluter.cs
@@ -1,20 +1,9 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-using Easy.Common.Extensions;
-using Easy.Common.Interfaces;
+using System.Collections.Generic;
+using Chemistry;
using MzLibUtil;
namespace MassSpectrometry
{
- public enum DeconvolutionType
- {
- ClassicDeconvolution,
- ExampleNewDeconvolutionTemplate,
- }
-
///
/// Context class for all deconvolution
///
@@ -30,27 +19,9 @@ public static class Deconvoluter
public static IEnumerable Deconvolute(MsDataScan scan,
DeconvolutionParameters deconvolutionParameters, MzRange rangeToGetPeaksFrom = null)
{
- rangeToGetPeaksFrom ??= scan.MassSpectrum.Range;
-
- // set deconvolution algorithm and any specific deconvolution parameters found in the MsDataScan
- DeconvolutionAlgorithm deconAlgorithm;
- switch (deconvolutionParameters.DeconvolutionType)
- {
- case DeconvolutionType.ClassicDeconvolution:
- deconAlgorithm = new ClassicDeconvolutionAlgorithm(deconvolutionParameters);
- break;
-
- case DeconvolutionType.ExampleNewDeconvolutionTemplate:
- deconAlgorithm = new ExampleNewDeconvolutionAlgorithmTemplate(deconvolutionParameters);
- break;
-
- default: throw new MzLibException("DeconvolutionType not yet supported");
- }
-
- return deconAlgorithm.Deconvolute(scan.MassSpectrum, rangeToGetPeaksFrom);
+ // set any specific deconvolution parameters found only in the MsDataScan
+ return Deconvolute(scan.MassSpectrum, deconvolutionParameters, rangeToGetPeaksFrom);
}
-
-
///
/// Static deconvolution of an MzSpectrum that does not require Deconvoluter construction
@@ -64,22 +35,53 @@ public static IEnumerable Deconvolute(MzSpectrum spectrum,
{
rangeToGetPeaksFrom ??= spectrum.Range;
+ // Short circuit deconvolution if it is called on a neutral mass spectrum
+ if (spectrum is NeutralMassSpectrum newt)
+ return DeconvoluteNeutralMassSpectrum(newt, rangeToGetPeaksFrom);
+
// set deconvolution algorithm
- DeconvolutionAlgorithm deconAlgorithm;
- switch (deconvolutionParameters.DeconvolutionType)
+ DeconvolutionAlgorithm deconAlgorithm = CreateAlgorithm(deconvolutionParameters);
+
+ // Delegate deconvolution to the algorithm
+ return deconAlgorithm.Deconvolute(spectrum, rangeToGetPeaksFrom);
+ }
+
+ ///
+ /// Factory method to create the correct deconvolution algorithm from the parameters
+ ///
+ ///
+ ///
+ ///
+ private static DeconvolutionAlgorithm CreateAlgorithm(DeconvolutionParameters parameters)
+ {
+ return parameters.DeconvolutionType switch
{
- case DeconvolutionType.ClassicDeconvolution:
- deconAlgorithm = new ClassicDeconvolutionAlgorithm(deconvolutionParameters);
- break;
+ DeconvolutionType.ClassicDeconvolution => new ClassicDeconvolutionAlgorithm(parameters),
+ DeconvolutionType.ExampleNewDeconvolutionTemplate => new ExampleNewDeconvolutionAlgorithmTemplate(parameters),
+ DeconvolutionType.IsoDecDeconvolution => new IsoDecAlgorithm(parameters),
+ _ => throw new MzLibException("DeconvolutionType not yet supported")
+ };
+ }
- case DeconvolutionType.ExampleNewDeconvolutionTemplate:
- deconAlgorithm = new ExampleNewDeconvolutionAlgorithmTemplate(deconvolutionParameters);
- break;
+ ///
+ /// Returns all peaks in the neutral mass spectrum as an isotopic envelope with a single peak
+ ///
+ ///
+ ///
+ ///
+ private static IEnumerable DeconvoluteNeutralMassSpectrum(NeutralMassSpectrum neutralSpectrum, MzRange range)
+ {
+ for (int i = 0; i < neutralSpectrum.XArray.Length; i++)
+ {
+ double neutralMass = neutralSpectrum.XArray[i];
+ double intensity = neutralSpectrum.YArray[i];
+ int chargeState = neutralSpectrum.Charges[i];
- default: throw new MzLibException("DeconvolutionType not yet supported");
+ if (range.Contains(neutralMass.ToMz(chargeState)))
+ {
+ yield return new IsotopicEnvelope(neutralMass, intensity, chargeState);
+ }
}
-
- return deconAlgorithm.Deconvolute(spectrum, rangeToGetPeaksFrom);
}
}
}
diff --git a/mzLib/MassSpectrometry/Deconvolution/DeconvolutionType.cs b/mzLib/MassSpectrometry/Deconvolution/DeconvolutionType.cs
new file mode 100644
index 000000000..f2fece52e
--- /dev/null
+++ b/mzLib/MassSpectrometry/Deconvolution/DeconvolutionType.cs
@@ -0,0 +1,15 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace MassSpectrometry
+{
+ public enum DeconvolutionType
+ {
+ ClassicDeconvolution,
+ ExampleNewDeconvolutionTemplate,
+ IsoDecDeconvolution,
+ }
+}
diff --git a/mzLib/MassSpectrometry/Deconvolution/Parameters/IsoDecDeconvolutionParameters.cs b/mzLib/MassSpectrometry/Deconvolution/Parameters/IsoDecDeconvolutionParameters.cs
new file mode 100644
index 000000000..649ee9a11
--- /dev/null
+++ b/mzLib/MassSpectrometry/Deconvolution/Parameters/IsoDecDeconvolutionParameters.cs
@@ -0,0 +1,214 @@
+using System.Runtime.InteropServices;
+
+namespace MassSpectrometry;
+public class IsoDecDeconvolutionParameters : DeconvolutionParameters
+{
+ public override DeconvolutionType DeconvolutionType { get; protected set; } = DeconvolutionType.IsoDecDeconvolution;
+
+ #region Interop Parameters
+
+ ///
+ /// The struct that is passed into the isodec.dll
+ ///
+ public struct IsoSettings
+ {
+ public int phaseres; // Precision of encoding matrix
+ public int verbose; // Verbose output
+ public int peakwindow; // Peak Detection Window
+ public float peakthresh; // Peak Detection Threshold
+ public int minpeaks; // Minimum Peaks for an allowed peak
+ public float css_thresh; // Minimum cosine similarity score for isotope distribution
+ public float matchtol; // Match Tolerance for peak detection in ppm
+ public int maxshift; // Maximum shift allowed for isotope distribution
+ [MarshalAs(UnmanagedType.ByValArray, SizeConst = 2)]
+ public float[] mzwindow; // MZ Window for isotope distribution
+ [MarshalAs(UnmanagedType.ByValArray, SizeConst = 2)]
+ public float[] plusoneintwindow; // Plus One Intensity range. Will be used for charge state 1
+ public int knockdown_rounds; // Number of knockdown rounds
+ public float min_score_diff; // Minimum score difference for isotope distribution to allow missed monoisotopic peaks
+ public float minareacovered; // Minimum area covered by isotope distribution. Use in or with css_thresh
+ public int isolength; // Isotope Distribution Length
+ public double mass_diff_c; // Mass difference between isotopes
+ public float adductmass; // Adduct Mass
+ public int minusoneaszero; // Used to set the -1 isotope as 0 to help force better alignments
+ public float isotopethreshold; // Threshold for isotope distribution. Will remove relative intensities below this.
+ public float datathreshold; // Threshold for data. Will remove relative intensities below this relative to max intensity in each cluster
+ public float zscore_threshold; //Ratio above which a secondary charge state prediction will be returned.
+ }
+
+ private IsoSettings? _isoSettings;
+
+ internal IsoSettings ToIsoSettings()
+ {
+ if (_isoSettings != null)
+ return _isoSettings.Value;
+
+ IsoSettings result = new IsoSettings
+ {
+ phaseres = PhaseRes,
+ verbose = Verbose,
+ peakwindow = PeakWindow,
+ peakthresh = PeakThreshold,
+ minpeaks = MinPeaks,
+ css_thresh = CssThreshold,
+ matchtol = MatchTolerance,
+ maxshift = MaxShift,
+ mzwindow = MzWindow,
+ plusoneintwindow = PlusOneIntWindow,
+ knockdown_rounds = KnockdownRounds,
+ min_score_diff = MinScoreDiff,
+ minareacovered = MinAreaCovered,
+ isolength = IsoLength,
+ mass_diff_c = MassDiffC,
+ adductmass = AdductMass,
+ minusoneaszero = MinusOneAreasZero,
+ isotopethreshold = IsotopeThreshold,
+ datathreshold = DataThreshold,
+ zscore_threshold = ZScoreThreshold
+ };
+
+ _isoSettings = result;
+ return result;
+ }
+
+ #endregion
+
+ #region User-Accessible Parameters
+
+ ///
+ /// Precision of encoding matrix
+ ///
+ public int PhaseRes { get; set; }
+
+ ///
+ /// Minimum cosine similarity score for isotope distribution
+ ///
+ public float CssThreshold { get; set; }
+
+ ///
+ /// Match Tolerance for peak detection in ppm
+ ///
+ public float MatchTolerance { get; set; }
+
+ ///
+ /// Maximum shift allowed for isotope distribution
+ ///
+ public int MaxShift { get; set; }
+
+ ///
+ /// MZ Window for isotope distribution
+ ///
+ public float[] MzWindow { get; set; }
+
+ ///
+ /// Number of knockdown rounds
+ ///
+ public int KnockdownRounds { get; set; }
+
+ ///
+ /// Minimum area covered by isotope distribution. Use in or with css_thresh
+ ///
+ public float MinAreaCovered { get; set; }
+
+ ///
+ /// Threshold for data. Will remove relative intensities below this relative to max intensity in each cluster
+ ///
+ public float DataThreshold { get; set; }
+
+ ///
+ /// Report multiple monoisotopic peaks
+ ///
+ public bool ReportMulitpleMonoisos { get; set; }
+
+ #endregion User-Accessible Parameters
+
+ #region Hard-Coded Parameters
+
+ ///
+ /// Verbose output
+ ///
+ public int Verbose { get; protected set; } = 0;
+
+ ///
+ /// Peak Detection Window
+ ///
+ public int PeakWindow { get; protected set; } = 80;
+
+ ///
+ /// Peak Detection Threshold
+ ///
+ public float PeakThreshold { get; protected set; } = (float)0.0001;
+
+ ///
+ /// Minimum Peaks for an allowed peak
+ ///
+ public int MinPeaks { get; protected set; } = 3;
+
+ ///
+ /// Plus One Intensity range. Will be used for charge state 1
+ ///
+ public float[] PlusOneIntWindow { get; protected set; } = new float[] { (float)0.1, (float)0.6 };
+
+ ///
+ /// Minimum score difference for isotope distribution to allow missed monoisotopic peaks
+ ///
+ public float MinScoreDiff { get; protected set; } = (float)0.1;
+
+ ///
+ /// Isotope Distribution Length
+ ///
+ public int IsoLength { get; protected set; } = 64;
+
+ ///
+ /// Mass difference between isotopes
+ ///
+ public double MassDiffC { get; protected set; } = 1.0033;
+
+ ///
+ /// Adduct Mass
+ ///
+ public float AdductMass { get; protected set; } = (float)1.00727276467;
+
+ ///
+ /// Used to set the -1 isotope as 0 to help force better alignments
+ ///
+ public int MinusOneAreasZero { get; protected set; } = 1;
+
+ ///
+ /// Threshold for isotope distribution. Will remove relative intensities below this.
+ ///
+ public float IsotopeThreshold { get; protected set; } = (float)0.01;
+
+ ///
+ /// Ratio above which a secondary charge state prediction will be returned.
+ ///
+ public float ZScoreThreshold { get; protected set; } = (float)0.95;
+
+ #endregion Hard-Coded Parameters
+
+ public IsoDecDeconvolutionParameters(
+ Polarity polarity = Polarity.Positive,
+ int phaseRes = 8,
+ bool reportMultipleMonoisos = true,
+ float cssThreshold = (float)0.7,
+ float matchTolerance = (float)5,
+ int maxShift = 3,
+ float[] mzWindow = null,
+ int knockdownRounds = 5,
+ float minAreaCovered = (float)0.20,
+ float relativeDataThreshold = (float)0.05)
+ : base(1, 50, polarity)
+ {
+ PhaseRes = phaseRes;
+ ReportMulitpleMonoisos = reportMultipleMonoisos;
+ CssThreshold = cssThreshold;
+ MatchTolerance = matchTolerance;
+ MaxShift = maxShift;
+ MzWindow = mzWindow ?? new float[] { (float)-1.05, (float)2.05 };
+ KnockdownRounds = knockdownRounds;
+ MinAreaCovered = minAreaCovered;
+ DataThreshold = relativeDataThreshold;
+ if (Polarity == Polarity.Negative)
+ AdductMass = (float)-1.00727276467;
+ }
+}
\ No newline at end of file
diff --git a/mzLib/MassSpectrometry/Enums/DissociationType.cs b/mzLib/MassSpectrometry/Enums/DissociationType.cs
index 1ac136197..ca738b3fa 100644
--- a/mzLib/MassSpectrometry/Enums/DissociationType.cs
+++ b/mzLib/MassSpectrometry/Enums/DissociationType.cs
@@ -109,6 +109,11 @@ public enum DissociationType
///
LowCID,
+ ///
+ /// activated ion electron photo detachment dissociation
+ ///
+ aEPD,
+
Unknown,
AnyActivationType,
Custom,
diff --git a/mzLib/MassSpectrometry/MassSpectrometry.csproj b/mzLib/MassSpectrometry/MassSpectrometry.csproj
index 6af63b9e4..e662d170d 100644
--- a/mzLib/MassSpectrometry/MassSpectrometry.csproj
+++ b/mzLib/MassSpectrometry/MassSpectrometry.csproj
@@ -20,4 +20,23 @@
+
+
+
+
+
+
+
+ Always
+
+
+ Always
+
+
+ Always
+
+
+ Always
+
+
diff --git a/mzLib/MassSpectrometry/MsDataFile.cs b/mzLib/MassSpectrometry/MsDataFile.cs
index 7242f2cf7..3e88fcfef 100644
--- a/mzLib/MassSpectrometry/MsDataFile.cs
+++ b/mzLib/MassSpectrometry/MsDataFile.cs
@@ -23,9 +23,6 @@
namespace MassSpectrometry
{
- // TODO: Define scope of class
- // Class scope is to provide to the data loaded from the DataFile.
-
///
/// A class for interacting with data collected from a Mass Spectrometer, and stored in a file
///
diff --git a/mzLib/MassSpectrometry/MsDataScan.cs b/mzLib/MassSpectrometry/MsDataScan.cs
index 6e54e05e8..71cc16502 100644
--- a/mzLib/MassSpectrometry/MsDataScan.cs
+++ b/mzLib/MassSpectrometry/MsDataScan.cs
@@ -27,9 +27,29 @@ namespace MassSpectrometry
{
public class MsDataScan
{
- public MsDataScan(MzSpectrum massSpectrum, int oneBasedScanNumber, int msnOrder, bool isCentroid, Polarity polarity, double retentionTime, MzRange scanWindowRange, string scanFilter, MZAnalyzerType mzAnalyzer,
- double totalIonCurrent, double? injectionTime, double[,] noiseData, string nativeId, double? selectedIonMz = null, int? selectedIonChargeStateGuess = null, double? selectedIonIntensity = null, double? isolationMZ = null,
- double? isolationWidth = null, DissociationType? dissociationType = null, int? oneBasedPrecursorScanNumber = null, double? selectedIonMonoisotopicGuessMz = null, string hcdEnergy = null, string scanDescription = null)
+ public MsDataScan(MzSpectrum massSpectrum,
+ int oneBasedScanNumber,
+ int msnOrder,
+ bool isCentroid,
+ Polarity polarity,
+ double retentionTime,
+ MzRange scanWindowRange,
+ string scanFilter,
+ MZAnalyzerType mzAnalyzer,
+ double totalIonCurrent,
+ double? injectionTime,
+ double[,] noiseData,
+ string nativeId,
+ double? selectedIonMz = null,
+ int? selectedIonChargeStateGuess = null,
+ double? selectedIonIntensity = null,
+ double? isolationMZ = null,
+ double? isolationWidth = null,
+ DissociationType? dissociationType = null,
+ int? oneBasedPrecursorScanNumber = null,
+ double? selectedIonMonoisotopicGuessMz = null,
+ string hcdEnergy = null,
+ string scanDescription = null)
{
OneBasedScanNumber = oneBasedScanNumber;
MsnOrder = msnOrder;
@@ -61,7 +81,7 @@ public MsDataScan(MzSpectrum massSpectrum, int oneBasedScanNumber, int msnOrder,
///
public MzSpectrum MassSpectrum { get; protected set; }
- public int OneBasedScanNumber { get; private set; }
+ public int OneBasedScanNumber { get; protected set; }
public int MsnOrder { get; }
public double RetentionTime { get; }
public Polarity Polarity { get; }
@@ -70,7 +90,7 @@ public MsDataScan(MzSpectrum massSpectrum, int oneBasedScanNumber, int msnOrder,
public string ScanFilter { get; }
public string NativeId { get; private set; }
public bool IsCentroid { get; }
- public double TotalIonCurrent { get; }
+ public double TotalIonCurrent { get; protected set; }
public double? InjectionTime { get; }
public double[,] NoiseData { get; }
@@ -82,7 +102,7 @@ public MsDataScan(MzSpectrum massSpectrum, int oneBasedScanNumber, int msnOrder,
public double? SelectedIonMZ { get; private set; } // May be adjusted by calibration
public DissociationType? DissociationType { get; }
public double? IsolationWidth { get; }
- public int? OneBasedPrecursorScanNumber { get; private set; }
+ public int? OneBasedPrecursorScanNumber { get; protected set; }
public double? SelectedIonMonoisotopicGuessIntensity { get; private set; } // May be refined
public double? SelectedIonMonoisotopicGuessMz { get; private set; } // May be refined
public string HcdEnergy { get; private set; }
diff --git a/mzLib/MassSpectrometry/MzSpectra/IsotopicEnvelope.cs b/mzLib/MassSpectrometry/MzSpectra/IsotopicEnvelope.cs
index 7e42426b1..9fc5c05b1 100644
--- a/mzLib/MassSpectrometry/MzSpectra/IsotopicEnvelope.cs
+++ b/mzLib/MassSpectrometry/MzSpectra/IsotopicEnvelope.cs
@@ -14,29 +14,57 @@ public class IsotopicEnvelope : IHasMass
///
/// Mass of most abundant observed isotopic peak, not accounting for addition or subtraction or protons due to ESI charge state induction
///
- public double MostAbundantObservedIsotopicMass { get; private set; }
+ internal double MostAbundantObservedIsotopicMass { get; private set; }
public readonly int Charge;
public readonly double TotalIntensity;
- public readonly double StDev;
- public readonly int MassIndex;
+ public readonly int PrecursorId;
public double Score { get; private set; }
- public IsotopicEnvelope(List<(double mz, double intensity)> bestListOfPeaks, double bestMonoisotopicMass, int bestChargeState, double bestTotalIntensity, double bestStDev, int bestMassIndex)
+ ///
+ /// Used for an isotopic envelope that mzLib deconvoluted (e.g., from a mass spectrum)
+ ///
+ public IsotopicEnvelope(List<(double mz, double intensity)> bestListOfPeaks, double bestMonoisotopicMass, int bestChargeState, double bestTotalIntensity, double bestStDev)
{
Peaks = bestListOfPeaks;
MonoisotopicMass = bestMonoisotopicMass;
- MostAbundantObservedIsotopicMass = GetMostAbundantObservedIsotopicMass(bestListOfPeaks, bestChargeState);
+ MostAbundantObservedIsotopicMass = bestListOfPeaks.MaxBy(p => p.intensity).mz * Math.Abs(bestChargeState);
Charge = bestChargeState;
TotalIntensity = bestTotalIntensity;
- StDev = bestStDev;
- MassIndex = bestMassIndex;
- Score = ScoreIsotopeEnvelope();
+ Score = ScoreIsotopeEnvelope(bestStDev);
+ }
+
+ ///
+ /// Used for a neutral mass read in from a deconvoluted file
+ /// Assumes the mass is correct: score is max value
+ ///
+ public IsotopicEnvelope(double monoisotopicMass, double intensity, int charge)
+ {
+ MonoisotopicMass = monoisotopicMass;
+ Charge = charge;
+ TotalIntensity = intensity;
+ Score = double.MaxValue;
+ Peaks = [(monoisotopicMass.ToMz(charge), intensity)];
}
- public double GetMostAbundantObservedIsotopicMass(List<(double mz, double intensity)> peaks, int charge)
+ ///
+ /// Used for a deconvolution method that calculates its own score.
+ ///
+ /// All missed mono products of the same peak will share an ID if enabled in IsoDec
+ ///
+ ///
+ ///
+ ///
+ ///
+ public IsotopicEnvelope(int id, List<(double mz, double intensity)> peaks, double monoisotopicmass, int chargestate, double intensity, double score)
{
- return peaks.MaxBy(p => p.intensity).mz * Math.Abs(charge);
+ PrecursorId = id;
+ Peaks = peaks;
+ MonoisotopicMass = monoisotopicmass;
+ Charge = chargestate;
+ TotalIntensity = intensity;
+ Score = score;
+ MostAbundantObservedIsotopicMass = peaks.MaxBy(p => p.intensity).mz * Math.Abs(chargestate);
}
public override string ToString()
@@ -44,10 +72,10 @@ public override string ToString()
return Charge + "\t" + Peaks[0].mz.ToString("G8") + "\t" + Peaks.Count + "\t" + TotalIntensity;
}
- private double ScoreIsotopeEnvelope() //likely created by Stefan Solntsev using peptide data
+ private double ScoreIsotopeEnvelope(double stDev) //likely created by Stefan Solntsev using peptide data
{
return Peaks.Count >= 2 ?
- TotalIntensity / Math.Pow(StDev, 0.13) * Math.Pow(Peaks.Count, 0.4) / Math.Pow(Math.Abs(Charge), 0.06) :
+ TotalIntensity / Math.Pow(stDev, 0.13) * Math.Pow(Peaks.Count, 0.4) / Math.Pow(Math.Abs(Charge), 0.06) :
0;
}
@@ -60,6 +88,5 @@ public void SetMedianMonoisotopicMass(List monoisotopicMassPredictions)
{
MonoisotopicMass = monoisotopicMassPredictions.Median();
}
-
}
}
\ No newline at end of file
diff --git a/mzLib/MassSpectrometry/MzSpectra/MzSpectrum.cs b/mzLib/MassSpectrometry/MzSpectra/MzSpectrum.cs
index 2e9fcc7a4..62422cc96 100644
--- a/mzLib/MassSpectrometry/MzSpectra/MzSpectrum.cs
+++ b/mzLib/MassSpectrometry/MzSpectra/MzSpectrum.cs
@@ -22,10 +22,8 @@
using System;
using System.Collections;
using System.Collections.Generic;
-using System.Diagnostics;
using System.IO;
using System.Linq;
-using System.Text.Json;
namespace MassSpectrometry
{
@@ -126,7 +124,7 @@ public MzRange Range
}
}
- public double? FirstX
+ public virtual double? FirstX
{
get
{
@@ -138,7 +136,7 @@ public double? FirstX
}
}
- public double? LastX
+ public virtual double? LastX
{
get
{
@@ -373,7 +371,7 @@ public IsotopicEnvelope FindIsotopicEnvelope(int massIndex, double candidateForM
}
}
- return new IsotopicEnvelope(listOfObservedPeaks, monoisotopicMass, chargeState, totalIntensity, Statistics.StandardDeviation(listOfRatios), massIndex);
+ return new IsotopicEnvelope(listOfObservedPeaks, monoisotopicMass, chargeState, totalIntensity, listOfRatios.StandardDeviation());
}
[Obsolete("Deconvolution Has been moved to the Deconvoluter Object")]
@@ -611,6 +609,26 @@ public int GetClosestPeakIndex(double x)
return XArray.GetClosestIndex(x);
}
+ public List GetPeakIndicesWithinTolerance(double x, Tolerance tolerance)
+ {
+ if (XArray.Length == 0)
+ return [];
+
+ // find min and max allowed
+ var minX = tolerance.GetMinimumValue(x);
+ var maxX = tolerance.GetMaximumValue(x);
+
+ // check if min and max are possible to find in this spectrum
+ if (XArray.First() > maxX || XArray.Last() < minX)
+ return [];
+
+ // find index closest to extrema
+ int startingIndex = XArray.GetClosestIndex(minX, ArraySearchOption.Next);
+ int endIndex = XArray.GetClosestIndex(maxX, ArraySearchOption.Previous);
+
+ return Enumerable.Range(startingIndex, endIndex - startingIndex + 1).ToList();
+ }
+
public void ReplaceXbyApplyingFunction(Func convertor)
{
for (int i = 0; i < Size; i++)
@@ -796,7 +814,12 @@ private MzPeak GetPeak(int index)
return peakList[index];
}
- private MzPeak GeneratePeak(int index)
+ ///
+ /// The source of all peaks which populate the peakList
+ ///
+ ///
+ ///
+ protected virtual MzPeak GeneratePeak(int index)
{
return new MzPeak(XArray[index], YArray[index]);
}
diff --git a/mzLib/MassSpectrometry/MzSpectra/NeutralMassSpectrum.cs b/mzLib/MassSpectrometry/MzSpectra/NeutralMassSpectrum.cs
new file mode 100644
index 000000000..dcb5d7d2b
--- /dev/null
+++ b/mzLib/MassSpectrometry/MzSpectra/NeutralMassSpectrum.cs
@@ -0,0 +1,65 @@
+using System;
+using Chemistry;
+
+namespace MassSpectrometry
+{
+ public class NeutralMassSpectrum : MzSpectrum
+ {
+ public int[] Charges { get; init; }
+ public NeutralMassSpectrum(double[,] monoisotopicMassesIntensities, int[] charges) : base(monoisotopicMassesIntensities)
+ {
+ if (monoisotopicMassesIntensities.GetLength(0) != charges.Length)
+ throw new ArgumentException("The lengths of monoisotopicMasses, intensities, and charges must be the same.");
+
+ Charges = charges;
+
+ double minMz = double.MaxValue;
+ double maxMz = double.MinValue;
+ for (int i = 0; i < monoisotopicMassesIntensities.GetLength(0); i++)
+ {
+ var mz = monoisotopicMassesIntensities[i,0].ToMz(charges[i]);
+ if (mz < minMz)
+ minMz = mz;
+ if (mz > maxMz)
+ maxMz = mz;
+ }
+
+ FirstX = minMz;
+ LastX = maxMz;
+ }
+
+ public NeutralMassSpectrum(double[] monoisotopicMasses, double[] intensities, int[] charges, bool shouldCopy)
+ : base(monoisotopicMasses, intensities, shouldCopy)
+ {
+ if (monoisotopicMasses.GetLength(0) != intensities.Length || monoisotopicMasses.Length != charges.Length)
+ throw new ArgumentException("The lengths of monoisotopicMasses, intensities, and charges must be the same.");
+
+ Charges = charges;
+
+ double minMz = double.MaxValue;
+ double maxMz = double.MinValue;
+ for (int i = 0; i < monoisotopicMasses.Length; i++)
+ {
+ var mz = monoisotopicMasses[i].ToMz(charges[i]);
+ if (mz < minMz)
+ minMz = mz;
+ if (mz > maxMz)
+ maxMz = mz;
+ }
+
+ FirstX = minMz;
+ LastX = maxMz;
+ }
+
+ public override double? FirstX { get; } // in m/z
+ public override double? LastX { get; } // in m/z
+
+ ///
+ /// Converts to a charged spectrum
+ ///
+ protected override MzPeak GeneratePeak(int index)
+ {
+ return new MzPeak(XArray[index].ToMz(Charges[index]), YArray[index]);
+ }
+ }
+}
diff --git a/mzLib/MassSpectrometry/isodeclib.dll b/mzLib/MassSpectrometry/isodeclib.dll
new file mode 100644
index 000000000..a89f4ad96
Binary files /dev/null and b/mzLib/MassSpectrometry/isodeclib.dll differ
diff --git a/mzLib/MassSpectrometry/isogenmass.dll b/mzLib/MassSpectrometry/isogenmass.dll
new file mode 100644
index 000000000..072d3f72d
Binary files /dev/null and b/mzLib/MassSpectrometry/isogenmass.dll differ
diff --git a/mzLib/MassSpectrometry/libmmd.dll b/mzLib/MassSpectrometry/libmmd.dll
new file mode 100644
index 000000000..e8544358b
Binary files /dev/null and b/mzLib/MassSpectrometry/libmmd.dll differ
diff --git a/mzLib/MassSpectrometry/svml_dispmd.dll b/mzLib/MassSpectrometry/svml_dispmd.dll
new file mode 100644
index 000000000..147636b82
Binary files /dev/null and b/mzLib/MassSpectrometry/svml_dispmd.dll differ
diff --git a/mzLib/MzLibUtil/ClassExtensions.cs b/mzLib/MzLibUtil/ClassExtensions.cs
index 0129154a4..f725822f7 100644
--- a/mzLib/MzLibUtil/ClassExtensions.cs
+++ b/mzLib/MzLibUtil/ClassExtensions.cs
@@ -19,12 +19,93 @@
using System;
using System.Collections.Generic;
using System.Linq;
+using System.Runtime.CompilerServices;
using System.Text.RegularExpressions;
namespace MzLibUtil
{
public static class ClassExtensions
{
+ ///
+ /// Parses the full sequence to identify mods.
+ ///
+ /// Full sequence of the peptide in question
+ /// If true, the index of modifications at the N-terminus will be 0 (zero-based indexing). Otherwise, it is the index of the first amino acid (one-based indexing).
+ /// If true, the index of modifications at the C-terminus will be one more than the index of the last amino acid. Otherwise, it is the index of the last amino acid.
+ /// Dictionary with the key being the amino acid position of the mod and the value being the string representing the mod
+ public static Dictionary> ParseModifications(this string fullSequence, bool modOnNTerminus=false, bool modOnCTerminus=false)
+ {
+ // use a regex to get all modifications
+ string pattern = @"\[(.+?)\](?> modDict = new();
+
+ string fullSeq = fullSequence;
+ RemoveSpecialCharacters(ref fullSeq);
+ MatchCollection matches = regex.Matches(fullSeq);
+ int captureLengthSum = 0;
+ foreach (Match match in matches)
+ {
+ GroupCollection group = match.Groups;
+ string val = group[1].Value;
+ int startIndex = group[0].Index;
+ int captureLength = group[0].Length;
+
+ List modList = new List();
+ modList.Add(val);
+
+ // The position of the amino acids is tracked by the positionToAddToDict variable. It takes the
+ // startIndex of the modification Match and removes the cumulative length of the modifications
+ // found (including the brackets). The difference will be the number of nonmodification characters,
+ // or the number of amino acids prior to the startIndex in the sequence.
+ int positionToAddToDict = startIndex - captureLengthSum;
+
+ // Handle N terminus indexing
+ if ((positionToAddToDict == 0) && !modOnNTerminus)
+ {
+ positionToAddToDict++;
+ }
+
+ // Handle C terminus indexing
+ if ((fullSeq.Length == startIndex + captureLength) && modOnCTerminus)
+ {
+ positionToAddToDict++;
+ }
+
+ // check to see if the key already exists
+ // if the key already exists, append this modification to that position's list.
+ // otherwise, add the position and a new modification list to the dict.
+ if (modDict.ContainsKey(positionToAddToDict))
+ {
+ modDict[positionToAddToDict].Add(val);
+ }
+ else
+ {
+ modDict.Add(positionToAddToDict, modList);
+ }
+ captureLengthSum += captureLength;
+ }
+ return modDict;
+ }
+
+ ///
+ /// Fixes an issue where the | appears and throws off the numbering if there are multiple mods on a single amino acid.
+ ///
+ ///
+ ///
+ ///
+ ///
+ public static void RemoveSpecialCharacters(ref string fullSequence, string replacement = @"", string specialCharacter = @"\|")
+ {
+ // next regex is used in the event that multiple modifications are on a missed cleavage Lysine (K)
+ Regex regexSpecialChar = new(specialCharacter);
+ fullSequence = regexSpecialChar.Replace(fullSequence, replacement);
+ }
+
public static double[] BoxCarSmooth(this double[] data, int points)
{
// Force to be odd
@@ -57,6 +138,18 @@ public static T[] SubArray(this T[] data, int index, int length)
return result;
}
+ public static bool ToEnum(this int modeInt, out T result) where T : Enum
+ {
+ Type enumType = typeof(T);
+ if (!Enum.IsDefined(enumType, modeInt))
+ {
+ result = default(T);
+ return false;
+ }
+ result = (T)Enum.ToObject(enumType, modeInt);
+ return true;
+ }
+
///
/// Checks if two collections are equivalent, regardless of the order of their contents
///
diff --git a/mzLib/MzLibUtil/MzLibException.cs b/mzLib/MzLibUtil/MzLibException.cs
index cf86074d8..885081433 100644
--- a/mzLib/MzLibUtil/MzLibException.cs
+++ b/mzLib/MzLibUtil/MzLibException.cs
@@ -1,13 +1,9 @@
-using System;
+#nullable enable
+using System;
namespace MzLibUtil
{
[Serializable]
- public class MzLibException : Exception
- {
- public MzLibException(string message)
- : base(message)
- {
- }
- }
+ public class MzLibException(string message, Exception? innerException = null)
+ : Exception(message, innerException);
}
\ No newline at end of file
diff --git a/mzLib/MzLibUtil/MzLibUtil.csproj b/mzLib/MzLibUtil/MzLibUtil.csproj
index c6b5cf526..60e7fb93f 100644
--- a/mzLib/MzLibUtil/MzLibUtil.csproj
+++ b/mzLib/MzLibUtil/MzLibUtil.csproj
@@ -11,10 +11,9 @@
-
+
-
diff --git a/mzLib/MzLibUtil/MzLibUtil.csproj.DotSettings b/mzLib/MzLibUtil/MzLibUtil.csproj.DotSettings
new file mode 100644
index 000000000..52f9c2892
--- /dev/null
+++ b/mzLib/MzLibUtil/MzLibUtil.csproj.DotSettings
@@ -0,0 +1,2 @@
+
+ True
\ No newline at end of file
diff --git a/mzLib/MzLibUtil/ObjectPools/DictionaryPool.cs b/mzLib/MzLibUtil/ObjectPools/DictionaryPool.cs
new file mode 100644
index 000000000..0e3efec5c
--- /dev/null
+++ b/mzLib/MzLibUtil/ObjectPools/DictionaryPool.cs
@@ -0,0 +1,94 @@
+using System;
+using System.Collections.Generic;
+using Microsoft.Extensions.ObjectPool;
+
+namespace MzLibUtil;
+
+// Example Usage:
+// var pool = new DictionaryPool();
+// var dictionary = pool.Get();
+// try {
+// dictionary.Add(1,1);
+// Do Work
+// }
+// finally {
+// pool.Return(dictionary);
+// }
+
+///
+/// Provides a pool for instances to reduce memory allocations.
+/// This class uses the from Microsoft.Extensions.ObjectPool
+/// to manage the pooling of objects.
+///
+/// The type of keys in the .
+/// The type of values in the .
+///
+/// This class is not thread-safe and should not be shared between threads.
+ /// Instances should be obtained outside a try/finally block, and the finally block should return the Dictionary to the pool to ensure proper pooling in the case of a caught exception.
+///
+public class DictionaryPool where TKey : notnull
+{
+ private readonly ObjectPool> _pool;
+
+ ///
+ /// Initializes a new instance of the class.
+ ///
+ /// Initial capacity for the pooled Dictionary instances.
+ public DictionaryPool(int initialCapacity = 16)
+ {
+ var policy = new DictionaryPooledObjectPolicy(initialCapacity);
+ var provider = new DefaultObjectPoolProvider { MaximumRetained = Environment.ProcessorCount * 2 };
+ _pool = provider.Create(policy);
+ }
+
+ ///
+ /// Retrieves a Dictionary instance from the pool.
+ ///
+ /// A Dictionary instance.
+ public Dictionary Get() => _pool.Get();
+
+ ///
+ /// Returns a Dictionary instance back to the pool.
+ ///
+ /// The Dictionary instance to return.
+ public void Return(Dictionary dictionary)
+ {
+ if (dictionary == null) throw new ArgumentNullException(nameof(dictionary));
+ dictionary.Clear(); // Ensure the Dictionary is clean before returning it to the pool
+ _pool.Return(dictionary);
+ }
+
+ ///
+ /// Policy for pooling Dictionary instances with a specified initial capacity.
+ ///
+ /// The type of keys in the Dictionary.
+ /// The type of values in the Dictionary.
+ /// The initial capacity for the pooled Dictionary instances.
+ private class DictionaryPooledObjectPolicy(int initialCapacity)
+ : PooledObjectPolicy>
+ where TKeyItem : notnull
+ {
+ private int InitialCapacity { get; } = initialCapacity;
+
+ ///
+ /// Creates a new Dictionary instance with the specified initial capacity.
+ ///
+ /// A new Dictionary instance.
+ public override Dictionary Create()
+ {
+ return new Dictionary(capacity: InitialCapacity);
+ }
+
+ ///
+ /// Returns a Dictionary instance to the pool after clearing it.
+ ///
+ /// The Dictionary instance to return.
+ /// True if the Dictionary instance can be reused; otherwise, false.
+ public override bool Return(Dictionary obj)
+ {
+ // Ensure the Dictionary can be safely reused
+ obj.Clear();
+ return true;
+ }
+ }
+}
diff --git a/mzLib/MzLibUtil/ObjectPools/HashSetPool.cs b/mzLib/MzLibUtil/ObjectPools/HashSetPool.cs
new file mode 100644
index 000000000..ae33ffd99
--- /dev/null
+++ b/mzLib/MzLibUtil/ObjectPools/HashSetPool.cs
@@ -0,0 +1,88 @@
+using System;
+using System.Collections.Generic;
+using Microsoft.Extensions.ObjectPool;
+
+namespace MzLibUtil;
+
+
+// Example Usage:
+// var pool = new HashSetPool();
+// var hashSet = pool.Get();
+// try {
+// hashSet.Add(1);
+// Do Work
+// }
+// finally {
+// pool.Return(hashSet);
+// }
+
+///
+/// Provides a pool for instances to reduce memory allocations.
+/// This class uses the from Microsoft.Extensions.ObjectPool
+/// to manage the pooling of objects.
+///
+/// The type of elements in the .
+///
+/// This class is not thread-safe and should not be shared between threads.
+ /// Instances should be obtained outside a try/finally block, and the finally block should return the HashSet to the pool to ensure proper pooling in the case of a caught exception.
+/// See example found in DigestionAgent.GetDigestionSiteIndices() for proper usage
+///
+public class HashSetPool
+{
+ private readonly ObjectPool> _pool;
+
+ ///
+ /// Initializes a new instance of the class.
+ ///
+ /// Initial capacity for the pooled HashSet instances.
+ public HashSetPool(int initialCapacity = 16)
+ {
+ var policy = new HashSetPooledObjectPolicy(initialCapacity);
+ _pool = new DefaultObjectPool>(policy);
+ }
+
+ ///
+ /// Retrieves a instance from the pool.
+ ///
+ /// A instance.
+ public HashSet Get() => _pool.Get();
+
+ ///
+ /// Returns a instance back to the pool.
+ ///
+ /// The instance to return.
+ public void Return(HashSet hashSet)
+ {
+ if (hashSet == null) throw new ArgumentNullException(nameof(hashSet));
+ hashSet.Clear(); // Ensure the HashSet is clean before returning it to the pool
+ _pool.Return(hashSet);
+ }
+
+ ///
+ /// Defines the policy for creating and returning instances to the pool.
+ ///
+ /// The type of elements in the .
+ private class HashSetPooledObjectPolicy(int initialCapacity) : PooledObjectPolicy>
+ {
+ ///
+ /// Creates a new instance with the specified initial capacity.
+ ///
+ /// A new instance.
+ public override HashSet Create()
+ {
+ return new HashSet(capacity: initialCapacity);
+ }
+
+ ///
+ /// Returns a instance to the pool after clearing it.
+ ///
+ /// The instance to return.
+ /// Always returns true.
+ public override bool Return(HashSet obj)
+ {
+ // Ensure the HashSet can be safely reused
+ obj.Clear();
+ return true;
+ }
+ }
+}
diff --git a/mzLib/MzLibUtil/ObjectPools/ListPool.cs b/mzLib/MzLibUtil/ObjectPools/ListPool.cs
new file mode 100644
index 000000000..9f25a7926
--- /dev/null
+++ b/mzLib/MzLibUtil/ObjectPools/ListPool.cs
@@ -0,0 +1,90 @@
+using System;
+using System.Collections.Generic;
+using Microsoft.Extensions.ObjectPool;
+
+namespace MzLibUtil;
+
+// Example Usage:
+// var pool = new ListPool();
+// var list = pool.Get();
+// try {
+// list.Add(1);
+// Do Work
+// }
+// finally {
+// pool.Return(list);
+// }
+
+///
+/// Provides a pool for instances to reduce memory allocations.
+/// This class uses the from Microsoft.Extensions.ObjectPool
+/// to manage the pooling of objects.
+///
+/// The type of elements in the .
+///
+/// This class is not thread-safe and should not be shared between threads.
+ /// Instances should be obtained outside a try/finally block, and the finally block should return the List to the pool to ensure proper pooling in the case of a caught exception.
+///
+public class ListPool
+{
+ private readonly ObjectPool> _pool;
+
+ ///
+ /// Initializes a new instance of the class.
+ ///
+ /// Initial capacity for the pooled List instances.
+ public ListPool(int initialCapacity = 16)
+ {
+ var policy = new ListPooledObjectPolicy(initialCapacity);
+ var provider = new DefaultObjectPoolProvider { MaximumRetained = Environment.ProcessorCount * 2 };
+ _pool = provider.Create(policy);
+ }
+
+ ///
+ /// Retrieves a List instance from the pool.
+ ///
+ /// A List instance.
+ public List Get() => _pool.Get();
+
+ ///
+ /// Returns a List instance back to the pool.
+ ///
+ /// The List instance to return.
+ public void Return(List list)
+ {
+ if (list == null) throw new ArgumentNullException(nameof(list));
+ list.Clear(); // Ensure the List is clean before returning it to the pool
+ _pool.Return(list);
+ }
+
+ ///
+ /// Policy for pooling List instances with a specified initial capacity.
+ ///
+ /// The type of elements in the List.
+ /// The initial capacity for the pooled List instances.
+ private class ListPooledObjectPolicy(int initialCapacity) : PooledObjectPolicy>
+ {
+ private int InitialCapacity { get; } = initialCapacity;
+
+ ///
+ /// Creates a new List instance with the specified initial capacity.
+ ///
+ /// A new List instance.
+ public override List Create()
+ {
+ return new List(capacity: InitialCapacity);
+ }
+
+ ///
+ /// Resets the List instance to a clean state before returning it to the pool.
+ ///
+ /// The List instance to reset and return.
+ /// True if the List instance can be returned to the pool; otherwise, false.
+ public override bool Return(List obj)
+ {
+ // Ensure the List can be safely reused
+ obj.Clear();
+ return true;
+ }
+ }
+}
\ No newline at end of file
diff --git a/mzLib/MzLibUtil/PositionFrequencyAnalysis.cs b/mzLib/MzLibUtil/PositionFrequencyAnalysis.cs
new file mode 100644
index 000000000..cdb7d33fb
--- /dev/null
+++ b/mzLib/MzLibUtil/PositionFrequencyAnalysis.cs
@@ -0,0 +1,203 @@
+using System;
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
+using Easy.Common.Extensions;
+
+namespace MzLibUtil
+{
+ // Should this have all of the parent data (i.e. protein group, protein, peptide, peptide position)? Unnecessary for now, but probably useful later.
+ public class UtilModification
+ {
+ public string IdWithMotif { get; set; }
+ public int PeptidePositionZeroIsNTerminus { get; set; } //NEED TO ENFORCE THIS EVERYWHERE OR CHECK IF ZERO OR ONE
+
+
+ public double Intensity { get; set; }
+
+ public UtilModification(string name, int position, double intensity)
+ {
+ IdWithMotif = name;
+ PeptidePositionZeroIsNTerminus = position;
+ Intensity = intensity;
+ }
+
+ }
+ public class UtilPeptide
+ {
+ public string FullSequence { get; set; }
+ public string BaseSequence { get; set; }
+ public UtilProtein ParentProtein { get; set; }
+ public int IndexInProtein { get; set; }
+ public Dictionary> ModifiedAminoAcidPositions { get; set; }
+ public double Intensity { get; set; }
+
+ public UtilPeptide(string fullSequence, Dictionary> mods = null)
+ {
+ FullSequence = fullSequence;
+ ModifiedAminoAcidPositions = mods.IsNotNullOrEmpty() ? mods : new Dictionary>();
+ SetBaseSequence();
+ }
+ public void SetBaseSequence(string modPattern = @"\[(.+?)\](? mods)
+ {
+ throw new NotImplementedException();
+ }
+ public void PeptideToProteinPositions(int offset=0)
+ {
+ offset = offset != 0 ? offset : ParentProtein.Sequence.IndexOf(BaseSequence);
+ foreach (var modpos in ModifiedAminoAcidPositions.Keys)
+ {
+ int positionInProtein = modpos + offset;
+ Dictionary mods = ModifiedAminoAcidPositions[modpos];
+ foreach (var mod in mods.Values)
+ {
+ mod.PeptidePositionZeroIsNTerminus = positionInProtein;
+ }
+ ModifiedAminoAcidPositions.Add(positionInProtein, mods);
+ ModifiedAminoAcidPositions.Remove(modpos);
+ }
+ }
+ }
+
+ public class UtilProtein
+ {
+ public string Name { get; set; }
+ public string Sequence { get; set; }
+ public Dictionary Peptides { get; set; }
+
+ public UtilProtein(string name, Dictionary peptides=null)
+ {
+ Name = name;
+ if (peptides != null) Peptides = peptides;
+ else Peptides= new Dictionary();
+ }
+ }
+
+ public class UtilProteinGroup
+ {
+ public string Name { get; set;}
+ public Dictionary Proteins { get; set; }
+
+ public UtilProteinGroup(string name, Dictionary proteins = null)
+ {
+ Name = name;
+ if (proteins != null) Proteins = proteins;
+ else Proteins= new Dictionary();
+ }
+ }
+ public class PositionFrequencyAnalysis
+ {
+ ///
+ /// Calculates the occupancy of post-translational modifications at the peptide level.
+ ///
+ /// A List of Tuples whose entries are ordered as (string FullSequence, string BaseSequence, List ProteinGroups, Intensity) for each peptide.
+ /// If true, the index of modifications at the N-terminus will be 0 (zero-based indexing). Otherwise, it is the index of the first amino acid (one-based indexing).
+ /// If true, the index of modifications at the C-terminus will be one more than the index of the last amino acid. Otherwise, it is the index of the last amino acid.
+ /// A nested dictionary whose key mappings are as follows: string ProteinGroup-> string Protein-> string BaseSequence-> int ModifiedAminoAcidIndex-> string ModificationName-> double Intensity
+ /// Note: Each BaseSequence dictionary contains a ModifiedAminoAcidIndex key of -1 that then contains a ModificationName key called "Total" that is used to track the total intensity observed for
+ /// all of the amino acids in that peptide.
+ ///
+
+ public Dictionary Occupancy { get; private set; }
+ public string OccupancyLevel { get; private set; }
+
+
+ public void PeptidePTMOccupancy(List, double>> peptides, bool modOnNTerminus = true, bool modOnCTerminus = true)
+ {
+ var proteinGroups = new Dictionary();
+
+ // Go through the peptides given
+ foreach (var pep in peptides)
+ {
+ string fullSeq = pep.Item1;
+ string baseSeq = pep.Item2.IsNotNullOrEmpty() ? pep.Item2 : new string(pep.Item1.ToCharArray());
+ ClassExtensions.RemoveSpecialCharacters(ref baseSeq, @"", @"\[(.+?)\](? pgs = pep.Item3;
+ double peptideIntensity = pep.Item4;
+
+ // Go through the peptide's protein groups
+ foreach (var pgName in pgs)
+ {
+ // If have not seen that protein group, store it
+ if (!proteinGroups.ContainsKey(pgName))
+ {
+ proteinGroups[pgName] = new UtilProteinGroup(pgName);
+ }
+ var proteinGroup = proteinGroups[pgName];
+
+ // Go through the proteins in each protein group
+ foreach (var proteinName in pgName.Split('|'))
+ {
+ // Add the protein to the protein group's dictionary if it has not been added
+ if (!proteinGroup.Proteins.ContainsKey(proteinName))
+ {
+ proteinGroup.Proteins[proteinName] = new UtilProtein(proteinName);
+ }
+ var protein = proteinGroup.Proteins[proteinName];
+
+ // If the peptide's base sequence has not been seen, add it to the protein's dictionary
+ if (!protein.Peptides.ContainsKey(baseSeq))
+ {
+ protein.Peptides[baseSeq] = new UtilPeptide(fullSeq);
+ protein.Peptides[baseSeq].Intensity = 0;
+ }
+
+ // Increase the total intensity of the peptide base sequence to track the total intensity of all amino acids in that sequence
+ protein.Peptides[baseSeq].Intensity += peptideIntensity;
+ var peptide = protein.Peptides[baseSeq];
+
+ // Want both arguments passed here to be true if need to later filter out peptide terminal mods that are not protein terminal mods
+ Dictionary> peptideMods = fullSeq.ParseModifications(modOnNTerminus, modOnCTerminus);
+ // Go through the modified positions found from the full sequence
+ foreach (var modpos in peptideMods)
+ {
+ // If that position has not been recorded as containing a modification, add it to the base sequence's dictionary
+ if (!peptide.ModifiedAminoAcidPositions.ContainsKey(modpos.Key))
+ {
+ peptide.ModifiedAminoAcidPositions[modpos.Key] = new Dictionary();
+ }
+ var modifiedPosition = peptide.ModifiedAminoAcidPositions[modpos.Key];
+
+ // Go through the modifications found at a modified amino acid index
+ foreach (var mod in modpos.Value)
+ {
+ //If the name of that modification has not been seen, record that modification in the index's dictionary with an intensity of 0
+ if (!modifiedPosition.ContainsKey(mod))
+ {
+ modifiedPosition[mod] = new UtilModification(mod, modpos.Key, 0);
+ }
+ // Increase the intensity of the modification by the intensity of the peptide
+ modifiedPosition[mod].Intensity += peptideIntensity;
+ }
+ }
+ }
+ }
+ }
+ Occupancy = proteinGroups;
+ OccupancyLevel = "peptide";
+ }
+
+ public void PeptideToProteinPTMOccupancy(Dictionary proteinSequences) // combine this to previous method.
+ {
+ foreach (var pg in Occupancy.Keys)
+ {
+ UtilProteinGroup proteinGroup = Occupancy[pg];
+ foreach (var prot in proteinGroup.Proteins.Keys)
+ {
+ UtilProtein protein = proteinGroup.Proteins[prot];
+ foreach (var pep in protein.Peptides.Keys)
+ {
+ UtilPeptide peptide = protein.Peptides[pep];
+ peptide.ParentProtein = protein;
+ peptide.PeptideToProteinPositions();
+ }
+ }
+ }
+ OccupancyLevel = "protein";
+ }
+ }
+}
diff --git a/mzLib/Omics/BioPolymerWithSetModsExtensions.cs b/mzLib/Omics/BioPolymerWithSetModsExtensions.cs
index 2e5d29718..20d0e7abe 100644
--- a/mzLib/Omics/BioPolymerWithSetModsExtensions.cs
+++ b/mzLib/Omics/BioPolymerWithSetModsExtensions.cs
@@ -18,9 +18,9 @@ public static string FullSequenceWithMassShift(this IBioPolymerWithSetMods withS
var subsequence = new StringBuilder();
// modification on peptide N-terminus
- if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification mod))
+ if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification? mod))
{
- subsequence.Append('[' + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']');
+ subsequence.Append($"[{mod.MonoisotopicMass.RoundedDouble(6)}]");
}
for (int r = 0; r < withSetMods.Length; r++)
@@ -32,11 +32,11 @@ public static string FullSequenceWithMassShift(this IBioPolymerWithSetMods withS
{
if (mod.MonoisotopicMass > 0)
{
- subsequence.Append("[+" + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']');
+ subsequence.Append($"[+{mod.MonoisotopicMass.RoundedDouble(6)}]");
}
else
{
- subsequence.Append("[" + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']');
+ subsequence.Append($"[{mod.MonoisotopicMass.RoundedDouble(6)}]");
}
}
}
@@ -46,11 +46,11 @@ public static string FullSequenceWithMassShift(this IBioPolymerWithSetMods withS
{
if (mod.MonoisotopicMass > 0)
{
- subsequence.Append("[+" + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']');
+ subsequence.Append($"[+{mod.MonoisotopicMass.RoundedDouble(6)}]");
}
else
{
- subsequence.Append("[" + mod.MonoisotopicMass.RoundedDouble(6).ToString() + ']');
+ subsequence.Append($"[{mod.MonoisotopicMass.RoundedDouble(6)}]");
}
}
return subsequence.ToString();
@@ -68,14 +68,15 @@ public static string EssentialSequence(this IBioPolymerWithSetMods withSetMods,
string essentialSequence = withSetMods.BaseSequence;
if (modstoWritePruned != null)
{
- var sbsequence = new StringBuilder();
+ var sbsequence = new StringBuilder(withSetMods.FullSequence.Length);
// variable modification on peptide N-terminus
if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification pep_n_term_variable_mod))
{
if (modstoWritePruned.ContainsKey(pep_n_term_variable_mod.ModificationType))
{
- sbsequence.Append('[' + pep_n_term_variable_mod.ModificationType + ":" + pep_n_term_variable_mod.IdWithMotif + ']');
+ sbsequence.Append(
+ $"[{pep_n_term_variable_mod.ModificationType}:{pep_n_term_variable_mod.IdWithMotif}]");
}
}
for (int r = 0; r < withSetMods.Length; r++)
@@ -86,7 +87,8 @@ public static string EssentialSequence(this IBioPolymerWithSetMods withSetMods,
{
if (modstoWritePruned.ContainsKey(residue_variable_mod.ModificationType))
{
- sbsequence.Append('[' + residue_variable_mod.ModificationType + ":" + residue_variable_mod.IdWithMotif + ']');
+ sbsequence.Append(
+ $"[{residue_variable_mod.ModificationType}:{residue_variable_mod.IdWithMotif}]");
}
}
}
@@ -96,7 +98,8 @@ public static string EssentialSequence(this IBioPolymerWithSetMods withSetMods,
{
if (modstoWritePruned.ContainsKey(pep_c_term_variable_mod.ModificationType))
{
- sbsequence.Append('[' + pep_c_term_variable_mod.ModificationType + ":" + pep_c_term_variable_mod.IdWithMotif + ']');
+ sbsequence.Append(
+ $"[{pep_c_term_variable_mod.ModificationType}:{pep_c_term_variable_mod.IdWithMotif}]");
}
}
@@ -112,12 +115,13 @@ public static string EssentialSequence(this IBioPolymerWithSetMods withSetMods,
///
public static string DetermineFullSequence(this IBioPolymerWithSetMods withSetMods)
{
- var subSequence = new StringBuilder();
+ // start string builder with initial capacity to avoid resizing costs.
+ var subSequence = new StringBuilder(withSetMods.BaseSequence.Length + withSetMods.AllModsOneIsNterminus.Count * 30);
// modification on peptide N-terminus
- if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification mod))
+ if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification? mod))
{
- subSequence.Append('[' + mod.ModificationType + ":" + mod.IdWithMotif + ']');
+ subSequence.Append($"[{mod.ModificationType}:{mod.IdWithMotif}]");
}
for (int r = 0; r < withSetMods.Length; r++)
@@ -127,14 +131,14 @@ public static string DetermineFullSequence(this IBioPolymerWithSetMods withSetMo
// modification on this residue
if (withSetMods.AllModsOneIsNterminus.TryGetValue(r + 2, out mod))
{
- subSequence.Append('[' + mod.ModificationType + ":" + mod.IdWithMotif + ']');
+ subSequence.Append($"[{mod.ModificationType}:{mod.IdWithMotif}]");
}
}
// modification on peptide C-terminus
if (withSetMods.AllModsOneIsNterminus.TryGetValue(withSetMods.Length + 2, out mod))
{
- subSequence.Append('[' + mod.ModificationType + ":" + mod.IdWithMotif + ']');
+ subSequence.Append($"[{mod.ModificationType}:{mod.IdWithMotif}]");
}
return subSequence.ToString();
diff --git a/mzLib/Omics/Digestion/DigestionAgent.cs b/mzLib/Omics/Digestion/DigestionAgent.cs
index d860659b9..734d5e65f 100644
--- a/mzLib/Omics/Digestion/DigestionAgent.cs
+++ b/mzLib/Omics/Digestion/DigestionAgent.cs
@@ -1,14 +1,12 @@
-using Omics.Modifications;
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+using MzLibUtil;
+using Omics.Modifications;
namespace Omics.Digestion
{
public abstract class DigestionAgent
{
+ protected static readonly HashSetPool HashSetPool = new HashSetPool(8);
+
protected DigestionAgent(string name, CleavageSpecificity cleavageSpecificity, List motifList, Modification cleavageMod)
{
Name = name;
@@ -17,7 +15,7 @@ protected DigestionAgent(string name, CleavageSpecificity cleavageSpecificity, L
CleavageMod = cleavageMod;
}
- public string Name { get; init; }
+ public readonly string Name;
public CleavageSpecificity CleavageSpecificity { get; init; }
public List DigestionMotifs { get; init; }
public Modification CleavageMod { get; set; }
@@ -27,6 +25,16 @@ public override string ToString()
return Name;
}
+ public override bool Equals(object? obj)
+ {
+ return obj is DigestionAgent agent && agent.Name == Name;
+ }
+
+ public override int GetHashCode()
+ {
+ return Name.GetHashCode();
+ }
+
///
/// Is length of given peptide okay, given minimum and maximum?
///
@@ -68,40 +76,48 @@ protected static bool ValidMaxLength(int? length, int maxLength)
///
public List GetDigestionSiteIndices(string sequence)
{
- var indices = new List();
-
- for (int r = 0; r < sequence.Length; r++)
+ var indices = HashSetPool.Get(); // use hash set to ensure no duplicates
+ try // Try block is to ensure that, even if an error gets thrown, the hashset is returned to the pool
{
- var cutSiteIndex = -1;
- bool cleavagePrevented = false;
+ indices.Add(0); // The start of the protein is treated as a cleavage site to retain the n-terminal peptide
- foreach (DigestionMotif motif in DigestionMotifs)
+ for (int r = 0; r < sequence.Length; r++)
{
- var motifResults = motif.Fits(sequence, r);
- bool motifFits = motifResults.Item1;
- bool motifPreventsCleavage = motifResults.Item2;
+ var cutSiteIndex = -1;
+ bool cleavagePrevented = false;
- if (motifFits && r + motif.CutIndex < sequence.Length)
+ foreach (DigestionMotif motif in DigestionMotifs)
{
- cutSiteIndex = Math.Max(r + motif.CutIndex, cutSiteIndex);
+ var motifResults = motif.Fits(sequence, r);
+ bool motifFits = motifResults.Item1;
+ bool motifPreventsCleavage = motifResults.Item2;
+
+ if (motifFits && r + motif.CutIndex < sequence.Length)
+ {
+ cutSiteIndex = Math.Max(r + motif.CutIndex, cutSiteIndex);
+ }
+
+ if (motifPreventsCleavage) // if any motif prevents cleavage
+ {
+ cleavagePrevented = true;
+ }
}
- if (motifPreventsCleavage) // if any motif prevents cleave
+ // if no motif prevents cleavage
+ if (!cleavagePrevented && cutSiteIndex != -1)
{
- cleavagePrevented = true;
+ indices.Add(cutSiteIndex);
}
}
- // if no motif prevents cleave
- if (!cleavagePrevented && cutSiteIndex != -1)
- {
- indices.Add(cutSiteIndex);
- }
+ indices.Add(sequence.Length); // The end of the protein is treated as a cleavage site to retain the c-terminal peptide
+ return indices.ToList(); // convert the hashset to a list for return.
+ }
+ finally
+ {
+ // return hashset to pool. This clears it and gets it ready for the next time it is needed from the pool.
+ HashSetPool.Return(indices);
}
-
- indices.Add(0); // The start of the protein is treated as a cleavage site to retain the n-terminal peptide
- indices.Add(sequence.Length); // The end of the protein is treated as a cleavage site to retain the c-terminal peptide
- return indices.Distinct().OrderBy(i => i).ToList();
}
}
}
diff --git a/mzLib/Omics/Digestion/DigestionProduct.cs b/mzLib/Omics/Digestion/DigestionProduct.cs
index 55aed3255..b43980e23 100644
--- a/mzLib/Omics/Digestion/DigestionProduct.cs
+++ b/mzLib/Omics/Digestion/DigestionProduct.cs
@@ -1,15 +1,13 @@
-using System;
-using System.Collections.Generic;
-using System.ComponentModel;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+using MzLibUtil;
using Omics.Modifications;
namespace Omics.Digestion
{
public abstract class DigestionProduct
{
+ protected static readonly DictionaryPool> DictionaryPool = new();
+ protected static readonly DictionaryPool FixedModDictionaryPool = new(8);
+
protected string _baseSequence;
protected DigestionProduct(IBioPolymer parent, int oneBasedStartResidue, int oneBasedEndResidue, int missedCleavages,
@@ -41,33 +39,66 @@ protected DigestionProduct(IBioPolymer parent, int oneBasedStartResidue, int one
public int Length => BaseSequence.Length; //how many residues long the peptide is
public char this[int zeroBasedIndex] => BaseSequence[zeroBasedIndex];
- protected static IEnumerable> GetVariableModificationPatterns(Dictionary> possibleVariableModifications, int maxModsForPeptide, int peptideLength)
+ #region Digestion Helper Methods
+
+ ///
+ /// Generates all possible variable modification patterns for a peptide, which includes variable and localized modifications but excludes fixed mods
+ ///
+ /// A dictionary of possible variable modifications with their positions.
+ /// The maximum number of modifications allowed for the peptide.
+ /// The length of the peptide.
+ /// An enumerable of dictionaries representing different modification patterns.
+ ///
+ /// This method generates all possible combinations of variable modifications for a given peptide.
+ /// It first calculates the total number of available modifications and the maximum number of variable modifications allowed.
+ /// Then, it iterates through all possible numbers of modifications and generates the corresponding modification patterns.
+ /// The returned dictionary is then appended with fixed modifications and used to construct a peptide with set mods
+ ///
+ protected static IEnumerable> GetVariableModificationPatterns(Dictionary> possibleVariableModifications, int maxModsForPeptide, int peptideLength)
{
- if (possibleVariableModifications.Count == 0)
- {
- yield return null;
- }
- else
- {
- var possible_variable_modifications = new Dictionary>(possibleVariableModifications);
+ if (possibleVariableModifications.Count <= 0)
+ yield break;
+
+ int[] baseVariableModificationPattern = new int[peptideLength + 4];
+ int totalAvailableMods = possibleVariableModifications.Values.Sum(modList => modList?.Count ?? 0);
+ int maxVariableMods = Math.Min(totalAvailableMods, maxModsForPeptide);
+ var variableModKvpList = possibleVariableModifications.ToList();
- int[] base_variable_modification_pattern = new int[peptideLength + 4];
- var totalAvailableMods = possible_variable_modifications.Sum(b => b.Value == null ? 0 : b.Value.Count);
- for (int variable_modifications = 0; variable_modifications <= Math.Min(totalAvailableMods, maxModsForPeptide); variable_modifications++)
+ for (int variable_modifications = 0; variable_modifications <= maxVariableMods; variable_modifications++)
+ {
+ foreach (int[] variable_modification_pattern in GetVariableModificationPatternsRecursive(variableModKvpList,
+ possibleVariableModifications.Count - variable_modifications, baseVariableModificationPattern, 0))
{
- foreach (int[] variable_modification_pattern in GetVariableModificationPatterns(new List>>(possible_variable_modifications),
- possible_variable_modifications.Count - variable_modifications, base_variable_modification_pattern, 0))
+ // use modification pattern to construct a dictionary of modifications for the peptide
+ var modificationPattern = new Dictionary(possibleVariableModifications.Count);
+
+ foreach (var variableModSet in possibleVariableModifications)
{
- yield return GetNewVariableModificationPattern(variable_modification_pattern, possible_variable_modifications);
+ int modIndex = variable_modification_pattern[variableModSet.Key] - 1;
+ if (modIndex >= 0)
+ {
+ modificationPattern.Add(variableModSet.Key, variableModSet.Value.ElementAt(modIndex));
+ }
}
+
+ yield return modificationPattern;
}
}
}
- protected Dictionary GetFixedModsOneIsNorFivePrimeTerminus(int length,
- IEnumerable allKnownFixedModifications)
+ ///
+ /// Sets the fixed modifications for the peptide, considering the N-terminal and C-terminal positions, by populating the dictionary.
+ ///
+ /// The length of the peptide.
+ /// A collection of all known fixed modifications.
+ /// A reference to a dictionary that will hold the fixed modifications, with the key representing the position.
+ ///
+ /// This method iterates through all known fixed modifications and assigns them to the appropriate positions in the peptide.
+ /// It considers different location restrictions such as N-terminal, C-terminal, and anywhere within the peptide.
+ ///
+ protected void PopulateFixedModsOneIsNorFivePrimeTerminus(int length,
+ IEnumerable allKnownFixedModifications, in Dictionary fixedModsOneIsNterminus)
{
- var fixedModsOneIsNterminus = new Dictionary(length + 3);
foreach (Modification mod in allKnownFixedModifications)
{
switch (mod.LocationRestriction)
@@ -76,18 +107,28 @@ protected Dictionary GetFixedModsOneIsNorFivePrimeTerminus(in
case "Oligo 5'-terminal.":
case "N-terminal.":
case "Peptide N-terminal.":
- //the modification is protease associated and is applied to the n-terminal cleaved residue, not at the beginign of the protein
- if (mod.ModificationType == "Protease" && ModificationLocalization.ModFits(mod, Parent.BaseSequence, 1, length, OneBasedStartResidue))
+ //the modification is protease associated and is applied to the n-terminal cleaved residue, not at the beginning of the protein
+ if (ModificationLocalization.ModFits(mod, Parent.BaseSequence, 1, length, OneBasedStartResidue))
{
- if (OneBasedStartResidue != 1)
+ if (mod.ModificationType == "Protease") // Protease N-terminal or 5' modification
{
- fixedModsOneIsNterminus[2] = mod;
+ if (OneBasedStartResidue != 1)
+ fixedModsOneIsNterminus[2] = mod;
+ }
+ else if (OneBasedStartResidue == 1) // Modified BioPolymer Start Residue (e.g. Protein N-Terminal)
+ {
+ if (!fixedModsOneIsNterminus.TryAdd(1, mod)) // Check if a protein N-terminal mod is already present
+ {
+ if (mod.LocationRestriction is "N-terminal." or "5'-terminal.") // Only overwrite if new mod is N-terminal, not peptide N-terminal
+ {
+ fixedModsOneIsNterminus[1] = mod;
+ }
+ }
+ }
+ else //Normal N-terminal peptide modification
+ {
+ fixedModsOneIsNterminus[1] = mod;
}
- }
- //Normal N-terminal peptide modification
- else if (ModificationLocalization.ModFits(mod, Parent.BaseSequence, 1, length, OneBasedStartResidue))
- {
- fixedModsOneIsNterminus[1] = mod;
}
break;
@@ -106,17 +147,27 @@ protected Dictionary GetFixedModsOneIsNorFivePrimeTerminus(in
case "C-terminal.":
case "Peptide C-terminal.":
//the modification is protease associated and is applied to the c-terminal cleaved residue, not if it is at the end of the protein
- if (mod.ModificationType == "Protease" && ModificationLocalization.ModFits(mod, Parent.BaseSequence, length, length, OneBasedStartResidue + length - 1))
+ if (ModificationLocalization.ModFits(mod, Parent.BaseSequence, length, length, OneBasedStartResidue + length - 1))
{
- if (OneBasedEndResidue != Parent.Length)
+ if (mod.ModificationType == "Protease") // Protease C-terminal or 3' modification
{
- fixedModsOneIsNterminus[length + 1] = mod;
+ if (OneBasedEndResidue != Parent.Length)
+ fixedModsOneIsNterminus[length + 1] = mod;
+ }
+ else if (OneBasedEndResidue == Parent.Length) // Modified BioPolymer End Residue (e.g. Protein C-Terminal)
+ {
+ if (!fixedModsOneIsNterminus.TryAdd(length + 2, mod)) // Check if a protein C-terminal mod is already present
+ {
+ if (mod.LocationRestriction is "C-terminal." or "3'-terminal.") // Only overwrite if new mod is C-terminal, not peptide C-terminal
+ {
+ fixedModsOneIsNterminus[length + 2] = mod;
+ }
+ }
+ }
+ else //Normal C-terminal peptide modification
+ {
+ fixedModsOneIsNterminus[length + 2] = mod;
}
- }
- //Normal C-terminal peptide modification
- else if (ModificationLocalization.ModFits(mod, Parent.BaseSequence, length, length, OneBasedStartResidue + length - 1))
- {
- fixedModsOneIsNterminus[length + 2] = mod;
}
break;
@@ -124,11 +175,143 @@ protected Dictionary GetFixedModsOneIsNorFivePrimeTerminus(in
throw new NotSupportedException("This terminus localization is not supported.");
}
}
- return fixedModsOneIsNterminus;
}
+ ///
+ /// Populates the variable modifications dictionary from both the variable modifications and the localized mods from xml reading,
+ /// considering the N-terminal, C-terminal, and internal positions.
+ ///
+ /// A list of all variable modifications.
+ /// A reference to a dictionary that will hold the variable modifications, with the key representing the position.
+ ///
+ /// This method iterates through all variable modifications and assigns them to the appropriate positions in the peptide.
+ /// It considers different location restrictions such as N-terminal, C-terminal, and anywhere within the peptide.
+ ///
+ protected void PopulateVariableModifications(List allVariableMods, in Dictionary> twoBasedDictToPopulate)
+ {
+ int peptideLength = OneBasedEndResidue - OneBasedStartResidue + 1;
+ var pepNTermVariableMods = new SortedSet();
+ twoBasedDictToPopulate.Add(1, pepNTermVariableMods);
+
+ var pepCTermVariableMods = new SortedSet();
+ twoBasedDictToPopulate.Add(peptideLength + 2, pepCTermVariableMods);
+
+ // VARIABLE MODS
+ foreach (Modification variableModification in allVariableMods)
+ {
+ // Check if can be a n-term mod
+ if (CanBeNTerminalOrFivePrime(variableModification, peptideLength) && !ModificationLocalization.UniprotModExists(Parent, 1, variableModification))
+ {
+ pepNTermVariableMods.Add(variableModification);
+ }
- private static IEnumerable GetVariableModificationPatterns(List>> possibleVariableModifications,
+ for (int r = 0; r < peptideLength; r++)
+ {
+ if (ModificationLocalization.ModFits(variableModification, Parent.BaseSequence, r + 1, peptideLength, OneBasedStartResidue + r)
+ && variableModification.LocationRestriction == "Anywhere." && !ModificationLocalization.UniprotModExists(Parent, r + 1, variableModification))
+ {
+ if (!twoBasedDictToPopulate.TryGetValue(r + 2, out var residueVariableMods))
+ {
+ residueVariableMods = new SortedSet() { variableModification };
+ twoBasedDictToPopulate.Add(r + 2, residueVariableMods);
+ }
+ else
+ {
+ residueVariableMods.Add(variableModification);
+ }
+ }
+ }
+ // Check if can be a c-term mod
+ if (CanBeCTerminalOrThreePrime(variableModification, peptideLength) && !ModificationLocalization.UniprotModExists(Parent, peptideLength, variableModification))
+ {
+ pepCTermVariableMods.Add(variableModification);
+ }
+ }
+
+ // LOCALIZED MODS
+ foreach (var kvp in Parent.OneBasedPossibleLocalizedModifications)
+ {
+ bool inBounds = kvp.Key >= OneBasedStartResidue && kvp.Key <= OneBasedEndResidue;
+ if (!inBounds)
+ {
+ continue;
+ }
+
+ int locInPeptide = kvp.Key - OneBasedStartResidue + 1;
+ foreach (Modification modWithMass in kvp.Value)
+ {
+ if (modWithMass is not Modification variableModification)
+ continue;
+
+ // Check if can be a n-term mod
+ if (locInPeptide == 1 && CanBeNTerminalOrFivePrime(variableModification, peptideLength) && !Parent.IsDecoy)
+ {
+ pepNTermVariableMods.Add(variableModification);
+ }
+
+ int r = locInPeptide - 1;
+ if (r >= 0 && r < peptideLength
+ && (Parent.IsDecoy ||
+ (ModificationLocalization.ModFits(variableModification, Parent.BaseSequence, r + 1, peptideLength, OneBasedStartResidue + r)
+ && variableModification.LocationRestriction == "Anywhere.")))
+ {
+ if (!twoBasedDictToPopulate.TryGetValue(r + 2, out var residueVariableMods))
+ {
+ residueVariableMods = new SortedSet() { variableModification };
+ twoBasedDictToPopulate.Add(r + 2, residueVariableMods);
+ }
+ else
+ {
+ residueVariableMods.Add(variableModification);
+ }
+ }
+
+ // Check if can be a c-term mod
+ if (locInPeptide == peptideLength && CanBeCTerminalOrThreePrime(variableModification, peptideLength) && !Parent.IsDecoy)
+ {
+ pepCTermVariableMods.Add(variableModification);
+ }
+ }
+ }
+ }
+
+ ///
+ /// Appends fixed modifications to the variable modification pattern when no variable mod exists.
+ ///
+ /// The dictionary containing fixed modifications.
+ /// The dictionary containing the variable modification pattern.
+ /// The number of fixed modifications appended.
+ ///
+ /// This method iterates through the fixed modifications and adds them to the variable modification pattern
+ /// if they are not already present. The number of fixed modifications appended is returned via the out parameter.
+ ///
+ protected void AppendFixedModificationsToVariable(in Dictionary fixedModDict, in Dictionary variableModPattern, out int numFixedMods)
+ {
+ numFixedMods = 0;
+ foreach (var fixedModPattern in fixedModDict)
+ {
+ if (variableModPattern.ContainsKey(fixedModPattern.Key))
+ continue;
+ numFixedMods++;
+ variableModPattern.Add(fixedModPattern.Key, fixedModPattern.Value);
+ }
+ }
+
+ ///
+ /// Recursively generates all possible variable modification patterns for a peptide.
+ ///
+ /// A list of key-value pairs representing possible variable modifications and their positions.
+ /// The number of unmodified residues desired in the pattern.
+ /// An array representing the current modification pattern.
+ /// The current index in the list of possible modifications.
+ /// An enumerable of arrays representing different modification patterns. The array index corresponds to the location of the modification
+ /// in the peptide, while the value at that index determines which index in the list of modifications
+ /// to add to the final variable modification pattern
+ ///
+ /// This method uses recursion to generate all possible combinations of variable modifications for a given peptide.
+ /// It considers both modified and unmodified residues and generates patterns accordingly.
+ ///
+ private static IEnumerable GetVariableModificationPatternsRecursive(List>> possibleVariableModifications,
int unmodifiedResiduesDesired, int[] variableModificationPattern, int index)
{
if (index < possibleVariableModifications.Count - 1)
@@ -136,7 +319,7 @@ private static IEnumerable GetVariableModificationPatterns(List 0)
{
variableModificationPattern[possibleVariableModifications[index].Key] = 0;
- foreach (int[] new_variable_modification_pattern in GetVariableModificationPatterns(possibleVariableModifications,
+ foreach (int[] new_variable_modification_pattern in GetVariableModificationPatternsRecursive(possibleVariableModifications,
unmodifiedResiduesDesired - 1, variableModificationPattern, index + 1))
{
yield return new_variable_modification_pattern;
@@ -147,7 +330,7 @@ private static IEnumerable GetVariableModificationPatterns(List GetVariableModificationPatterns(List GetNewVariableModificationPattern(int[] variableModificationArray,
- IEnumerable>> possibleVariableModifications)
+ ///
+ /// Determines if a modification can be applied to the N-terminal or 5' end of the peptide.
+ ///
+ /// The modification to check.
+ /// The length of the peptide.
+ /// True if the modification can be applied to the N-terminal or 5' end; otherwise, false.
+ private bool CanBeNTerminalOrFivePrime(Modification mod, int peptideLength)
{
- var modification_pattern = new Dictionary();
-
- foreach (KeyValuePair> kvp in possibleVariableModifications)
- {
- if (variableModificationArray[kvp.Key] > 0)
- {
- modification_pattern.Add(kvp.Key, kvp.Value[variableModificationArray[kvp.Key] - 1]);
- }
- }
-
- return modification_pattern;
+ return mod.LocationRestriction is "5'-terminal." or "Oligo 5'-terminal." or "N-terminal." or "Peptide N-terminal."
+ && ModificationLocalization.ModFits(mod, Parent.BaseSequence, 1, peptideLength, OneBasedStartResidue);
}
+ ///
+ /// Determines if a modification can be applied to the C-terminal or 3' end of the peptide.
+ ///
+ /// The modification to check.
+ /// The length of the peptide.
+ /// True if the modification can be applied to the C-terminal or 3' end; otherwise, false.
+ private bool CanBeCTerminalOrThreePrime(Modification mod, int peptideLength)
+ {
+ return mod.LocationRestriction is "3'-terminal." or "Oligo 3'-terminal." or "C-terminal." or "Peptide C-terminal."
+ && ModificationLocalization.ModFits(mod, Parent.BaseSequence, peptideLength, peptideLength, OneBasedStartResidue + peptideLength - 1);
+ }
+ #endregion
}
}
diff --git a/mzLib/Omics/Fragmentation/FragmentationTerminus.cs b/mzLib/Omics/Fragmentation/FragmentationTerminus.cs
index 146309caa..788041690 100644
--- a/mzLib/Omics/Fragmentation/FragmentationTerminus.cs
+++ b/mzLib/Omics/Fragmentation/FragmentationTerminus.cs
@@ -1,19 +1,12 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-
-namespace Omics.Fragmentation
+namespace Omics.Fragmentation
{
public enum FragmentationTerminus
- {
- Both, //N- and C-terminus
- N, //N-terminus only
- C, //C-terminus only
+ {
+ Both, //N- and C-terminus
+ N, //N-terminus only
+ C, //C-terminus only
None, //used for internal fragments, could be used for top down intact mass?
FivePrime, // 5' for NucleicAcids
ThreePrime, // 3' for NucleicAcids
- }
-
+ }
}
diff --git a/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs b/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs
index d5b020160..4302fadcb 100644
--- a/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs
+++ b/mzLib/Omics/Fragmentation/Oligo/DissociationTypeCollection.cs
@@ -1 +1,161 @@
-using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Chemistry;
using MassSpectrometry;
namespace Omics.Fragmentation.Oligo
{
///
/// Methods dealing with specific product type for RNA molecules
///
public static class DissociationTypeCollection
{
///
/// Product Ion types by dissociation method
///
private static readonly Dictionary> ProductsFromDissociationType =
new Dictionary>()
{
{ DissociationType.Unknown, new List() },
{
DissociationType.CID,
new List { ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss, ProductType.w, ProductType.y, ProductType.yWaterLoss, ProductType.M }
},
{ DissociationType.LowCID, new List() { } },
{ DissociationType.IRMPD, new List() { } },
{ DissociationType.ECD, new List { } },
{
DissociationType.PQD,
new List
{
ProductType.a, ProductType.aBaseLoss, ProductType.b, ProductType.c, ProductType.d, ProductType.dWaterLoss,
ProductType.w, ProductType.x, ProductType.y, ProductType.yWaterLoss, ProductType.d, ProductType.M
}
},
{ DissociationType.ETD, new List { } },
{
DissociationType.HCD,
new List { ProductType.w, ProductType.y, ProductType.aBaseLoss, ProductType.dWaterLoss, ProductType.M }
},
{ DissociationType.AnyActivationType, new List { } },
{ DissociationType.EThcD, new List { } },
{ DissociationType.Custom, new List { } },
{ DissociationType.ISCID, new List { } }
};
///
/// Returns list of products types based upon the dissociation type
///
///
///
public static List GetRnaProductTypesFromDissociationType(this DissociationType dissociationType) =>
ProductsFromDissociationType[dissociationType];
///
/// Mass to be added or subtracted
///
private static readonly Dictionary FragmentIonCaps =
new Dictionary
{
{ ProductType.a, ChemicalFormula.ParseFormula("H") },
{ ProductType.aWaterLoss, ChemicalFormula.ParseFormula("H-1O-1") },
{ ProductType.b, ChemicalFormula.ParseFormula("OH") },
{ ProductType.bWaterLoss, ChemicalFormula.ParseFormula("H-1") },
{ ProductType.c, ChemicalFormula.ParseFormula("O3H2P") },
{ ProductType.cWaterLoss, ChemicalFormula.ParseFormula("O2P") },
{ ProductType.d, ChemicalFormula.ParseFormula("O4H2P") },
{ ProductType.dWaterLoss, ChemicalFormula.ParseFormula("O3P") },
{ ProductType.w, ChemicalFormula.ParseFormula("H") },
{ ProductType.wWaterLoss, ChemicalFormula.ParseFormula("H-1O-1") },
{ ProductType.x, ChemicalFormula.ParseFormula("O-1H") },
{ ProductType.xWaterLoss, ChemicalFormula.ParseFormula("O-2H-1") },
{ ProductType.y, ChemicalFormula.ParseFormula("O-3P-1") },
{ ProductType.yWaterLoss, ChemicalFormula.ParseFormula("O-4H-2P-1") },
{ ProductType.z, ChemicalFormula.ParseFormula("O-4P-1") },
{ ProductType.zWaterLoss, ChemicalFormula.ParseFormula("O-5H-2P-1") },
//fragment - Base chemical formula is the corresponding fragment chemical formula subtracting 1 H as H is lost when base is removed
{ ProductType.aBaseLoss, ChemicalFormula.ParseFormula("H-2") }, // "H-1" -H
{ ProductType.bBaseLoss, ChemicalFormula.ParseFormula("O1H-2") }, //"OH1" -H
{ ProductType.cBaseLoss, ChemicalFormula.ParseFormula("O3H-1P") }, //"O3P" -H
{ ProductType.dBaseLoss, ChemicalFormula.ParseFormula("O4H-1P") }, //"O4H2P" -H
{ ProductType.wBaseLoss, ChemicalFormula.ParseFormula("H-2") }, //"H"-H
{ ProductType.xBaseLoss, ChemicalFormula.ParseFormula("O-1H-2") }, //"O-1H" -H
{ ProductType.yBaseLoss, ChemicalFormula.ParseFormula("O-3H-2P-1") }, //"O-3P-1" -H
{ ProductType.zBaseLoss, ChemicalFormula.ParseFormula("O-4H-3P-1") }, //"O-4P-1" -H
{ ProductType.M, new ChemicalFormula() }
};
///
/// Returns mass shift by product type
///
///
///
public static double GetRnaMassShiftFromProductType(this ProductType type) => FragmentIonCaps[type].MonoisotopicMass;
public static FragmentationTerminus GetRnaTerminusType(this ProductType fragmentType)
{
switch (fragmentType)
{
case ProductType.a:
case ProductType.aWaterLoss:
case ProductType.aBaseLoss:
case ProductType.b:
case ProductType.bWaterLoss:
case ProductType.bBaseLoss:
case ProductType.c:
case ProductType.cWaterLoss:
case ProductType.cBaseLoss:
case ProductType.d:
case ProductType.dWaterLoss:
case ProductType.dBaseLoss:
return FragmentationTerminus.FivePrime;
case ProductType.w:
case ProductType.wWaterLoss:
case ProductType.wBaseLoss:
case ProductType.x:
case ProductType.xWaterLoss:
case ProductType.xBaseLoss:
case ProductType.y:
case ProductType.yWaterLoss:
case ProductType.yBaseLoss:
case ProductType.z:
case ProductType.zWaterLoss:
case ProductType.zBaseLoss:
return FragmentationTerminus.ThreePrime;
case ProductType.M:
return FragmentationTerminus.None;
case ProductType.aStar:
case ProductType.aDegree:
case ProductType.bAmmoniaLoss:
case ProductType.yAmmoniaLoss:
case ProductType.zPlusOne:
case ProductType.D:
case ProductType.Ycore:
case ProductType.Y:
default:
throw new ArgumentOutOfRangeException(nameof(fragmentType), fragmentType, null);
}
}
///
/// Product ion types by Fragmentation Terminus
///
private static readonly Dictionary>
ProductIonTypesFromSpecifiedTerminus = new Dictionary>
{
{
FragmentationTerminus.FivePrime, new List
{
ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,
ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,
ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss,
ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss,
}
},
{
FragmentationTerminus.ThreePrime, new List
{
ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,
ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,
ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,
ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,
}
},
{
FragmentationTerminus.Both, new List
{
ProductType.a, ProductType.aWaterLoss, ProductType.aBaseLoss,
ProductType.b, ProductType.bWaterLoss, ProductType.bBaseLoss,
ProductType.c, ProductType.cWaterLoss, ProductType.cBaseLoss,
ProductType.d, ProductType.dWaterLoss, ProductType.dBaseLoss,
ProductType.w, ProductType.wWaterLoss, ProductType.wBaseLoss,
ProductType.x, ProductType.xWaterLoss, ProductType.xBaseLoss,
ProductType.y, ProductType.yWaterLoss, ProductType.yBaseLoss,
ProductType.z, ProductType.zWaterLoss, ProductType.zBaseLoss,
ProductType.M
}
}
};
public static List GetRnaTerminusSpecificProductTypes(
this FragmentationTerminus fragmentationTerminus)
{
return ProductIonTypesFromSpecifiedTerminus[fragmentationTerminus];
}
///
/// Returns all product ion types based upon specified terminus
///
///
///
///
public static List GetRnaTerminusSpecificProductTypesFromDissociation(
this DissociationType dissociationType, FragmentationTerminus fragmentationTerminus)
{
var terminusSpecific = fragmentationTerminus.GetRnaTerminusSpecificProductTypes();
var dissociationSpecific = dissociationType.GetRnaProductTypesFromDissociationType();
return terminusSpecific.Intersect(dissociationSpecific).ToList();
}
}
}
\ No newline at end of file
+using Chemistry;
+using MassSpectrometry;
+
+namespace Omics.Fragmentation.Oligo
+{
+ ///
+ /// Methods dealing with specific product type for RNA molecules
+ ///
+ public static class DissociationTypeCollection
+ {
+ ///
+ /// Product Ion types by dissociation method
+ ///
+ ///
+ /// HCD ions were taken from the following paper: https://www.nature.com/articles/s41598-023-36193-2
+ /// Ion types below here should be validated with experimental results.
+ /// Base and water losses occur very frequently and may also be present in these activation types.
+ /// CID, UVPD, and aEPD ions were taken from the following paper: https://pubs.acs.org/doi/10.1021/acs.analchem.3c05428?ref=PDF
+ /// NETD ions were taken from the following paper: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7161943/
+ /// lowCID ions were taken from this Thermo Poster: https://assets.thermofisher.com/TFS-Assets/CMD/Flyers/fl-489263-asms23-optimized-fragmentation-oligonucleotides-suppresses-undesired-fragmentation-fl489263-en.pdf
+ ///
+ public static Dictionary> ProductsFromDissociationType =
+ new Dictionary>()
+ {
+ { DissociationType.Unknown, new List() },
+ { DissociationType.Custom, new List() },
+ {
+ DissociationType.AnyActivationType, new List
+ {
+ ProductType.a, ProductType.aBaseLoss, ProductType.aWaterLoss,
+ ProductType.b, ProductType.bBaseLoss, ProductType.bWaterLoss,
+ ProductType.c, ProductType.cBaseLoss, ProductType.cWaterLoss,
+ ProductType.d, ProductType.dBaseLoss, ProductType.dWaterLoss,
+ ProductType.w, ProductType.wBaseLoss, ProductType.wWaterLoss,
+ ProductType.x, ProductType.xBaseLoss, ProductType.xWaterLoss,
+ ProductType.y, ProductType.yBaseLoss, ProductType.yWaterLoss,
+ ProductType.z, ProductType.zBaseLoss, ProductType.zWaterLoss,
+ ProductType.M
+ }
+ },
+ {
+ DissociationType.CID, new List
+ {
+ ProductType.a, ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss, ProductType.w,
+ ProductType.y, ProductType.yWaterLoss, ProductType.M
+ }
+ },
+ {
+ DissociationType.HCD, new List
+ {
+ ProductType.a, ProductType.aBaseLoss, ProductType.b, ProductType.c, ProductType.d,
+ ProductType.dWaterLoss, ProductType.w, ProductType.x, ProductType.y, ProductType.z,
+ ProductType.M
+ }
+ },
+ {
+ DissociationType.UVPD, new List
+ {
+ ProductType.a, ProductType.c, ProductType.d, ProductType.w, ProductType.M
+ }
+ },
+ {
+ DissociationType.aEPD, new List
+ {
+ ProductType.a, ProductType.c, ProductType.d, ProductType.w, ProductType.x, ProductType.z, ProductType.M
+ }
+ },
+ {
+ DissociationType.NETD, new List
+ {
+ ProductType.w, ProductType.d, ProductType.M
+ }
+ },
+ {
+ DissociationType.LowCID, new List()
+ {
+ ProductType.aBaseLoss, ProductType.c, ProductType.dWaterLoss, ProductType.w,
+ ProductType.y, ProductType.yWaterLoss, ProductType.M
+ }
+ },
+ { DissociationType.IRMPD, new List() { } },
+ { DissociationType.ECD, new List { } },
+ { DissociationType.PQD, new List { } },
+ { DissociationType.ETD, new List { } },
+ { DissociationType.EThcD, new List { } },
+ };
+
+ ///
+ /// Returns all dissociation types with implemented product type collections
+ ///
+ public static IEnumerable AllImplementedDissociationTypes =>
+ ProductsFromDissociationType.Where(p => p.Value.Any())
+ .Select(p => p.Key);
+
+ ///
+ /// Returns list of products types based upon the dissociation type
+ ///
+ ///
+ ///
+ public static List GetRnaProductTypesFromDissociationType(this DissociationType dissociationType) =>
+ ProductsFromDissociationType[dissociationType];
+
+ ///
+ /// Returns mass shift by product type
+ ///
+ ///
+ ///
+ public static double GetRnaMassShiftFromProductType(this ProductType type) => FragmentIonCaps[type].MonoisotopicMass;
+
+ ///
+ /// Mass to be added or subtracted
+ ///
+ private static readonly Dictionary FragmentIonCaps =
+ new Dictionary